# KFIR

## SETUP: DEPENDENCIES AND CREDENTIALS

### Working Directory

Show the current working directory:

In [2]:
# Display the kernel's current working directory. Later cells pass relative
# paths such as '..//private//...' — presumably resolved against this directory.
import os
os.getcwd()

'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\notebooks\\KFIR\\notebooks'

Add the parent directory to `sys.path` when the notebook is run from the `notebooks` folder:

In [3]:
import sys, os, re  # `re` is not used by this cell any more; the import is kept for any later cell that relies on it

# When the kernel runs inside a folder named "notebooks", make its parent
# directory importable (e.g. for the `preprocessor` package imported later —
# presumably located there).
working_directory = os.getcwd()

# os.path handles both Windows and POSIX separators, unlike a back-slash-only regex.
if os.path.basename(working_directory) == 'notebooks':
    one_directory_up = os.path.dirname(working_directory)
    # Membership guard keeps the cell idempotent: re-running it must not
    # append the same directory to sys.path again.
    if one_directory_up not in sys.path:
        sys.path.append(one_directory_up)

sys.path

['',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\python36.zip',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\DLLs',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib\\site-packages',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib\\site-packages\\win32',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib\\site-packages\\win32\\lib',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib\\site-packages\\Pythonwin',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\python-3.6.1\\lib\\site-packages\\IPython\\extensions',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\settings\\.ipython',
 'U:\\PortableApps\\WinPython-32bit-3.6.1.0Qt5\\notebooks\\KFIR']

### Plotly

Read plotly credentials from file:

In [4]:
from preprocessor.Text_File import Text_File

# Read the private credentials file; `plotly_file` ends up holding its text content.
plotly_file = Text_File('..//private//plotly_credentials').return_content()

# One credential per line: the first line is used as the username,
# the second line as the API key.
plotly_credentials = plotly_file.splitlines()
plotly_username = plotly_credentials[0]
plotly_key = plotly_credentials[1]

Import plotly:

In [5]:
import plotly.plotly as py
# NOTE(review): the star import brings every graph object (e.g. Bar, used by the
# plotting cell) into the namespace; explicit imports would make the origin of
# those names clearer.
from plotly.graph_objs import *
from plotly.tools import set_credentials_file

# Hand plotly the username and API key held in plotly_username / plotly_key.
set_credentials_file(username=plotly_username, api_key=plotly_key)  # values were read from the private credentials file, not typed in here

### Gastrodon

For usage examples of Gastrodon, see [example dbpedia notebook](https://github.com/paulhoule/gastrodon/blob/master/notebooks/remote/Querying%20DBpedia.ipynb).

Import gastrodon (for running SPARQL queries in Jupyter):

In [6]:
from gastrodon import RemoteEndpoint,QName,ttl,URIRef,inline
import pandas as pd
# DataFrame display settings: overall output width of 120 characters and
# up to 100 characters shown per column before truncation.
pd.set_option('display.width', 120)
pd.set_option('display.max_colwidth', 100)

Define prefixes:

In [7]:
# Namespace declarations (Turtle syntax) shared by the queries in this notebook.
# The `.graph` attribute of the inline document is what gets passed to the endpoint as `prefixes`.
prefix_declarations = inline("""
    @prefix wos: <http://wos.risis.eu/vocabulary/> .
    @prefix kfir: <http://clokman.com/kfir/ontology#> .
    @prefix ldr: <https://github.com/ali1k/ld-reactor/blob/master/vocabulary/index.ttl#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix wosGraph: <http://clokman.com/wos> .
    @prefix kfirGraph: <http://clokman.com/kfir> .
    
    @prefix dbo: <http://dbpedia.org/ontology/> .
""")
prefixes = prefix_declarations.graph

Read endpoint from file:

In [8]:
from preprocessor.Text_File import Text_File

# Get the endpoint address from the private file.
eculture_endpoint_url_file = Text_File('..//private//eculture_virtuoso_endpoint_address')

# Strip surrounding whitespace: a trailing newline or space in the file would
# otherwise become part of the URL handed to the endpoint.
eculture_endpoint_url = eculture_endpoint_url_file.return_content().strip()

Define endpoint:

In [9]:
# Endpoint handle used for the SPARQL queries; no default graph is configured
# and the shared prefix declarations are attached.
eculture = RemoteEndpoint(url=eculture_endpoint_url, prefixes=prefixes, default_graph=None)

## QUERIES

### Database Statistics

Get number of articles:

In [10]:
# Count the distinct resources typed wos:Article inside the wosGraph: named graph.
article_count_query = """
    SELECT (COUNT(DISTINCT ?article) as ?articles) 
    WHERE{
        GRAPH wosGraph: {
            ?article a wos:Article .
        }
    }
    """
article_count_result = eculture.select(article_count_query)

# Row 0 of the 'articles' column holds the count.
article_count = article_count_result.at[0, 'articles']
print(article_count)

136125


Count the distinct values of each common field:

In [11]:
# Predicate -> short field name. The wos: and ldr: prefixes are defined in the prefixes variable.
wos_mappings = {
    'wos:TI': 'title',
    'wos:AF': 'author',
    'wos:SN': 'issn',
    'wos:DOI': 'doi',
    'wos:EM': 'email',
    'wos:DE': 'keywords_author',
    'wos:ID': 'keywords_plus',
    'wos:SC': 'subject_category',
    'wos:WC': 'web_of_science_category',
    'wos:PY': 'publication_year',
    'wos:CR': 'has_cited',
    'wos:NR': 'has_cited_count',
    'wos:Z9': 'cited_by_count_universal',
    'wos:TC': 'cited_by_count_local',
    'wos:SO': 'source_publication',
    'wos:PU': 'publisher',
    'wos:C1': 'author_address',
    'ldr:annotations': 'annotation',
}

# Query template: counts the distinct values a predicate takes across wos:Article resources.
# Placeholders, in order: field name, field name (result column "<field>s"), predicate, field name.
count_query_template = """
    SELECT (COUNT(DISTINCT ?%s) as ?%ss) 
    WHERE{
        GRAPH wosGraph: {
            ?article a wos:Article .
            ?article %s ?%s .
        }
    }
    """

# Field name -> number of distinct values found for it.
wos_field_counts = {}

print('Counting...')
for predicate, field_name in wos_mappings.items():
    count_query = count_query_template % (field_name, field_name, predicate, field_name)
    result_column = "%ss" % field_name  # matches the "?<field>s" alias in the SELECT clause
    distinct_count = eculture.select(count_query).at[0, result_column]
    wos_field_counts[field_name] = distinct_count
    print(predicate, '/', field_name, ': ', distinct_count)
print('Counting finished.')

Counting...
wos:TI / title :  135985
wos:AF / author :  3485320
wos:SN / issn :  9627
wos:DOI / doi :  123505
wos:EM / email :  51997
wos:DE / keywords_author :  125552
wos:ID / keywords_plus :  156689
wos:SC / subject_category :  151
wos:WC / web_of_science_category :  2323
wos:PY / publication_year :  35
wos:CR / has_cited :  2854040
wos:NR / has_cited_count :  351
wos:Z9 / cited_by_count_universal :  880
wos:TC / cited_by_count_local :  852
wos:SO / source_publication :  9708
wos:PU / publisher :  2354
wos:C1 / author_address :  118156
ldr:annotations / annotation :  2158243
Counting finished.


Visualize statistics:

In [12]:
# Bar labels are the field names and bar heights their counts; the article
# count is appended as a final bar for comparison.
columns = list(wos_field_counts.keys()) + ['article']
values = list(wos_field_counts.values()) + [article_count]

data = [Bar(x=columns, y=values)]

# Title and axis labels so the chart can be read on its own.
layout = Layout(
    title='Distinct value counts per Web of Science field',
    xaxis=dict(title='Field'),
    yaxis=dict(title='Distinct value count'),
)

py.iplot(Figure(data=data, layout=layout), filename='wos_stats')