In [1]:
# Widen the notebook container to 80% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [None]:
# Install the third-party packages used below. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
%pip install sparqlwrapper node2vec

In [None]:
from bs4 import BeautifulSoup
from urllib import request
from itertools import chain
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import networkx as nx
from node2vec.edges import HadamardEmbedder
from node2vec import Node2Vec

In [None]:
def philosopher_influenced(philosopher_url):
    """Return the names of the resources influenced by a philosopher.

    Queries the local SPARQL endpoint for every ``?influenced`` such that
    ``<philosopher> dbo:influenced ?influenced`` or
    ``?influenced dbo:influencedBy <philosopher>`` holds.

    Args:
        philosopher_url: Resource name appended to
            ``http://dbpedia.org/resource/`` (e.g. ``'John_Locke'``).

    Returns:
        list[str]: De-duplicated, alphabetically sorted last path segments
        of the matching resource IRIs.
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    # Both directions are fetched in a single round trip with UNION.
    # `dbo:` is declared explicitly so the query does not rely on the
    # endpoint having that prefix predefined.
    sparql.setQuery("""
        PREFIX dbo: <http://dbpedia.org/ontology/>
        SELECT DISTINCT ?influenced
        WHERE {{
            {{ <http://dbpedia.org/resource/{x}> dbo:influenced ?influenced }}
            UNION
            {{ ?influenced dbo:influencedBy <http://dbpedia.org/resource/{x}> }}
        }}
    """.format(x=philosopher_url))
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()['results']['bindings']

    # Keep only the last IRI path segment; sorting makes the output
    # deterministic from run to run.
    names = {binding['influenced']['value'].split('/')[-1] for binding in bindings}
    return sorted(names)

In [None]:
def philosopher_influenced_by(philosopher_url):
    """Return the names of the resources that influenced a philosopher.

    Queries the local SPARQL endpoint for every ``?influencedBy`` such that
    ``<philosopher> dbo:influencedBy ?influencedBy`` or
    ``?influencedBy dbo:influenced <philosopher>`` holds.

    Args:
        philosopher_url: Resource name appended to
            ``http://dbpedia.org/resource/`` (e.g. ``'Jeremy_Bentham'``).

    Returns:
        list[str]: De-duplicated, alphabetically sorted last path segments
        of the matching resource IRIs.
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    # Both directions are fetched in a single round trip with UNION.
    # `dbo:` is declared explicitly so the query does not rely on the
    # endpoint having that prefix predefined.
    sparql.setQuery("""
        PREFIX dbo: <http://dbpedia.org/ontology/>
        SELECT DISTINCT ?influencedBy
        WHERE {{
            {{ <http://dbpedia.org/resource/{x}> dbo:influencedBy ?influencedBy }}
            UNION
            {{ ?influencedBy dbo:influenced <http://dbpedia.org/resource/{x}> }}
        }}
    """.format(x=philosopher_url))
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()['results']['bindings']

    # Keep only the last IRI path segment; sorting makes the output
    # deterministic from run to run.
    names = {binding['influencedBy']['value'].split('/')[-1] for binding in bindings}
    return sorted(names)

In [None]:
# Example call: names of the resources influenced by 'John_Locke'.
philosopher_influenced('John_Locke')

In [None]:
# Example call: names of the resources that influenced 'Jeremy_Bentham'.
philosopher_influenced_by('Jeremy_Bentham')

In [None]:
# NOTE(review): `a` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel — presumably a leftover scratch cell.
a[0].split('/')

In [None]:
# Exploratory query against the public DBpedia endpoint: fetch every
# (predicate, object) pair whose subject is the chosen resource.
philosopher='Jeremy_Bentham'
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?p ?o
    WHERE {{ <http://dbpedia.org/resource/{x}> ?p ?o}}
""".format(x=philosopher))
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Display the converted JSON response.
results

In [None]:
# Number of result rows in the response currently held in `results`.
len(results['results']['bindings'])

In [None]:
# Wikipedia "List of philosophers" pages, one per alphabetical range.
philosophers_a_c = 'https://en.wikipedia.org/wiki/List_of_philosophers_(A%E2%80%93C)'
philosophers_d_h = 'https://en.wikipedia.org/wiki/List_of_philosophers_(D%E2%80%93H)'
philosophers_i_q = 'https://en.wikipedia.org/wiki/List_of_philosophers_(I%E2%80%93Q)'
philosophers_r_z = 'https://en.wikipedia.org/wiki/List_of_philosophers_(R%E2%80%93Z)'

list_url = [philosophers_a_c, philosophers_d_h, philosophers_i_q, philosophers_r_z]
# One value per page, in the same order as `list_url`; each is passed as the
# `n_letters` argument of get_philosopher_urls, where it is the exclusive
# upper bound of the slice of <ul> elements that gets scanned.
n_letters = [4, 6, 10, 10]

In [None]:
def get_philosopher_urls(list_url, n_letters):
    """Collect the wiki link targets found on one list page.

    Args:
        list_url: URL of the page to download.
        n_letters: Exclusive upper bound of the slice of ``<ul>`` elements
            that is scanned; the first ``<ul>`` (index 0) is always skipped.

    Returns:
        list[str]: ``href`` value of every ``<a>`` whose href contains
        ``wiki/`` inside the selected ``<ul>`` elements, in the order they
        are found, duplicates kept.
    """
    # The context manager closes the HTTP response even if read() raises.
    with request.urlopen(list_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    letter_lists = soup.find_all('ul')[1:n_letters]
    anchors = chain.from_iterable(
        letter_list.select('a[href*="wiki/"]') for letter_list in letter_lists
    )
    return [anchor['href'] for anchor in anchors]

In [None]:
%%time
# Scrape each list page with its matching `n_letters` bound; the result is
# one list of hrefs per page.
urls = [get_philosopher_urls(list_url=a, n_letters=b) for a,b in zip(list_url, n_letters)]

In [None]:
# Flatten the per-page lists into one list.
# NOTE(review): this rebinds `urls`, so the cell is not idempotent — running it
# a second time chains over the strings themselves and yields single characters.
urls = list(chain(*urls))

In [None]:
# Keep only the text after the last '/wiki/' in each href.
# NOTE(review): the hrefs are used as scraped; presumably percent-encoded titles
# will not match DBpedia resource names — verify.
urls = [url.split('/wiki/')[-1] for url in urls]

In [None]:
# Second name for the scraped list; this is the same list object as `urls`, not a copy.
wikipedia_philosophers = urls

In [None]:
# Distinct first characters of the scraped names.
set([thing[0] for thing in sorted(urls)])

In [None]:
# Fetch every resource typed dbo:Philosopher from the public DBpedia endpoint.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
# `dbo:` is declared explicitly so the query does not rely on the endpoint
# having that prefix predefined.
sparql.setQuery("""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    SELECT ?person
    WHERE { ?person a dbo:Philosopher}
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Keep only the last path segment of each resource IRI.
dbpedia_philosophers = [result['person']['value'].split('/')[-1] for result in results['results']['bindings']]

In [None]:
# Union of both sources with duplicates removed; set() makes the resulting order arbitrary.
philosophers = list(set(wikipedia_philosophers+dbpedia_philosophers))

In [None]:
# Display the full combined list of names.
philosophers

In [None]:
def get_name(philosopher_url):
    """Return the ``dbp:name`` values of a DBpedia resource.

    Args:
        philosopher_url: Resource name appended to
            ``http://dbpedia.org/resource/``.

    Returns:
        list[str]: The ``value`` of every ``?name`` binding returned by the
        local SPARQL endpoint, in response order (empty if there are none).
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    # The query uses `dbp:`, so that prefix is the one declared here; without
    # the declaration the query depends on the endpoint predefining it.
    sparql.setQuery("""
                    PREFIX dbp: <http://dbpedia.org/property/>
                    SELECT ?name
                    WHERE {{ <http://dbpedia.org/resource/{x}> dbp:name ?name}}
                    """.format(x=philosopher_url))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    return [result['name']['value'] for result in results['results']['bindings']]

In [None]:
def get_title(url):
    """Return the ``dbp:title`` values of a DBpedia resource.

    Args:
        url: Resource name appended to ``http://dbpedia.org/resource/``.

    Returns:
        list[str]: The ``value`` of every ``?title`` binding returned by the
        local SPARQL endpoint, in response order (empty if there are none).
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    # The query uses `dbp:`, so that prefix is the one declared here; without
    # the declaration the query depends on the endpoint predefining it.
    sparql.setQuery("""
                    PREFIX dbp: <http://dbpedia.org/property/>
                    SELECT ?title
                    WHERE {{ <http://dbpedia.org/resource/{x}> dbp:title ?title}}
                    """.format(x=url))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    return [result['title']['value'] for result in results['results']['bindings']]

In [None]:
def get_notable_ideas(philosopher_url):
    """Look up the ``dbo:notableIdea`` values recorded for a resource.

    Args:
        philosopher_url: Resource name appended to
            ``http://dbpedia.org/resource/``.

    Returns:
        list[str]: The ``value`` of each ``?idea`` binding from the local
        SPARQL endpoint, in response order.
    """
    query_template = """
                    PREFIX dbo: <http://dbpedia.org/ontology/>
                    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                    SELECT ?idea
                    WHERE {{ <http://dbpedia.org/resource/{x}> dbo:notableIdea ?idea}}
                    """
    endpoint = SPARQLWrapper("http://localhost:8890/sparql")
    endpoint.setQuery(query_template.format(x=philosopher_url))
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    ideas = []
    for row in response['results']['bindings']:
        ideas.append(row['idea']['value'])
    return ideas

In [None]:
def get_abstract(url):
    """Return the English ``dbo:abstract`` of a DBpedia resource.

    Args:
        url: Resource name appended to ``http://dbpedia.org/resource/``.

    Returns:
        str or None: The first abstract whose language tag is ``'en'``, or
        ``None`` when the endpoint returns no English abstract.
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    sparql.setQuery("""
                    PREFIX dbo: <http://dbpedia.org/ontology/>
                    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                    SELECT ?abstract
                    WHERE {{ <http://dbpedia.org/resource/{x}> dbo:abstract ?abstract}}
                    """.format(x=url))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # `.get` is used because a literal without a language tag carries no
    # 'xml:lang' key in the JSON results; such bindings are skipped.
    english_abstracts = (
        result['abstract']['value']
        for result in results['results']['bindings']
        if result['abstract'].get('xml:lang') == 'en'
    )
    # None instead of IndexError when there is no English abstract.
    return next(english_abstracts, None)

In [None]:
# Example call: English abstract of the 'Syllogism' resource.
get_abstract('Syllogism')

In [None]:
# Example call: dbo:notableIdea values recorded for 'Aristotle'.
get_notable_ideas('Aristotle')

In [None]:
# Exploratory query against the local endpoint: raw JSON response for the
# dbo:influenced values of one resource.
from SPARQLWrapper import SPARQLWrapper, JSON
philosopher='Joseph_Butler'
sparql = SPARQLWrapper("http://localhost:8890/sparql")
# `dbo:` is declared explicitly so the query does not rely on the endpoint
# having that prefix predefined.
sparql.setQuery("""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    SELECT ?influenced
    WHERE {{ <http://dbpedia.org/resource/{x}> dbo:influenced ?influenced }}
""".format(x=philosopher))
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Display the converted JSON response.
results

In [None]:
# Example call: dbp:title values recorded for 'On_the_Soul'.
get_title('On_the_Soul')

In [None]:
# NOTE(review): get_name is also defined in an earlier cell of this notebook;
# when the cells run in order, this later definition is the one in effect.
def get_name(philosopher_url):
    """Return the ``dbp:name`` values of a DBpedia resource.

    Args:
        philosopher_url: Resource name appended to
            ``http://dbpedia.org/resource/``.

    Returns:
        list[str]: The ``value`` of every ``?name`` binding returned by the
        local SPARQL endpoint, in response order (empty if there are none).
    """
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    # The query uses `dbp:`, so that prefix is the one declared here; without
    # the declaration the query depends on the endpoint predefining it.
    sparql.setQuery("""
                    PREFIX dbp: <http://dbpedia.org/property/>
                    SELECT ?name
                    WHERE {{ <http://dbpedia.org/resource/{x}> dbp:name ?name}}
                    """.format(x=philosopher_url))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    return [result['name']['value'] for result in results['results']['bindings']]

In [None]:
# One row per name in `philosophers`, held in a single column 'philosopher_uri'.
dataset = pd.DataFrame(pd.Series(philosophers), columns=['philosopher_uri'])

In [None]:
%%time
# Calls get_name once per row (one SPARQL request each); every cell of
# 'names' holds the list returned for that row.
dataset['names'] = dataset['philosopher_uri'].apply(get_name)
dataset

In [None]:
%%time
# Per row, fetch the lists of influencers and of influenced resources from the
# SPARQL endpoint and store them as list-valued columns.
dataset['influenced_by'] = dataset['philosopher_uri'].apply(philosopher_influenced_by)
dataset['influenced'] = dataset['philosopher_uri'].apply(philosopher_influenced)

In [None]:
# Per row: union of both influence lists with duplicates removed; the order
# within each list is arbitrary because it passes through set().
dataset['undirected_adjacency_list'] = dataset.apply(lambda x: list(set(x['influenced_by']+x['influenced'])),axis=1)

In [None]:
# Keep only rows whose adjacency list is non-empty (bool([]) is False).
# This rebinds `dataset` to the filtered frame.
dataset = dataset[dataset['undirected_adjacency_list'].apply(bool)]

In [None]:
# Write the frame to 'philosophers.pkl' in the current working directory.
dataset.to_pickle('philosophers.pkl')

In [None]:
# Load 'philosophers.pkl' back into `dataset`.
# NOTE: unpickling can execute arbitrary code — only load files you created yourself.
dataset = pd.read_pickle('philosophers.pkl')

In [None]:
# Display the frame.
dataset

In [None]:
# Two-column selection (node name and its neighbour list) used to build the graph.
network = dataset[['philosopher_uri', 'undirected_adjacency_list']]

In [None]:
# Display the two-column frame.
network

In [None]:
# Build an undirected graph from a {philosopher_uri: [neighbour, ...]} dict.
graph = nx.Graph(network.set_index('philosopher_uri').to_dict()['undirected_adjacency_list'])

In [None]:
# Draw the whole graph with networkx's default settings.
nx.draw(graph)

In [None]:
# NOTE(review): `to_dict` is referenced without parentheses, so this displays
# the bound method rather than a dict — presumably a leftover scratch cell.
network.set_index('philosopher_uri').to_dict

In [None]:
# Export the graph as GraphML to 'philosophers.graphml' in the working directory.
nx.write_graphml(graph, 'philosophers.graphml')

In [2]:
from node2vec import Node2Vec

In [3]:
# IPython help lookup for Node2Vec (interactive aid left over from development).
Node2Vec?

In [None]:
%%time
# Construct the Node2Vec object over `graph` (dimensions=64, walk_length=30,
# num_walks=200, workers=8). This rebinds the name `node2vec` to the instance.
# NOTE(review): no random seed is passed, so results presumably differ between
# runs — confirm whether the installed node2vec version accepts `seed`.
node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=8)

In [None]:
# IPython help lookup for Node2Vec (repeated; interactive aid left over from development).
Node2Vec?

In [None]:
%%time
# Train the embedding model from the Node2Vec object.
# NOTE(review): the keyword arguments are presumably forwarded to gensim's
# Word2Vec — confirm against the installed node2vec version.
model = node2vec.fit(window=10, min_count=1, batch_words=4)

In [None]:
# The 50 nodes whose embeddings are most similar to that of 'Bertrand_Russell'.
model.wv.most_similar('Bertrand_Russell', topn=50)

In [None]:
# The 50 nodes whose embeddings are most similar to that of the node 'Arabic'.
model.wv.most_similar('Arabic', topn=50)

In [None]:
# Embedding vector stored for the node 'Immanuel_Kant'.
model.wv['Immanuel_Kant']

In [None]:
# Persist the trained model under the name 'node2vec_v1' in the working directory.
model.save('node2vec_v1')

In [None]:
# List the attributes of `node2vec.node2vec`.
# NOTE(review): at this point `node2vec` is the Node2Vec instance created
# earlier, not the package — confirm that the instance has this attribute.
dir(node2vec.node2vec)

In [None]:
# Display the two-column node/neighbour frame again.
network

In [None]:
%%time
# Edge embedder built on top of the trained node vectors.
edges_embs = HadamardEmbedder(keyed_vectors=model.wv)

In [None]:
%%time
# Materialise the edge embeddings as a keyed-vectors object.
edges_kv = edges_embs.as_keyed_vectors()

In [None]:
# Number of entries in the edge vocabulary.
# NOTE(review): `.vocab` presumably exists only on gensim < 4 keyed vectors —
# confirm against the installed gensim version.
len(edges_kv.vocab)

In [None]:
# The 50 edge embeddings most similar to the one stored under the key "('Aristotle', 'Plato')".
edges_kv.most_similar("('Aristotle', 'Plato')", topn=50)

In [None]:
# Count the nodes whose adjacency-matrix entry in the 'Arabic' column is 0.0.
sum(nx.to_pandas_adjacency(graph)['Arabic']==0.0)

In [None]:
# Install UMAP with its plotting extras. `%pip` installs into the environment
# of the running kernel; the quotes keep the shell from interpreting `[plot]`.
%pip install "umap-learn[plot]"

In [None]:
# Display the model's `vocabulary` attribute.
# NOTE(review): presumably only available on gensim < 4 models — confirm.
model.vocabulary

In [None]:
# Map every graph node to its embedding vector.
dic = {node:model.wv[node] for node in graph.nodes}
# Transposed so that each row corresponds to one node.
pd.DataFrame(dic).T

In [None]:
# Plain array of the node vectors, one row per node, in the order of `dic`.
embeddings = pd.DataFrame(dic).T.values

In [None]:
import umap
# UMAP reducer with default parameters (no random_state is set).
reducer = umap.UMAP()

In [None]:
# Fit a second, separate UMAP instance on the embeddings.
# NOTE(review): `mapper` is independent of `reducer`, so anything plotted from
# `mapper` comes from a different fit than `embeddings_2d`.
mapper = umap.UMAP().fit(embeddings)

In [None]:
%%time
# Fit `reducer` on the embeddings and return their low-dimensional projection.
embeddings_2d = reducer.fit_transform(embeddings)

In [None]:
# Display the projected coordinates.
embeddings_2d

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Select matplotlib's 'notebook' backend for the figures in later cells.
%matplotlib notebook

In [None]:
# Scatter plot of the first two columns of `embeddings_2d`. An explicit
# figure/axes pair is created so the points are never drawn onto a figure left
# active by an earlier cell, and the axes are labelled so the figure stands alone.
fig, ax = plt.subplots()
ax.scatter(
    embeddings_2d[:, 0],
    embeddings_2d[:, 1])
# The trailing semicolon suppresses the repr of the returned list.
ax.set(title='UMAP projection of node embeddings', xlabel='UMAP 1', ylabel='UMAP 2');

In [None]:
import umap.plot

In [None]:
# Plot the points of the embedding held by `mapper`, using the 'fire' theme.
umap.plot.points(mapper, theme='fire')