In [7]:
from IPython.core.display import HTML

# Stretch the notebook's `.container` element to 95% of the page width so
# wide DataFrame outputs are easier to read.
wide_container_css = "<style>.container { width:95% !important; }</style>"
display(HTML(wide_container_css))

In [8]:
import os
from itertools import chain

import networkx as nx
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from node2vec import Node2Vec
from snapy import MinHash, LSH

In [2]:
# Load both pickled DataFrames in one pass.
# NOTE(review): unpickling executes arbitrary code, so these files must come
# from a trusted source.
wiki_dataset, plato_dataset = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../data/dbpedia_with_articles.pkl',
        '../data/plato.pkl',
    )
)

In [28]:
# All distinct Plato entry names: every article's own `entity_name` together
# with each name listed in its `related_entries`, de-duplicated and sorted.
entities = sorted(
    set(
        chain.from_iterable(
            plato_dataset.apply(
                lambda x: x['related_entries'] + [x['entity_name']], axis=1
            )
        )
    )
)

# All distinct names that appear in any `undirected_influence` list,
# de-duplicated and sorted. Rows without an influence list are dropped first.
# The column lives on `wiki_dataset`; the earlier reference to a bare
# `dataset` name is not defined anywhere in this notebook and fails on a
# fresh kernel.
philosophers = sorted(
    set(chain.from_iterable(wiki_dataset['undirected_influence'].dropna()))
)

In [104]:
# Pick which list of names the near-duplicate search in the next cell runs on.
# The alternatives are kept commented out so they can be swapped in by hand:
# content = entities + philosophers
# content = entities
content = philosophers

In [105]:
%%time
# Insert a space between every character of each name before hashing.
content_exploded = [' '.join(name) for name in content]
# Integer labels are positions into `content`, so a label can be turned back
# into the original name when reporting matches.
labels = [position for position, _ in enumerate(content)]
seed = 42

minhash = MinHash(
    content_exploded,
    n_gram=3,
    permutations=250,
    hash_bits=64,
    seed=seed,
)
lsh = LSH(minhash, labels, no_of_bands=50)

# Keep only the labels that have at least one candidate neighbour at the
# 0.8 `min_jaccard` setting.
adjacency_list = {
    label: neighbours
    for label, neighbours in lsh.adjacency_list(min_jaccard=0.8).items()
    if neighbours
}

# Report each name next to the names it was matched with.
for k, v in adjacency_list.items():
    print(f'{content[k]} is probably similar to: {[content[item] for item in v]}')

Carl_F.H._Henry is probably similar to: ['Carl_F._H._Henry']
Carl_F._H._Henry is probably similar to: ['Carl_F.H._Henry']
Georg_Wilhelm_Friedrich_Hegel is probably similar to: ['George_Wilhelm_Friedrich_Hegel']
George_Wilhelm_Friedrich_Hegel is probably similar to: ['Georg_Wilhelm_Friedrich_Hegel']
Gottfried_Wilhelm_Leibniz is probably similar to: ['Gottfried_Wilhelm__Leibniz']
Gottfried_Wilhelm__Leibniz is probably similar to: ['Gottfried_Wilhelm_Leibniz']
Johannes_Scottus_Eriugena is probably similar to: ['Johannes_Scotus_Eriugena']
Johannes_Scotus_Eriugena is probably similar to: ['Johannes_Scottus_Eriugena']
Søren_Kierkegaard is probably similar to: ['Søren_Kierkegaard,']
Søren_Kierkegaard, is probably similar to: ['Søren_Kierkegaard']
CPU times: user 30.7 s, sys: 107 ms, total: 30.8 s
Wall time: 30.8 s


In [3]:
# Show the frame loaded from '../data/dbpedia_with_articles.pkl' for a quick
# look at its columns.
wiki_dataset

Unnamed: 0,philosopher_url,name,abstract,notable_ideas,influence_inbound,influence_outbound,undirected_influence,text
0,Stephen_Law,[Stephen Law],Stephen Law (born 1960) is an English philoso...,,,,,Stephen Law (born 1960) is an English philoso...
1,Henry_S._Richardson,[Henry S. Richardson],Henry S. Richardson is an American philosopher...,,[John_Rawls],,[John_Rawls],Henry S. Richardson is an American philosopher...
2,John_Amos_Comenius,"[John Amos Comenius, Johann Amos Comenius]",John Amos Comenius (Czech: Jan Amos Komenský; ...,,,,,John Amos Comenius (Czech: Jan Amos Komenský; ...
3,Javier_Gomá,[Javier Gomá Lanzón],"Javier Gomá Lanzón (Bilbao, 24 May 1965) is a ...",,,,,"Javier Gomá Lanzón (Bilbao, 24 May 1965) is a ..."
4,Oskar_Negt,,Oskar Negt (German pronunciation: [ˈneːkt]; bo...,,,,,Oskar Negt (German pronunciation: [ˈneːkt]; bo...
...,...,...,...,...,...,...,...,...
6084,Stanisław_Krajewski,,Stanisław Krajewski (born 1950) is a Polish ph...,,,,,Stanisław Krajewski (born 1950) is a Polish ph...
6085,Patrick_Stokes_(philosopher),[Patrick Stokes],Patrick Stokes (born 1978) is an Australian ph...,,,,,Patrick Stokes (born 1978) is an Australian ph...
6086,Ernst_Mach,[Ernst Mach],Ernst Waldfried Josef Wenzel Mach (; German: [...,,"[Gustav_Fechner, George_Berkeley, Andreas_von_...","[Pierre_Duhem, Henri_Poincaré, Ludwig_Boltzman...","[William_James, Friedrich_Hayek, Albert_Einste...",Ernst Waldfried Josef Wenzel Mach (; German: [...
6087,Jessica_Pierce,[Jessica Pierce],"Jessica Pierce (born October 21, 1965) is an A...",,,,,"Jessica Pierce (born October 21, 1965) is an A..."


In [4]:
# Show the frame loaded from '../data/plato.pkl' for a quick look at its
# columns.
plato_dataset

Unnamed: 0,title,url,related_entries,abstract,publication_date,authors,full_article_with_tags,bibliography,entity_name
0,Abduction,https://plato.stanford.edu/entries/abduction/,"[epistemology-bayesian, induction-problem, pei...","In the philosophical literature, the term “abd...",2011/03/09,"[Douven, Igor]","<div id=""main-text"">\n\n<h2><a name=""AbdGenIde...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",abduction
1,Affirmative Action,https://plato.stanford.edu/entries/affirmative...,"[equality, equal-ed-opportunity, equal-opportu...",“Affirmative action” means positive steps take...,2001/12/28,"[Fullinwider, Robert]","<div id=""main-text"">\n\n<h2 id=""Begi"">1. In th...","<div id=""bibliography"">\n\n<h2 id=""Bib"">Biblio...",affirmative-action
2,Aesthetics of the Everyday,https://plato.stanford.edu/entries/aesthetics-...,"[adorno, aesthetic-judgment, environmental-aes...","In the history of Western aesthetics, the subj...",2015/09/30,"[Saito, Yuriko]","<div id=""main-text"">\n\n<h2><a id=""RecHis"">1. ...","<div id=""bibliography"">\n\n<h2><a id=""Bib"">Bib...",aesthetics-of-everyday
3,Wittgenstein’s Aesthetics,https://plato.stanford.edu/entries/wittgenstei...,[wittgenstein],Given the extreme importance that Wittgenstein...,2007/01/26,"[Hagberg, Garry]","<div id=""main-text"">\n\n<h2><a name=""CriTraAes...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",wittgenstein-aesthetics
4,Schopenhauer’s Aesthetics,https://plato.stanford.edu/entries/schopenhaue...,"[aesthetic-judgment, aesthetics-18th-german, k...",The focus of this entry is on Schopenhauer’s a...,2012/05/09,"[Shapshay, Sandra]","<div id=""main-text"">\n\n<h2><a name=""BriBac"">1...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",schopenhauer-aesthetics
...,...,...,...,...,...,...,...,...,...
1699,Judah Abrabanel,https://plato.stanford.edu/entries/abrabanel/,"[ficino, maimonides]","Judah Abrabanel (ca. 1465–after 1521), also kn...",2005/12/02,"[Hughes, Aaron]","<div id=""main-text"">\n\n<h2><a name=""LifWor"">1...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",abrabanel
1700,Abner of Burgos,https://plato.stanford.edu/entries/abner-burgos/,"[aristotle-natphil, crescas, determinism-causa...",Abner of Burgos (Alfonso de Valladolid; c. 126...,2012/07/09,"[Sadik, Shalom]","<div id=""main-text"">\n\n<h2 id=""Life"">1. Life<...","<div id=""bibliography"">\n\n<h2 id=""Bib"">Biblio...",abner-burgos
1701,Abhidharma,https://plato.stanford.edu/entries/abhidharma/,"[atomism-modern, atomism-ancient, consciousnes...",The first centuries after Śākyamuni Buddha’s d...,2010/08/16,"[Ronkin, Noa]","<div id=""main-text"">\n\n<!--pdf include\n<br/>...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",abhidharma
1702,Peter Abelard,https://plato.stanford.edu/entries/abelard/,"[aristotle-logic, mereology-medieval, relation...",Peter Abelard (1079–21 April 1142) [‘Abailard’...,2004/08/03,"[King, Peter, Arlig, Andrew]","<div id=""main-text"">\n\n<h2><a name=""LifWor"">1...","<div id=""bibliography"">\n\n<h2><a name=""Bib"">B...",abelard


In [5]:
# Keep only the (node, neighbour list) column pair from each dataset and drop
# rows where either value is missing.
plato_columns = ['entity_name', 'related_entries']
wiki_columns = ['philosopher_url', 'undirected_influence']

plato_network = plato_dataset.loc[:, plato_columns].dropna()
wiki_network = wiki_dataset.loc[:, wiki_columns].dropna()

In [6]:
# Turn each two-column frame into a {node: [neighbours]} mapping and hand it
# to networkx to build an undirected graph.
plato_adjacency = plato_network.set_index('entity_name')['related_entries'].to_dict()
wiki_adjacency = wiki_network.set_index('philosopher_url')['undirected_influence'].to_dict()

graph_plato = nx.Graph(plato_adjacency)
graph_wiki = nx.Graph(wiki_adjacency)

In [10]:
%%time
# Settings shared by all four samplers. Only the graph and the in-out
# parameter `q` differ: the "homophily" variants use q=0.5 and the
# "structural" variants use q=2.
walk_settings = dict(dimensions=128, p=1, walk_length=100, num_walks=200, workers=1)

# Constructing a Node2Vec object already computes transition probabilities and
# generates the walks, so this cell is where the long run time is spent.
sentences_homophily_plato = Node2Vec(graph_plato, q=0.5, **walk_settings)
sentences_structural_plato = Node2Vec(graph_plato, q=2, **walk_settings)
sentences_homophily_wiki = Node2Vec(graph_wiki, q=0.5, **walk_settings)
sentences_structural_wiki = Node2Vec(graph_wiki, q=2, **walk_settings)

Computing transition probabilities:   0%|          | 0/1704 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 200/200 [22:35<00:00,  6.78s/it]


Computing transition probabilities:   0%|          | 0/1704 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 200/200 [22:34<00:00,  6.77s/it]


Computing transition probabilities:   0%|          | 0/5989 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 200/200 [1:19:28<00:00, 23.84s/it]


Computing transition probabilities:   0%|          | 0/5989 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 200/200 [1:21:01<00:00, 24.31s/it]

CPU times: user 3h 26min 51s, sys: 4min 29s, total: 3h 31min 20s
Wall time: 3h 25min 52s





In [11]:
%%time
# `fit` forwards its keyword arguments to gensim's Word2Vec, where `workers`
# is the number of training threads to start. A negative value starts no
# threads, so nothing is trained and the vectors keep their random
# initialisation. Use one thread per available CPU instead (at least one).
N_FIT_WORKERS = os.cpu_count() or 1

# Skip-gram settings shared by all four models.
skip_gram_params = dict(window=16, min_count=1, batch_words=64, workers=N_FIT_WORKERS)

model_structural_plato = sentences_structural_plato.fit(**skip_gram_params)
model_homophily_plato = sentences_homophily_plato.fit(**skip_gram_params)
model_structural_wiki = sentences_structural_wiki.fit(**skip_gram_params)
model_homophily_wiki = sentences_homophily_wiki.fit(**skip_gram_params)

CPU times: user 5min 26s, sys: 6.02 s, total: 5min 32s
Wall time: 5min 24s


In [13]:
# Look up the learned vector of every Plato graph node in the structural model.
structural_plato_dict = {
    graph_node: model_structural_plato.wv[graph_node]
    for graph_node in graph_plato.nodes
}
# One row per node after transposing; build the frame once and read both the
# row labels and the value matrix from it.
structural_plato_frame = pd.DataFrame(structural_plato_dict).T
nodes = list(structural_plato_frame.index)
structural_plato_embeddings = structural_plato_frame.values
structural_plato_embeddings.shape

(1704, 128)

In [15]:
# Look up the learned vector of every Plato graph node in the homophily model.
homophily_plato_dict = {
    graph_node: model_homophily_plato.wv[graph_node]
    for graph_node in graph_plato.nodes
}
# One row per node after transposing; build the frame once and read both the
# row labels and the value matrix from it.
homophily_plato_frame = pd.DataFrame(homophily_plato_dict).T
nodes = list(homophily_plato_frame.index)
homophily_plato_embeddings = homophily_plato_frame.values
homophily_plato_embeddings.shape

(1704, 128)

In [16]:
# Look up the learned vector of every wiki graph node in the structural model.
structural_wiki_dict = {
    graph_node: model_structural_wiki.wv[graph_node]
    for graph_node in graph_wiki.nodes
}
# One row per node after transposing; build the frame once and read both the
# row labels and the value matrix from it.
structural_wiki_frame = pd.DataFrame(structural_wiki_dict).T
nodes = list(structural_wiki_frame.index)
structural_wiki_embeddings = structural_wiki_frame.values
structural_wiki_embeddings.shape

(5989, 128)

In [17]:
# Look up the learned vector of every wiki graph node in the homophily model.
homophily_wiki_dict = {
    graph_node: model_homophily_wiki.wv[graph_node]
    for graph_node in graph_wiki.nodes
}
# One row per node after transposing; build the frame once and read both the
# row labels and the value matrix from it.
homophily_wiki_frame = pd.DataFrame(homophily_wiki_dict).T
nodes = list(homophily_wiki_frame.index)
homophily_wiki_embeddings = homophily_wiki_frame.values
homophily_wiki_embeddings.shape

(5989, 128)

In [19]:
# Write each embedding matrix to its own .npy file. The homophily wiki matrix
# previously reused the structural wiki path, which overwrote that file and
# left no homophily wiki file on disk.
# Only the value matrices are saved; the row labels (node names) are not part
# of these files.
embedding_files = {
    '../data/structural_plato_dict.npy': structural_plato_embeddings,
    '../data/homophily_plato_dict.npy': homophily_plato_embeddings,
    '../data/structural_wiki_dict.npy': structural_wiki_embeddings,
    '../data/homophily_wiki_dict.npy': homophily_wiki_embeddings,
}
for path, embeddings in embedding_files.items():
    with open(path, 'wb') as f:
        np.save(f, embeddings)

In [25]:
# Show the row labels that go with the embedding matrices. `nodes` is
# reassigned by every embedding-extraction cell, so this displays whatever
# the most recently executed one produced.
pd.DataFrame(nodes)

Unnamed: 0,0
0,Henry_S._Richardson
1,Gillian_Rose
2,Alexander_Zinoviev
3,Sextus_of_Chaeronea
4,Alexander_Potebnja
...,...
5984,Julian_Barbour
5985,Wilhelm_Kienzl
5986,Andreas_von_Ettingshausen
5987,Hans_Robert_Jauss


In [26]:
# Show the wiki graph's nodes in iteration order, for comparison with the
# `nodes` listing displayed in the previous cell.
pd.DataFrame(graph_wiki.nodes)

Unnamed: 0,0
0,Henry_S._Richardson
1,Gillian_Rose
2,Alexander_Zinoviev
3,Sextus_of_Chaeronea
4,Alexander_Potebnja
...,...
5984,Julian_Barbour
5985,Wilhelm_Kienzl
5986,Andreas_von_Ettingshausen
5987,Hans_Robert_Jauss
