In [1]:
from py2neo import Graph
from bs4 import BeautifulSoup
import pywikibot
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
import wikipedia
import spacy
nlp = spacy.load("en_core_web_sm")
import gensim
from collections import defaultdict
from gensim.parsing.preprocessing import strip_multiple_whitespaces, preprocess_string, remove_stopwords, strip_tags, strip_punctuation 

In [2]:
graph = Graph(password="kshen3778")

In [3]:
graph.run("MATCH (n:Category) RETURN n LIMIT 25").data()

[{'n': (_5214855:Category:Page {id: 690070, isNew: false, isRedirect: false, title: 'Futurama'})},
 {'n': (_5214856:Category:Page {id: 690451, isNew: false, isRedirect: false, title: 'World_War_II'})},
 {'n': (_5214857:Category:Page {id: 690571, isNew: false, isRedirect: false, title: 'Programming_languages'})},
 {'n': (_5214858:Category:Page {id: 690578, isNew: false, isRedirect: false, title: 'Professional_wrestling'})},
 {'n': (_5214859:Category:Page {id: 690637, isNew: false, isRedirect: false, title: 'Algebra'})},
 {'n': (_5214860:Category:Page {id: 690649, isNew: false, isRedirect: false, title: 'Anime'})},
 {'n': (_5214861:Category:Page {id: 690672, isNew: false, isRedirect: false, title: 'Abstract_algebra'})},
 {'n': (_5214862:Category:Page {id: 690747, isNew: false, isRedirect: false, title: 'Mathematics'})},
 {'n': (_5214863:Category:Page {id: 690777, isNew: false, isRedirect: false, title: 'Linear_algebra'})},
 {'n': (_5214864:Category:Page {id: 690803, isNew: false, isRedir

In [20]:
graph.run("MATCH (n:Category { title: 'Cold_War' }) RETURN n").data()

[{'n': (_5214918:Category:Page {id: 691614, isNew: false, isRedirect: false, title: 'Cold_War'})}]

In [55]:
#get all sub cat
cats = graph.run("MATCH (n:Category{title:'Constraint_logic_programming'})-[:BELONGS_TO*..1]-(p:Category) RETURN p LIMIT 100").data()

In [56]:
len(cats)

2

In [57]:
cats

[{'p': (_6124108:Category:Page {id: 22968506, isNew: false, isRedirect: false, title: 'Constraint_programming'})},
 {'p': (_5594005:Category:Page {id: 1188828, isNew: false, isRedirect: false, title: 'Logic_programming'})}]

In [27]:
getBadCategories("Artificial_intelligence")

['Areas_of_computer_science',
 'Cognitive_science',
 'Computational_neuroscience',
 'Cybernetics',
 'Emerging_technologies',
 'Formal_sciences',
 'Futurology',
 'Intelligence_by_type',
 'Personhood',
 'Technology_in_society',
 'Unsolved_problems_in_computer_science']

In [63]:
#check if a specific sub category references the select category an two way relationship (which we don't want)
subs = graph.run("MATCH (n:Category{title:'Emerging_technologies'})-[:BELONGS_TO*..1]-(p:Category) RETURN p LIMIT 100").data()

In [64]:
subs

[]

In [3]:
#verify if category actually exists on online Wikipedia
def categoryExist(name):
    res = requests.get("https://en.wikipedia.org/wiki/Category:" + name)
    soup = BeautifulSoup(res.content, "html.parser")
    div = soup.find("div", {"id": "mw-normal-catlinks"})
    if(div):
        return True
    else:
        return False

#Use beautifulsoup to scrape the bottom categories of a parent category (these we don't want)
def getBadCategories(parent):
    res = requests.get("https://en.wikipedia.org/wiki/Category:" + parent)
    soup = BeautifulSoup(res.content, "html.parser")
    div = soup.find("div", {"id": "mw-normal-catlinks"})
    if(div):
        to_return = []
        for item in div.findAll("li"):
            to_return.append("_".join(item.get_text().split(" ")))
        return to_return
    else:
        #parent does not exist
        return []

#get all immediate Wikipedia subcategories of main minus two way connections

# def getAllSubCat(parent):
#     all_cats = graph.run("MATCH (n:Category{title:'" + parent + "'})-[:BELONGS_TO*..1]-(p:Category) RETURN p LIMIT 100").data()
#     bad_cats = getBadCategories(parent)
#     filtered = []
#     for category in all_cats:
#         cat_title = category['p']["title"]
#         if (not (cat_title in bad_cats)):
#             filtered.append(cat_title)
#     #search all these categories to see if any of them match the other way (if so, we remove)
# #     filtered = []
# #     for category in all_cats:
# #         cat_title = category['p']["title"]
# #         subs = graph.run("MATCH (n:Category{title:'" + cat_title + "'})-[:BELONGS_TO*..1]-(p:Category) RETURN p LIMIT 100").data()
# #         found = False
# #         for category_2 in subs:
# #             sub_title = category_2['p']["title"]
# #             if(sub_title == parent):
# #                 found = True
# #         if (not found):
# #             filtered.append(cat_title)
    
#     return filtered

def getSubCategories(name):
    site = pywikibot.Site()
    gen = pywikibot.Category(site,'Category:' + name).subcategories(recurse=False)
    subcats = []
    for item in gen:
        subcats.append("_".join(item.aslink().split(":")[1][:-2].split(" ")))
    return subcats  

In [11]:
getAllSubCat("Works_about_personality")
#getBadCategories("Constraint_logic_programming")

['Borderline_personality_disorder_in_fiction',
 'Histrionic_personality_disorder_in_fiction']

In [4]:
blacklist = ["July_events"] #nodes that should not be visited
all_nodes = []
all_relations = [] #child parent

def buildGraph(root, path):
    path = path + "/" + root
    print("Current Path: " + path)
    print()
    if((not categoryExist(root)) or (root in blacklist)):
        print("Category DNE or is BLACKLISTED")
        print()
        return
    
    #add the node
    all_nodes.append(root)
    
    #get all subcategories
    sub_cat = getSubCategories(root)
    if(len(sub_cat) == 0): #base case
        return

    
    #add all the relationships
    for item in sub_cat:
        #check if this relationship already exists (cycle)
        relation = item + " " + root
        if(relation in all_relations):
            print("Relation already exists: ", relation)
            print()
            return
        all_relations.append(item + " " + root)
   
    #recurse on all sub nodes
    for item in sub_cat:
        buildGraph(item, path)


In [5]:
buildGraph("Roman_Republic", "")

Current Path: /Roman_Republic

Current Path: /Roman_Republic/Ancient_Roman_Republican_art

Current Path: /Roman_Republic/Government_of_the_Roman_Republic

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Lists_of_Roman_governors

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Achaea

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Africa

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Arabia_Petraea

Current Path: /Roman_Repub

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Syria/2nd-century_Roman_governors_of_Syria

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Syria/3rd-century_Roman_governors_of_Syria

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Thracia

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Ancient_Roman_governors_by_province/Roman_governors_of_Tuscia_et_Umbria

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Byzantine_governors

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancient_Roman_governors/Byzantine_governors/Governors_of_the_Anatolic_Theme

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Ancien

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consuls

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consuls/Charlemagne

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consuls/Charlemagne/Cultural_depictions_of_Charlemagne

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consuls/Charlemagne/Family_of_Charlemagne

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consuls/Charlemagne/Family_of_Charlemagne/Children_of_Charlemagne

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_consuls/Medieval_Roman_consul

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_court_titles/Despots_(court_title)/Despotates/Despotate_of_Epirus/People_of_the_Despotate_of_Epirus/Illegitimate_children_of_despots_of_Epirus

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_court_titles/Despots_(court_title)/Despotates/Despotate_of_Epirus/People_of_the_Despotate_of_Epirus/Prisoners_and_detainees_of_the_Despotate_of_Epirus

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_court_titles/Despots_(court_title)/Despotates/Despotate_of_Epirus/People_of_the_Despotate_of_Epirus/Women_of_the_Despotate_of_Epirus

Current Path

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_court_titles/Vestarchai

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_titles_and_offices_reserved_for_eunuchs

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_imperial_titles

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holders/Byzantine_titles_and_offices/Byzantine_imperial_titles/Nobilissimi

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Late_Roman_Empire_political_office-holde

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Ancient_Roman_priests/Priests_of_the_Roman_Republic/Priestesses_of_the_Roman_Republic

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Ancient_Roman_priests/Ancient_Roman_religious_titles

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Ancient_Roman_priests/Ancient_Roman_religious_titles/Vestal_Virgins

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Ancient_Roman_proconsuls

Relation already exists:  Imperial_Roman_proconsuls Ancient_Roman_proconsuls

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_quaestors

Current Path: /Roman_Republic/Government_of_the_Roman_Republic/Political_office-holders_in_ancient_Rome/Roman_tribunes_of_the_plebs

Current Path: /R

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/Establishments_in_the_Roman_Republic_by_century/3rd-century_BC_establishments_in_the_Roman_Republic/Gladiatorial_combat/Gladiatorial_games_in_fiction/Fictional_gladiators

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/Establishments_in_the_Roman_Republic_by_century/3rd-century_BC_establishments_in_the_Roman_Republic/Gladiatorial_combat/Gladiatorial_games_in_fiction/Films_about_gladiatorial_combat

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/Establishments_in_the_Roman_Republic_by_century/3rd-century_BC_establishments_in_the_Roman_Republic/Gladiatorial_combat/Gladiatorial_games_in_fiction/Gladiator_(novel_series)

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/Establishments_in_the_Roman_Republic_by_century/3rd-century_BC_establishments_in_the_Roman_Republic/G

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_women/Servilia/Family_of_Servilia/Children_of_Servilia/Brutus

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_women/Servilia/Family_of_Servilia/Children_of_Servilia/Brutus/Cultural_depictions_of_Marcus_Junius_Brutus_the_Younger

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_women/Servilia/Family_of_Servilia/Children_of_Servilia/Brutus/Cultural_depictions_of_Marcus_Junius_Brutus_the_Younger/Julius_Caesar_(play)

Relation already exists:  Works_based_on_Julius_Caesar_(play) Julius_Caesar_(play)

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_R

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Cultural_depictions_of_Julius_Caesar's_mistresses/Cultural_depictions_of_Cleopatra/Depictions_of_Cleopatra_in_literature

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Cultural_depictions_of_Julius_Caesar's_mistresses/Cultural_depictions_of_Cleopatra/Depictions_of_Cleopatra_in_literature/Fictional_depictions_of_Cleopatra_in_literature

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Cultural_depictions_of_Julius_Caesar's_mistresses/Cultural_depictions_of_Cleopatra/Depictions_of_Cleop

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Paintings_depicting_Julius_Caesar

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Paintings_depicting_Julius_Caesar/Paintings_of_Julius_Caesar_and_Cleopatra

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Paintings_depicting_Julius_Caesar/Paintings_of_the_death_of_Julius_Caesar

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Cultural_depictions_of_Julius_Caesar/Depict

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Family_of_Julius_Caesar/Children_of_Julius_Caesar/Augustus/Family_of_Augustus/Children_of_Augustus/Tiberius/Children_of_Tiberius/Germanicus/Children_of_Germanicus/Caligula/Cultural_depictions_of_Caligula/Depictions_of_Caligula_in_literature

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Family_of_Julius_Caesar/Children_of_Julius_Caesar/Augustus/Family_of_Augustus/Children_of_Augustus/Tiberius/Children_of_Tiberius/Germanicus/Children_of_Germanicus/Caligula/Cultural_depictions_of_Caligula/Depictions_of_Caligula_in_literature/Fictional_depictions_of_Caligula_in_literature

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-centur

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Gallic_Wars/People_of_the_Gallic_Wars/Barbarian_people_of_the_Gallic_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Gallic_Wars/People_of_the_Gallic_Wars/Roman_people_of_the_Gallic_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Gallic_Wars/Battles_of_the_Gallic_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Gallic_Wars/Campaigns_of_the_Gallic_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_th

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Monuments_and_memorials_to_Julius_Caesar/Things_named_after_Julius_Caesar/Caesars_Entertainment_Corporation/World_Series_of_Poker/World_Series_of_Poker_bracelet_winners

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Monuments_and_memorials_to_Julius_Caesar/Things_named_after_Julius_Caesar/Caesars_Entertainment_Corporation/World_Series_of_Poker/World_Series_of_Poker_bracelet_winners/World_Series_of_Poker_Main_Event_winners

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Monuments_and_memorials_to_Julius_Caesar/Things_named_after_Julius_Caesar/Caesars_Entertainment_Corporation/World_Series_of_

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/People_associated_with_Julius_Caesar/Military_personnel_of_Julius_Caesar/Roman_legions_involved_in_Caesar's_invasions_of_Britain

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Works_about_Julius_Caesar

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Works_by_Julius_Caesar

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/Julius_Caesar/Works_by_Julius_Caesar/Quotes_by_Julius_Caesar

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_t

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Characters_in_Book_VI_of_the_Aeneid/Kings_of_Rome/Cultural_depictions_of_Roman_kings

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Characters_in_Book_VI_of_the_Aeneid/Kings_of_Rome/Cultural_depictions_of_Roman_kings/Cultural_depictions_of_Romulus_and_Remus

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Characters_in_Book_VI_of_the_Aeneid/Kings_of_Rome/Cultural_depictions_of_Roman_kings/Cultural_depictions_of_Servius_Tullius

Current Pat

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Deities_in_the_Aeneid/Mercury_(mythology)

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Deities_in_the_Aeneid/Mercury_(mythology)/Temples_of_Mercury

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Characters_in_the_Aeneid/Phoenician_characters_in_the_Aeneid

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/1st_century_BC_in_the_Roman_Republic/1st-century_BC_Romans/1st-century_BC_Roman_poets/Virgil/Aeneid/Works_based_on_

Relation already exists:  Cultural_depictions_of_Servilia Servilia

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in_the_Roman_Republic/Cimbrian_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in_the_Roman_Republic/Cimbrian_War/People_of_the_Cimbrian_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in_the_Roman_Republic/Cimbrian_War/Battles_of_the_Cimbrian_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in_the_Roman_Republic/2nd-century_BC_establishments_in_the_Roman_Republic

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in_the_Roman_Republic/2nd_century_BC_in_Hispania

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/2nd_century_BC_in

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/3rd_century_BC_in_the_Roman_Republic/Pyrrhic_War/Battles_of_the_Pyrrhic_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/3rd_century_BC_in_the_Roman_Republic/Roman–Greek_wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/4th_century_BC_in_the_Roman_Republic

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/4th_century_BC_in_the_Roman_Republic/4th-century_BC_Romans

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/4th_century_BC_in_the_Roman_Republic/4th-century_BC_establishments_in_the_Roman_Republic

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the_Roman_Republic/4th_century_BC_in_the_Roman_Republic/Sicilian_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Centuries_in_the

Relation already exists:  People_of_the_Cimbrian_War Cimbrian_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman_Republican_civil_wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman_Republican_civil_wars/Roman_Servile_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman_Republican_civil_wars/Roman_Servile_Wars/Battles_of_the_Roman_Servile_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman_Republican_civil_wars/Roman_Servile_Wars/Rebel_slaves_in_ancient_Rome

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman_Republican_civil_wars/Roman_Servile_Wars/Rebel_slaves_in_ancient_Rome/Spartacus

Relation already exists:  Cultural_depictions_of_Spartacus Spartacus

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_invol

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman–Persian_Wars/Roman–Sasanian_Wars/Byzantine–Sasanian_War_of_602–628/Sasanian_Egypt

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman–Persian_Wars/Roman–Sasanian_Wars/Byzantine–Sasanian_War_of_602–628/Sasanian_Egypt/Sasanian_governors_of_Egypt

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman–Persian_Wars/Roman–Sasanian_Wars/Anastasian_War

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman–Persian_Wars/Roman–Sasanian_Wars/Battles_of_the_Roman–Sasanian_Wars

Relation already exists:  Battles_of_the_Byzantine–Sasanian_War_of_602–628 Battles_of_the_Roman–Sasanian_Wars

Current Path: /Roman_Republic/History_of_the_Roman_Republic/Wars_involving_the_Roman_Republic/Roman–Persian_Wars/Roman–Sasanian_Wars/Iberian_War

Current Path: /Roman_Republic/H

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Gausian_dynasty

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Gausian_dynasty/Burial_sites_of_the_Gausian_dynasty

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Harodingian_dynasty

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Harodingian_dynasty/Burial_sites_of_the_Harodingian_dynasty

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Herbertien_dynasty

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_families/Herbertien_dynasty/Burial_sites_of_the_Herbertien_dynasty

Current Path: /Roman_Repu

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Princes_of_Benevento

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Princes_of_Salerno

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Lombard_warriors

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Lombard_women

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Lombard_women/Lombard_princesses

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Lombards/Lombard_people/Lombard_women/Lombardic_queens_consort

Current Path: /Roman_Republic/Military_units_and_formations_of_the_Roman_Republic/Foederati/Suebi/Marcomanni

Current P

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Jews_and_Judaism_in_the_Roman_Republic/Herodian_kingdom/Establishments_in_the_Herodian_kingdom

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Jews_and_Judaism_in_the_Roman_Republic/Herodian_kingdom/Establishments_in_the_Herodian_kingdom/1st-century_BC_establishments_in_Judea

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Jews_and_Judaism_in_the_Roman_Republic/Herodian_Tetrarchy

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Jews_and_Judaism_in_the_Roman_Republic/Herodian_Tetrarchy/Establishments_in_the_Herodian_Tetrarchy

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Pontifices_Maximi_of_the_Roman_Republic

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Roman_Republican_praetors

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Religious_leaders_of_the_Roman_Republic

Current Path: /Roman_Republic/People_of_the_Roman_Republic/Religious_leaders_of_the_Roma

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/2nd-century_people_of_Roman_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/3rd-century_people_of_Roman_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/3rd-century_people_of_Roman_Egypt/3rd-century_Egyptian_women

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/4th-century_Egyptian_people

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/4th-century_Egyptian_people/4th-century_Egyptian_women

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/4th-century_Egyptian_people/4th-century_Egyptian_women/Hypatia

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/Roman-era_Egyptians/4th-century_Egyptian_people/4th-century_Egyptian_women/Hypati

Relation already exists:  5th-century_Egyptian_people 5th_century_in_Byzantine_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/1st_millennium_in_Roman_Egypt/6th_century_in_Byzantine_Egypt

Relation already exists:  6th-century_Egyptian_people 6th_century_in_Byzantine_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/1st_millennium_in_Roman_Egypt/7th_century_in_Byzantine_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/1st_millennium_in_Roman_Egypt/1st-millennium_establishments_in_Roman_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/1st_millennium_in_Roman_Egypt/1st-millennium_establishments_in_Roman_Egypt/1st-century_establishments_in_Roman_Egypt

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Egypt/1st_millennium_in_Roman_Egypt/1st-millennium_establishments_in_Roman_Egypt/2nd-century_establishments_in_Roman_Egypt

Current Path: /Roman_R

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix/Asterix_books

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix/Asterix_characters

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix/Asterix_films

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix/Video_games_based_on_Asterix

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Asterix/Asterix_images

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Roman_fortifications_in_Gallia_Lugdunensis

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Gallia_Lugdunensis/Roman_governors_of_Gallia_Lugdunensis

Current Path: /Roman_Republic/Provinces_of

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Macedonia/Roman_Thessalonica/Second_Epistle_to_the_Thessalonians/Second_Epistle_to_the_Thessalonians_papyri

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Macedonia/Third_Macedonian_War

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Asia

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Asia/Roman_fortifications_in_Roman_Asia

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Asia/Roman_fortifications_in_Roman_Asia/Amorium

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Asia/Roman_fortifications_in_Roman_Asia/Amorium/People_from_Amorium

Current Path: /Roman_Republic/Provinces_of_the_Roman_Republic/Roman_Asia/Roman_governors_of_Asia

Current Path: /Roman_Republic/Reform_in_the_Roman_Republic

Current Path: /Roman_Republic/Socii

Current Path: /Roman_Republic/Treaties_of_the_Roman_Republic

Current Path: /Roman_Republic

In [22]:
len(all_nodes)

1121

In [8]:
all_relations

['Ancient_Roman_Republican_art Roman_Republic',
 'Government_of_the_Roman_Republic Roman_Republic',
 'History_of_the_Roman_Republic Roman_Republic',
 'Military_units_and_formations_of_the_Roman_Republic Roman_Republic',
 'Old_Latin_literature Roman_Republic',
 'Optimates Roman_Republic',
 'People_of_the_Roman_Republic Roman_Republic',
 'Populares Roman_Republic',
 'Provinces_of_the_Roman_Republic Roman_Republic',
 'Reform_in_the_Roman_Republic Roman_Republic',
 'Socii Roman_Republic',
 'Treaties_of_the_Roman_Republic Roman_Republic',
 'Tribes_conquered_by_Roman_republic Roman_Republic',
 'Ancient_Roman_governors Government_of_the_Roman_Republic',
 'Political_office-holders_in_ancient_Rome Government_of_the_Roman_Republic',
 'Roman_Republican_consuls Government_of_the_Roman_Republic',
 'First_Triumvirate Government_of_the_Roman_Republic',
 'Roman_Republican_praetors Government_of_the_Roman_Republic',
 'Roman_Senate Government_of_the_Roman_Republic',
 'Senators_of_the_Roman_Republic Gove

In [16]:
#wikipedia.search("Herbertien_dynasty")
wikipedia.search(line.rstrip("Herbertien_dynasty"))[0]

'Pepin, Count of Vermandois'

In [19]:
try:
    p = wikipedia.page('Pepin, Count of Vermandois')
    print(p.content)
except wikipedia.DisambiguationError as e:
    print(e.options[0])
except Exception:
    print("Caught")

Caught


In [9]:
#save graph to files
with open('Roman_republic_nodes.txt', 'w') as f:
    for item in all_nodes:
        f.write("%s\n" % item)

with open('Roman_republic_relations.txt', 'w') as f:
    for item in all_relations:
        f.write("%s\n" % item)        

In [10]:
def preprocess(x):
    x = gensim.utils.simple_preprocess(x)
    x = " ".join(x)
    
    CUSTOM_FILTERS = [lambda x: x.lower(), strip_tags, strip_punctuation, remove_stopwords]
    x = preprocess_string(x, CUSTOM_FILTERS)
    return x

In [20]:
#Extract wikipedia articles (to convert into feature vectors)
file = open("Roman_republic_nodes.txt", "r")
articles_cleaned = []
freq_threshold = 3
for line in file:
    print("=====" + line + "=======")
    search = wikipedia.search(line.rstrip("\n"))[0]
    try:
        page = wikipedia.page(search)
    except wikipedia.DisambiguationError as e:
        page = wikipedia.page(e.options[0])
    except Exception:
        articles_cleaned.append("")
        continue;
    #page = wikipedia.page(wikipedia.search(line.rstrip("\n"))[0])
    content = page.content
    cleaned = preprocess(content) #strip punctuations and stopwords and weird characters
    cleaned = " ".join(cleaned)
    doc = nlp(cleaned)
    lemm = [token.lemma_ for token in doc]
    d = defaultdict(int)
    for item in lemm:
        d[item] += 1
    tokens=[key for key,value in d.items() if value>freq_threshold] #all words with frequency of more than some number
    texts = [word for word in lemm if word in tokens]
    texts = " ".join(texts)
    articles_cleaned.append(texts)
    #print(texts)
file.close()

=====Roman_Republic
=====Ancient_Roman_Republican_art
=====Government_of_the_Roman_Republic
=====Ancient_Roman_governors
=====Ancient_Roman_governors_by_province
=====Lists_of_Roman_governors
=====Roman_governors_of_Achaea
=====Roman_governors_of_Africa
=====Roman_governors_of_Arabia_Petraea
=====Roman_governors_of_Asia
=====Roman_governors_of_Bithynia_and_Pontus
=====Roman_governors_of_Britain
=====Roman_governors_of_Campania
=====Roman_governors_of_Cappadocia
=====Roman_governors_of_Cilicia
=====Roman_governors_of_Crete_and_Cyrenaica
=====Roman_governors_of_Cyprus
=====Roman_governors_of_Dacia
=====Roman_governors_of_Dalmatia
=====Roman_governors_of_Egypt
=====Roman_governors_of_Galatia
=====Roman_governors_of_Gallia_Aquitania
=====Roman_governors_of_Gallia_Belgica
=====Roman_governors_of_Gallia_Lugdunensis
=====Roman_governors_of_Gallia_Narbonensis
=====Roman_governors_of_Gaul
=====Praetorian_prefects_of_Gaul
=====Roman_governors_of_Germania_Inferior
=====Roman_governors_of_Germania

=====Praetorian_prefects_of_the_East
=====Praetorian_prefects_of_Gaul
=====Praetorian_prefects_of_the_Illyricum
=====Praetorian_prefects_of_Italy
=====Praetorian_prefects
=====Roman_praetors
=====Imperial_Roman_praetors
=====Roman_Republican_praetors
=====Ancient_Roman_priests
=====Roman_augurs
=====Augurs_of_the_Roman_Republic
=====Augurs_of_the_Roman_Empire
=====Ancient_Roman_priestesses
=====Priestesses_of_the_Roman_Empire
=====Priestesses_of_the_Roman_Republic
=====Vestal_Virgins
=====Priests_of_the_Roman_Empire
=====Augurs_of_the_Roman_Empire
=====Pontifices_Maximi_of_the_Roman_Empire
=====Priestesses_of_the_Roman_Empire
=====Priests_of_the_Roman_Republic
=====Augurs_of_the_Roman_Republic
=====Pontifices_Maximi_of_the_Roman_Republic
=====Priestesses_of_the_Roman_Republic
=====Ancient_Roman_religious_titles
=====Vestal_Virgins
=====Ancient_Roman_proconsuls
=====Roman_quaestors
=====Roman_tribunes_of_the_plebs
=====Ancient_Roman_senators
=====Senators_of_the_Roman_Empire
=====Senato

=====Cultural_depictions_of_Germanicus
=====Generals_of_Tiberius
=====Cultural_depictions_of_Tiberius
=====Wives_of_Tiberius
=====Wives_of_Augustus
=====Generals_of_Augustus
=====Cultural_depictions_of_Augustus
=====Augustus_in_Ancient_Roman_sculpture
=====Depictions_of_Augustus_on_film
=====Films_based_on_Antony_and_Cleopatra
=====Films_based_on_Julius_Caesar_(play)
=====Julius_Caesar_(play)
=====Depictions_of_Augustus_in_literature
=====Fictional_depictions_of_Augustus_in_literature
=====Depictions_of_Augustus_on_television
=====Augustan_sculptures
=====Augustus_in_Ancient_Roman_sculpture
=====Caesarion
=====Cultural_depictions_of_Caesarion
=====Gallic_Wars
=====People_of_the_Gallic_Wars
=====Barbarian_people_of_the_Gallic_Wars
=====Roman_people_of_the_Gallic_Wars
=====Battles_of_the_Gallic_Wars
=====Campaigns_of_the_Gallic_Wars
=====Caesar's_invasions_of_Britain
=====Individuals_involved_in_Caesar's_invasions_of_Britain
=====Ancient_Romans_involved_in_Caesar's_invasions_of_Britain
=

=====Roman_people_of_the_Second_Punic_War
=====Roman_commanders_of_the_Second_Punic_War
=====3rd-century_BC_establishments_in_the_Roman_Republic
=====First_Macedonian_War
=====First_Punic_War
=====Battles_of_the_First_Punic_War
=====Naval_battles_of_the_First_Punic_War
=====People_of_the_First_Punic_War
=====First_Punic_War_commanders
=====Carthaginian_commanders_of_the_First_Punic_War
=====Roman_commanders_of_the_First_Punic_War
=====Roman_people_of_the_First_Punic_War
=====3rd_century_BC_in_Hispania
=====3rd-century_BC_establishments_in_Spain
=====Second_Punic_War
=====Battles_of_the_Second_Punic_War
=====Naval_battles_of_the_Second_Punic_War
=====Second_Punic_War_films
=====People_of_the_Second_Punic_War
=====Second_Punic_War_commanders
=====Carthaginian_commanders_of_the_Second_Punic_War
=====Roman_commanders_of_the_Second_Punic_War
=====Roman_people_of_the_Second_Punic_War
=====Pyrrhic_War
=====Battles_of_the_Pyrrhic_War
=====Roman–Greek_wars
=====4th_century_BC_in_the_Roman_Repub

=====9th-century_Icelandic_people
=====9th-century_Icelandic_women
=====9th-century_Norwegian_people
=====9th-century_Norwegian_women
=====9th-century_Norwegian_poets
=====9th-century_Saxon_people
=====9th-century_Swedish_people
=====9th-century_Swedish_women
=====10th-century_Lombard_people
=====11th-century_Lombard_people
=====12th-century_Lombard_people
=====Counts_of_Capua
=====Dukes_of_Benevento
=====Lombard_kings
=====Lombard_monks
=====Princes_of_Benevento
=====Princes_of_Salerno
=====Lombard_warriors
=====Lombard_women
=====Lombard_princesses
=====Lombardic_queens_consort
=====Marcomanni
=====Marcomannic_people
=====Quadi
=====Suebian_people
=====Suebian_kings
=====Warini
=====Old_Latin_literature
=====Old_Latin-language_writers
=====Plays_by_Plautus
=====Optimates
=====Cato_the_Younger
=====People_of_the_Roman_Republic
=====Women_of_the_Roman_Republic
=====Servilia
=====Julius_Caesar
=====Cicero
=====Roman_Republican_consuls
=====People_executed_by_the_Roman_Republic
=====Jews

=====Oxyrhynchus_papyri_vol._IV
=====Oxyrhynchus_papyri_vol._V
=====Oxyrhynchus_papyri_vol._VI
=====Oxyrhynchus_papyri_vol._VII
=====Oxyrhynchus_papyri_vol._VIII
=====Oxyrhynchus_papyri_vol._IX
=====Oxyrhynchus_papyri_vol._X
=====Oxyrhynchus_papyri_vol._XI
=====Oxyrhynchus_papyri_vol._XII
=====Oxyrhynchus_papyri_vol._XIII
=====Oxyrhynchus_papyri_vol._XV
=====Oxyrhynchus_papyri_vol._XVIII
=====Oxyrhynchus_papyri_vol._XXII
=====Oxyrhynchus_papyri_vol._XXIV
=====Oxyrhynchus_papyri_vol._XXXIV
=====Oxyrhynchus_papyri_vol._XLI
=====Oxyrhynchus_papyri_vol._XLII
=====Oxyrhynchus_papyri_vol._L
=====Oxyrhynchus_papyri_vol._LVIII
=====Oxyrhynchus_papyri_vol._LXIV
=====Oxyrhynchus_papyri_vol._LXV
=====Oxyrhynchus_papyri_vol._LXVI
=====Oxyrhynchus_papyri_vol._LXXI
=====Oxyrhynchus_papyri_vol._LXXII
=====Oxyrhynchus_papyri_vol._LXXIII
=====Oxyrhynchus_papyri_vol._LXXIV
=====Oxyrhynchus_papyri_vol._LXXVII
=====Roman_fortifications_in_Egypt
=====Roman_fortified_camps_in_Egypt
=====Roman_legionary_fort

In [21]:
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(articles_cleaned)
print(vectorizer.get_feature_names())
print(vectors.shape)

(1121, 7034)


In [23]:
article_names = []
file = open("Roman_republic_nodes.txt", "r")
for line in file:
    article_names.append(line.rstrip("\n"))
file.close()

In [24]:
vectors

<1121x7034 sparse matrix of type '<class 'numpy.float64'>'
	with 86605 stored elements in Compressed Sparse Row format>

In [25]:
df = pd.DataFrame(vectors.toarray(), columns=vectorizer.get_feature_names())

In [26]:
df["article_name"] = article_names

In [27]:
#save to a file
df.to_hdf("Roman_republic_embeddings.h5", key='df')

In [28]:
#read from file
pd.read_hdf("Roman_republic_embeddings.h5", 'df')

Unnamed: 0,aachen,ab,abandon,abbandonata,abbasid,abbey,abbie,abbot,abdel,abdicate,...,đurađ,šokci,και,κλεοπάτρας,μάγιστρος,πρωτοσπαθάριος,τη,හල,ῥωμαίων,article_name
0,0.0,0.0,0.015866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Roman_Republic
1,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Ancient_Roman_Republican_art
2,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Government_of_the_Roman_Republic
3,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Ancient_Roman_governors
4,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Ancient_Roman_governors_by_province
5,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Lists_of_Roman_governors
6,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Roman_governors_of_Achaea
7,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Roman_governors_of_Africa
8,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Roman_governors_of_Arabia_Petraea
9,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,Roman_governors_of_Asia
