# Project (P2)

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

DATA_FOLDER = 'data/'
WIKISPEEDIA_PATHS = DATA_FOLDER + "wikispeedia_paths-and-graph/"
WIKIDATA_PATH = DATA_FOLDER + "wikidata/"

ARTICLES = WIKISPEEDIA_PATHS + "articles.tsv"
CATEGORIES = WIKISPEEDIA_PATHS + "categories.tsv"
LINKS = WIKISPEEDIA_PATHS + "links.tsv"
PATHS_FINISHED = WIKISPEEDIA_PATHS + "paths_finished.tsv"
PATHS_UNFINISHED = WIKISPEEDIA_PATHS + "paths_unfinished.tsv"

FEMALE_ACTORS = WIKIDATA_PATH + "female_actors.csv"
FEMALE_CELEB = WIKIDATA_PATH + "female_celeb.csv"
FEMALE_MODEL = WIKIDATA_PATH + "female_model.csv"

FEMALE = WIKIDATA_PATH + "female_wikidata.csv"
FEMALES = "listwomen.txt"

In [10]:
articles = pd.read_csv(ARTICLES, sep='\t', names=["article"], skiprows = 12)
categories = pd.read_csv(CATEGORIES, sep='\t', names=["article", "category"], skiprows = 13)
links = pd.read_csv(LINKS, sep='\t', names=["link_source", "link_target"], skiprows = 12)
paths_finished = pd.read_csv(PATHS_FINISHED, sep='\t', names=["hashedIpAddress", "timestamp", "durationInSec", "path", "rating"], skiprows = 15)
paths_unfinished = pd.read_csv(PATHS_UNFINISHED, sep='\t', names=["hashedIpAddress", "timestamp", "durationInSec", "path", "target", "type"], skiprows = 17)

In [11]:
female_actors = pd.read_csv(FEMALE_ACTORS, sep=',', names=["women", "women_label"], skiprows = 1)
female_celeb =  pd.read_csv(FEMALE_CELEB, sep=',', names=["women", "women_label"], skiprows = 1)
female_model =  pd.read_csv(FEMALE_MODEL, sep=',', names=["women", "women_label"], skiprows = 1)

female = pd.read_csv(FEMALE, sep=',', names=["women", "women_label"], skiprows = 1)

#### Pre-processing for merging

In [28]:
females = pd.read_csv(FEMALES, sep=',', names=["women", "women_label"], skiprows = 1)

In [29]:
paths = paths_finished["path"].str.split(";", n = 1, expand = True)

paths_finished['source'] = paths_finished['path'].str.split(';').str[0]
paths_finished['target'] = paths_finished['path'].str.split(';').str[-1]
(paths_finished.groupby('source')['path'].count()).loc[lambda x: x > 1].sort_values(ascending = False)

source
Brain                                 1092
Asteroid                              1059
Theatre                                931
Pyramid                                674
Batman                                 602
                                      ... 
Richmond%2C_Virginia                     2
Stornoway                                2
Collapse_of_the_World_Trade_Center       2
Stock_car_%28rail%29                     2
Lighthouse_of_Alexandria                 2
Name: path, Length: 4028, dtype: int64

In [30]:
paths_unfinished['source'] = paths_unfinished['path'].str.split(';').str[0]

In [31]:
women = pd.read_csv(FEMALES, sep=',', names=['Name', 'Subject'], skiprows=1)
womenName = women['Name']
women.head()

Unnamed: 0,Name,Subject
0,Bette_Davis,subject.People.Actors_models_and_celebrities
1,Britney_Spears,subject.People.Actors_models_and_celebrities
2,Diane_Keaton,subject.People.Actors_models_and_celebrities
3,Emma_Roberts,subject.People.Actors_models_and_celebrities
4,Evan_Rachel_Wood,subject.People.Actors_models_and_celebrities


In [32]:
people = categories.loc[list(map(lambda x: x.startswith('subject.People.'), categories['category']))]
people.head()

Unnamed: 0,article,category
1,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,subject.People.Historical_figures
4,%C3%89douard_Manet,subject.People.Artists
8,%C3%93engus_I_of_the_Picts,subject.People.Historical_figures
67,A._E._J._Collins,subject.People.Sports_and_games_people
88,Abbas_I_of_Persia,subject.People.Historical_figures


In [33]:
menName = people[~people['article'].isin(womenName)]['article']

paths_women_target = paths_finished[paths_finished['target'].isin(womenName)]
paths_men_target = paths_finished[paths_finished['target'].isin(menName)]

In [34]:
unpaths_women_target = paths_unfinished[paths_unfinished['target'].isin(womenName)]
unpaths_men_target = paths_unfinished[paths_unfinished['target'].isin(menName)]

In [19]:
print(f"Number of unfinished paths that have women as a target {len(unpaths_women_target)}")
print(f"Number of unfinished paths that have men as a target {len(unpaths_men_target)}")

Number of unfinished paths that have women as a target 342
Number of unfinished paths that have men as a target 2374


In [80]:
print(f"Number of finished paths that have women as a target {len(paths_women_target)}")
print(f"Number of finished paths that have men as a target {len(paths_men_target)}")

Number of finished paths that have women as a target 347
Number of finished paths that have men as a target 5566


In [85]:
print("The percentage of women targets in the People category are {:.0%}".format(len(paths_women_target)/(len(paths_women_target)+len(paths_men_target))))
print("The percentage of men targets in the People category are {:.0%}".format(len(paths_men_target)/(len(paths_women_target)+len(paths_men_target))))

The percentage of women targets in the People category are 6%
The percentage of men targets in the People category are 94%


#### MERGE 

In [20]:
comb = paths_women_target[["source","target"]].merge(paths_men_target[["source","target"]], on=['source'], how = "inner")

In [23]:
comb

Unnamed: 0,source,target_x,target_y
0,14th_century,Elizabeth_I_of_England,John_F._Kennedy
1,14th_century,Elizabeth_I_of_England,John_F._Kennedy
2,14th_century,Elizabeth_I_of_England,Henry_David_Thoreau
3,14th_century,Elizabeth_I_of_England,John_F._Kennedy
4,14th_century,Elizabeth_I_of_England,John_F._Kennedy
...,...,...,...
469,Ununoctium,Anne_Frank,Henry_IV_of_England
470,William_Pitt_the_Younger,Miranda_Otto,Socrates
471,Windows_2000,Bette_Davis,Ronald_Reagan
472,Windows_2000,Bette_Davis,Ronald_Reagan


#### Getting familiar with the merged data

In [35]:
prufa = paths_finished[paths_finished['source'] == "Windows_2000"]
prufa

Unnamed: 0,hashedIpAddress,timestamp,durationInSec,path,rating,source,target
50476,014b4d6036ffb8b8,1272461517,47,Windows_2000;North_America;Rocky_Mountains,1.0,Windows_2000,Rocky_Mountains
50477,71fb60d162b33586,1361738031,341,Windows_2000;North_America;Atlantic_Ocean;Hond...,4.0,Windows_2000,Whale_shark
50478,1163a49e1ad6af95,1361879467,353,Windows_2000;North_America;Atlantic_Ocean;Dolp...,,Windows_2000,Whale_shark
50479,606e0dce1de1789e,1249939459,256,Windows_2000;English_language;United_Kingdom;S...,3.0,Windows_2000,Liverpool_and_Manchester_Railway
50480,016b7c9754b71479,1381240053,120,Windows_2000;Microsoft;United_States;France;Su...,2.0,Windows_2000,R%C3%A9union
50481,6dc0945a51d8a4c7,1359013117,80,Windows_2000;Microsoft;United_States;George_Wa...,,Windows_2000,Ronald_Reagan
50482,1117b7cc476daae7,1359133051,100,Windows_2000;North_America;Europe;Renaissance;...,,Windows_2000,Mona_Lisa
50483,126c53a444812f50,1383746274,163,Windows_2000;Microsoft;United_States;Mexico;Ag...,3.0,Windows_2000,Avocado
50484,538533bf5bd35049,1324525766,670,Windows_2000;North_America;California;Arnold_S...,4.0,Windows_2000,Bette_Davis
50485,79a893b03dd4d771,1366290581,126,Windows_2000;North_America;United_States;Cinem...,,Windows_2000,Bette_Davis


In [21]:
p = comb[comb["source"] == "Stratford-upon-Avon"]
p

Unnamed: 0,source,target_x,target_y
131,Stratford-upon-Avon,Mary_I_of_Scotland,Winston_Churchill
132,Stratford-upon-Avon,Mary_I_of_Scotland,Tenzing_Norgay
133,Stratford-upon-Avon,Mary_I_of_Scotland,Tenzing_Norgay
134,Stratford-upon-Avon,Mary_I_of_Scotland,Tenzing_Norgay


In [122]:
sources = comb.groupby(comb["source"]).count()
print("The number of distinct sources that go both to women and men are {}".format(len(sources)))
print("The number of paths from all of these sources are {}".format(len(comb)))

The number of distinct sources that go both to women and men are 118
The number of paths from all of these sources are 474


In [129]:
df = sources.loc[sources['target_x'] != 1]
df

Unnamed: 0_level_0,target_x,target_y
source,Unnamed: 1_level_1,Unnamed: 2_level_1
14th_century,15,15
16th_century,2,2
Able_Archer_83,2,2
Acid_rain,8,8
Apollo_11,2,2
...,...,...
Vitamin_D,10,10
Walrus,2,2
Western_Sahara,3,3
Windows_2000,2,2


In [160]:
paths_women_target

Unnamed: 0,hashedIpAddress,timestamp,durationInSec,path,rating,source,target
36,565d45cd17793fc6,1258502232,340,14th_century;England;London;Spanish_Armada;Eng...,4.0,14th_century,Elizabeth_I_of_England
37,42941e9c2e2391e3,1339792936,43,14th_century;England;James_I_of_England;Elizab...,,14th_century,Elizabeth_I_of_England
38,067ebbe12b292e09,1347194180,186,14th_century;Hundred_Years%27_War;Henry_VI_of_...,,14th_century,Elizabeth_I_of_England
39,067ebbe12b292e09,1347306098,109,14th_century;Hundred_Years%27_War;Henry_VI_of_...,3.0,14th_century,Elizabeth_I_of_England
40,50a37fed218932d3,1349365685,57,14th_century;Edward_III_of_England;Windsor_Cas...,2.0,14th_century,Elizabeth_I_of_England
...,...,...,...,...,...,...,...
50485,79a893b03dd4d771,1366290581,126,Windows_2000;North_America;United_States;Cinem...,,Windows_2000,Bette_Davis
50940,3d09bfe7630f45f2,1298847424,103,C%C3%A6dmon;English_language;United_States;Cin...,,C%C3%A6dmon,Vivien_Leigh
51110,493ee14c16f55ef9,1299115757,406,J%C3%B3zef_Pi%C5%82sudski;Russia;United_Kingdo...,,J%C3%B3zef_Pi%C5%82sudski,Mary_II_of_England
51229,4d2b481a6649f6a0,1328564835,45,San_Diego_Electric_Railway;World_War_I;British...,1.0,San_Diego_Electric_Railway,Indira_Gandhi


In [None]:
women


In [24]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(comb)

                                                source  \
0                                         14th_century   
1                                         14th_century   
2                                         14th_century   
3                                         14th_century   
4                                         14th_century   
5                                         14th_century   
6                                         14th_century   
7                                         14th_century   
8                                         14th_century   
9                                         14th_century   
10                                        14th_century   
11                                        14th_century   
12                                        14th_century   
13                                        14th_century   
14                                        14th_century   
15                                        Christianity   
16            

### **Articles**
Contains the name of the articles

In [18]:
print(f"Number of articles in the dataset: {len(articles)}")
articles.head(10)

Number of articles in the dataset: 4604


Unnamed: 0,article
0,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in
1,%C3%85land
2,%C3%89douard_Manet
3,%C3%89ire
4,%C3%93engus_I_of_the_Picts
5,%E2%82%AC2_commemorative_coins
6,10th_century
7,11th_century
8,12th_century
9,13th_century


### Categories

Every row contains an article and its corresponding category. 

In [19]:
print(f"Number of rows in the dataset: {len(categories)}")
categories.head()

Number of rows in the dataset: 5204


Unnamed: 0,article,category
0,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,subject.History.British_History.British_Histor...
1,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,subject.People.Historical_figures
2,%C3%85land,subject.Countries
3,%C3%85land,subject.Geography.European_Geography.European_...
4,%C3%89douard_Manet,subject.People.Artists


In [32]:
# Number of articles in each category
n_artcat = categories.groupby(categories["category"]).nunique()

print(f"The number of categories is {len(n_artcat)}")
n_artcat

The number of categories is 129


Unnamed: 0_level_0,article
category,Unnamed: 1_level_1
subject.Art.Art,36
subject.Art.Artists,2
subject.Business_Studies.Business,28
subject.Business_Studies.Companies,18
subject.Business_Studies.Currency,14
...,...
subject.Science.Physics.Electricity_and_Electronics,20
subject.Science.Physics.General_Physics,55
subject.Science.Physics.Space_Astronomy,105
subject.Science.Physics.Space_transport,33


In [43]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(n_artcat)

                                                    article
category                                                   
subject.Art.Art                                          36
subject.Art.Artists                                       2
subject.Business_Studies.Business                        28
subject.Business_Studies.Companies                       18
subject.Business_Studies.Currency                        14
subject.Business_Studies.Economics                       28
subject.Citizenship.Animal_and_Human_Rights              17
subject.Citizenship.Community_organisations              11
subject.Citizenship.Conflict_and_Peace                   10
subject.Citizenship.Culture_and_Diversity                16
subject.Citizenship.Education                            17
subject.Citizenship.Environment                          26
subject.Citizenship.Law                                  19
subject.Citizenship.Media                                11
subject.Citizenship.Politics_and_governm

In [50]:
prufa = categories[categories["category"] == "subject.Science.Biology.Birds"]
prufa

Unnamed: 0,article,category,categories_people
144,African_Black_Oystercatcher,subject.Science.Biology.Birds,False
147,African_Darter,subject.Science.Biology.Birds,False
149,African_Grey_Hornbill,subject.Science.Biology.Birds,False
150,African_Grey_Parrot,subject.Science.Biology.Birds,False
151,African_Jacana,subject.Science.Biology.Birds,False
...,...,...,...
5056,Wigeon,subject.Science.Biology.Birds,False
5119,Wood_Pigeon,subject.Science.Biology.Birds,False
5121,Woodpecker,subject.Science.Biology.Birds,False
5136,Wren,subject.Science.Biology.Birds,False


**Actors_models_and_celebrities**

In [11]:
# Add underscore inbetween first and last names in female_actors dataframe
female_actors.women_label = list(map(lambda x: x.replace(' ', '_'), female_actors.women_label))
female_celeb.women_label = list(map(lambda x: x.replace(' ', '_'), female_celeb.women_label))
female_model.women_label = list(map(lambda x: x.replace(' ', '_'), female_model.women_label))

female.women_label = list(map(lambda x: x.replace(' ', '_'), female.women_label))

In [139]:
# Let's first take a look at the category subject.People.Actors_models_and_celebrities
wikispeedia_actors = categories[categories["category"] == "subject.People.Actors_models_and_celebrities"]
wikispeedia_actors

Unnamed: 0,article,category
276,Andrew_Robinson,subject.People.Actors_models_and_celebrities
638,Bette_Davis,subject.People.Actors_models_and_celebrities
760,Brandon_Routh,subject.People.Actors_models_and_celebrities
787,Britney_Spears,subject.People.Actors_models_and_celebrities
793,Bruce_Lee,subject.People.Actors_models_and_celebrities
971,Charlie_Chaplin,subject.People.Actors_models_and_celebrities
1017,Christian_Bale,subject.People.Actors_models_and_celebrities
1278,Daniel_Day-Lewis,subject.People.Actors_models_and_celebrities
1344,Diane_Keaton,subject.People.Actors_models_and_celebrities
1547,Emma_Roberts,subject.People.Actors_models_and_celebrities


In [135]:
act = wikispeedia_actors.merge(female_actors, left_on='article', right_on='women_label')
cele = wikispeedia_actors.merge(female_celeb, left_on='article', right_on='women_label')
mod = wikispeedia_actors.merge(female_model, left_on='article', right_on='women_label')

In [136]:
act

Unnamed: 0,article,category,women,women_label
0,Bette_Davis,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q71206,Bette_Davis
1,Emma_Roberts,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q228598,Emma_Roberts
2,Evan_Rachel_Wood,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q229230,Evan_Rachel_Wood
3,Jane_Fonda,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q41142,Jane_Fonda
4,Keira_Knightley,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q42581,Keira_Knightley
5,Miranda_Otto,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q294975,Miranda_Otto
6,Natalie_Portman,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q37876,Natalie_Portman
7,Sharon_Tate,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q228699,Sharon_Tate
8,Vivien_Leigh,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q129429,Vivien_Leigh


In [137]:
cele

Unnamed: 0,article,category,women,women_label
0,Britney_Spears,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q11975,Britney_Spears


In [138]:
mod

Unnamed: 0,article,category,women,women_label
0,Britney_Spears,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q11975,Britney_Spears
1,Emma_Roberts,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q228598,Emma_Roberts
2,Evan_Rachel_Wood,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q229230,Evan_Rachel_Wood
3,Sharon_Tate,subject.People.Actors_models_and_celebrities,http://www.wikidata.org/entity/Q228699,Sharon_Tate


In [117]:
wikispeedia_actors

Unnamed: 0,article,category
276,Andrew_Robinson,subject.People.Actors_models_and_celebrities
638,Bette_Davis,subject.People.Actors_models_and_celebrities
760,Brandon_Routh,subject.People.Actors_models_and_celebrities
787,Britney_Spears,subject.People.Actors_models_and_celebrities
793,Bruce_Lee,subject.People.Actors_models_and_celebrities
971,Charlie_Chaplin,subject.People.Actors_models_and_celebrities
1017,Christian_Bale,subject.People.Actors_models_and_celebrities
1278,Daniel_Day-Lewis,subject.People.Actors_models_and_celebrities
1344,Diane_Keaton,subject.People.Actors_models_and_celebrities
1547,Emma_Roberts,subject.People.Actors_models_and_celebrities


**TACKLE ALL PEOPLE CATEGORIES**

In [5]:
df = categories
df['categories_people'] = list(
    map(lambda x: x.startswith('subject.People.'), df['category'])) 

In [6]:
people = df[df["categories_people"] == True]
people

Unnamed: 0,article,category,categories_people
1,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,subject.People.Historical_figures,True
4,%C3%89douard_Manet,subject.People.Artists,True
8,%C3%93engus_I_of_the_Picts,subject.People.Historical_figures,True
67,A._E._J._Collins,subject.People.Sports_and_games_people,True
88,Abbas_I_of_Persia,subject.People.Historical_figures,True
...,...,...,...
5188,Zhang_Qian,subject.People.Historical_figures,True
5190,Zheng_He,subject.People.Historical_figures,True
5191,Ziad_Jarrah,subject.People.Historical_figures,True
5197,Zionism,subject.People.Political_People,True


In [7]:
prufa = people.merge(female, left_on='article', right_on='women_label')

In [8]:
prufa

Unnamed: 0,article,category,categories_people,women,women_label
0,H.D.,subject.People.Writers_and_critics,True,http://www.wikidata.org/entity/Q236469,H.D.
1,Selena,subject.People.Performers_and_composers,True,http://www.wikidata.org/entity/Q42718422,Selena
2,Selena,subject.People.Performers_and_composers,True,http://www.wikidata.org/entity/Q522995,Selena


#### Check every subgroup 

In [35]:
actors = categories[categories["category"] == "subject.People.Actors_models_and_celebrities"]
artist = categories[categories["category"] == "subject.People.Artists"]
astro = categories[categories["category"] == "subject.People.Astronomers_and_physicists"]
chemist = categories[categories["category"] == "subject.People.Chemists"]
comp = categories[categories["category"] == "subject.People.Computing_People"]
eng = categories[categories["category"] == "subject.People.Engineers_and_inventors"]
geo = categories[categories["category"] == "subject.People.Geographers_and_explorers"]
hist = categories[categories["category"] == "subject.People.Historical_figures"]

humsci = categories[categories["category"] == "subject.People.Human_Scientists"]
math = categories[categories["category"] == "subject.People.Mathematicians"]
mil = categories[categories["category"] == "subject.People.Military_People"]
mon = categories[categories["category"] == "subject.People.Monarchs_of_Great_Britain"]
perf = categories[categories["category"] == "subject.People.Performers_and_composers"]
phil = categories[categories["category"] == "subject.People.Philosophers"]
pol = categories[categories["category"] == "subject.People.Political_People"]
prod = categories[categories["category"] == "subject.People.Producers_directors_and_media_figures"] 

rel = categories[categories["category"] == "subject.People.Religious_figures_and_leaders"]
sport = categories[categories["category"] == "subject.People.Sports_and_games_people"]
usa = categories[categories["category"] == "subject.People.USA_Presidents"]
writ = categories[categories["category"] == "subject.People.Writers_and_critics"]

In [44]:
actors

Unnamed: 0,article,category,categories_people
276,Andrew_Robinson,subject.People.Actors_models_and_celebrities,True
638,Bette_Davis,subject.People.Actors_models_and_celebrities,True
760,Brandon_Routh,subject.People.Actors_models_and_celebrities,True
787,Britney_Spears,subject.People.Actors_models_and_celebrities,True
793,Bruce_Lee,subject.People.Actors_models_and_celebrities,True
971,Charlie_Chaplin,subject.People.Actors_models_and_celebrities,True
1017,Christian_Bale,subject.People.Actors_models_and_celebrities,True
1278,Daniel_Day-Lewis,subject.People.Actors_models_and_celebrities,True
1344,Diane_Keaton,subject.People.Actors_models_and_celebrities,True
1547,Emma_Roberts,subject.People.Actors_models_and_celebrities,True


In [45]:
12+1+1+10+3+1+6+6+7+2+3+7

59

### Links

Contains the source and target article that the dataset contains.

In [51]:
print(f"Number of combinations of source and target articles in the dataset: {len(links)}")
links

Number of combinations of source and target articles in the dataset: 119882


Unnamed: 0,link_source,link_target
0,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Bede
1,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Columba
2,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,D%C3%A1l_Riata
3,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Great_Britain
4,%C3%81ed%C3%A1n_mac_Gabr%C3%A1in,Ireland
...,...,...
119877,Zulu,South_Africa
119878,Zulu,Swaziland
119879,Zulu,United_Kingdom
119880,Zulu,Zambia


### Finished paths

In [36]:
print(f"Number of finished paths: {len(paths_finished)}")
paths_finished.head(5)

Number of finished paths: 51318


Unnamed: 0,hashedIpAddress,timestamp,durationInSec,path,rating
0,6a3701d319fc3754,1297740409,166,14th_century;15th_century;16th_century;Pacific...,
1,3824310e536af032,1344753412,88,14th_century;Europe;Africa;Atlantic_slave_trad...,3.0
2,415612e93584d30e,1349298640,138,14th_century;Niger;Nigeria;British_Empire;Slav...,
3,64dd5cd342e3780c,1265613925,37,14th_century;Renaissance;Ancient_Greece;Greece,
4,015245d773376aab,1366730828,175,14th_century;Italy;Roman_Catholic_Church;HIV;R...,3.0


### Unfinished paths

In [38]:
print(f"Number of finished paths: {len(paths_unfinished)}")
paths_unfinished.head(5)

Number of finished paths: 24875


Unnamed: 0,hashedIpAddress,timestamp,durationInSec,path,target,type
0,2426091a53125110,1297054935,1804,Obi-Wan_Kenobi,Microsoft,timeout
1,26141fd878806294,1297055651,1805,Julius_Caesar,Caracas,timeout
2,2b015fb8181c48f2,1297090819,1818,Malawi;Democracy;Alexander_the_Great,First_Crusade,timeout
3,53a53bc244e08a6a,1297094761,49,Paraguay,Mount_St._Helens,restart
4,53a53bc244e08a6a,1297099105,1808,Paraguay;Bolivia,Mount_St._Helens,timeout
