# Load the CSV data

Load the data and do some exploring.

Paper:
https://www.nature.com/articles/s41597-022-01369-4

In [3]:
import pandas as pd

In [4]:
df = pd.read_csv('./cross-verified-database.csv', encoding='unicode_escape')

In [5]:
df.shape

(2291817, 49)

In [3]:
print(df.columns)

Index(['wikidata_code', 'birth', 'death', 'updated_death_date', 'approx_birth',
       'approx_death', 'birth_min', 'birth_max', 'death_min', 'death_max',
       'gender', 'level1_main_occ', 'name', 'un_subregion', 'birth_estimation',
       'death_estimation', 'bigperiod_birth_graph_b',
       'bigperiod_death_graph_b', 'curid', 'level2_main_occ', 'freq_main_occ',
       'freq_second_occ', 'level2_second_occ', 'level3_main_occ',
       'bigperiod_birth', 'bigperiod_death', 'wiki_readers_2015_2018',
       'non_missing_score', 'total_count_words_b', 'number_wiki_editions',
       'total_noccur_links_b', 'sum_visib_ln_5criteria',
       'ranking_visib_5criteria', 'all_geography_groups',
       'string_citizenship_raw_d', 'citizenship_1_b', 'citizenship_2_b',
       'list_areas_of_rattach', 'area1_of_rattachment', 'area2_of_rattachment',
       'list_wikipedia_editions', 'un_region', 'group_wikipedia_editions',
       'bplo1', 'dplo1', 'bpla1', 'dpla1', 'pantheon_1', 'level3_all_occ'],
 

In [4]:
person = df.loc[df['name'].str.contains('Su_Shi')]

In [5]:
person2 = df.loc[df['name'].str.contains('Liu_Cixin')]

In [6]:
print(person2)

        wikidata_code   birth  death  updated_death_date approx_birth  \
1833100       Q607588  1963.0    NaN                 NaN          NaN   

        approx_death  birth_min  birth_max  death_min  death_max  ...  \
1833100          NaN     1963.0     1963.0        NaN        NaN  ...   

        area2_of_rattachment  \
1833100              Missing   

                                   list_wikipedia_editions un_region  \
1833100  zhwiki|enwiki|eswiki|frwiki|jawiki|ruwiki|itwi...      Asia   

        group_wikipedia_editions       bplo1  dplo1     bpla1 dpla1  \
1833100                      grA  116.407524    NaN  39.90403   NaN   

         pantheon_1                                     level3_all_occ  
1833100           0  D:_writer_writer_P:_science_writer_literary_En...  

[1 rows x 49 columns]


In [7]:
print(person)

        wikidata_code   birth   death  updated_death_date approx_birth  \
1220507        Q36020  1037.0  1101.0                 NaN          NaN   
2228792      Q8292372  1950.0     NaN                 NaN          NaN   

        approx_death  birth_min  birth_max  death_min  death_max  ...  \
1220507          NaN     1037.0     1037.0     1101.0     1101.0  ...   
2228792          NaN     1950.0     1950.0        NaN        NaN  ...   

        area2_of_rattachment  \
1220507              Missing   
2228792              Missing   

                                   list_wikipedia_editions un_region  \
1220507  zh_min_nanwiki|zhwiki|jvwiki|bowiki|plwiki|frw...      Asia   
2228792                        zhwiki|jawiki|enwiki|viwiki      Asia   

        group_wikipedia_editions       bplo1      dplo1      bpla1      dpla1  \
1220507                      grA  103.838058  119.96917  30.057461  31.812166   
2228792                      grA  118.479721        NaN  36.696667        NaN   


In [6]:
pd.set_option('display.max_columns', None)

# How to convert CSV to database

In [67]:
from sqlalchemy import create_engine

In [68]:
engine = create_engine('postgresql://postgres:891518@localhost:5432/notable_individuals')

In [48]:
df.to_sql('individuals', engine)

817

# How to get the date of birth from Wikidata

In [19]:
from wikidata.client import Client

In [20]:
client = Client() 

In [21]:
p1 = client.get("Q607588", load=True)

In [22]:
p1.description

m'Chinese science fiction writer'

In [23]:
p1.attributes

{'pageid': 572063,
 'ns': 0,
 'title': 'Q607588',
 'lastrevid': 1995571970,
 'modified': '2023-10-23T07:56:35Z',
 'type': 'item',
 'id': 'Q607588',
 'labels': {'zh': {'language': 'zh', 'value': '刘慈欣'},
  'en': {'language': 'en', 'value': 'Liu Cixin'},
  'en-ca': {'language': 'en-ca', 'value': 'Liu Cixin'},
  'en-gb': {'language': 'en-gb', 'value': 'Liu Cixin'},
  'es': {'language': 'es', 'value': 'Liu Cixin'},
  'fr': {'language': 'fr', 'value': 'Liu Cixin'},
  'ja': {'language': 'ja', 'value': '劉慈欣'},
  'zh-hans': {'language': 'zh-hans', 'value': '刘慈欣'},
  'zh-hant': {'language': 'zh-hant', 'value': '劉慈欣'},
  'zh-cn': {'language': 'zh-cn', 'value': '刘慈欣'},
  'zh-sg': {'language': 'zh-sg', 'value': '刘慈欣'},
  'zh-my': {'language': 'zh-my', 'value': '刘慈欣'},
  'zh-hk': {'language': 'zh-hk', 'value': '劉慈欣'},
  'zh-tw': {'language': 'zh-tw', 'value': '劉慈欣'},
  'zh-mo': {'language': 'zh-mo', 'value': '劉慈欣'},
  'nl': {'language': 'nl', 'value': 'Liu Cixin'},
  'ru': {'language': 'ru', 'value'

We can see from the 'claims', that the date of birth is unter 'P569'.

In [24]:
attrs = [client.get(attribute) for attribute in  p1.attributes.get('claims').keys()]

In [25]:
date_of_birth = client.get('P569')

In [26]:
p1[date_of_birth]

datetime.date(1963, 6, 23)

# Add date of birth to test data

In [14]:
test_data = df.head(5)

In [15]:
test_data = test_data.copy(deep=True)

In [16]:
test_data['date_of_birth'] = ""

In [17]:
test_data

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,gender,level1_main_occ,name,un_subregion,birth_estimation,death_estimation,bigperiod_birth_graph_b,bigperiod_death_graph_b,curid,level2_main_occ,freq_main_occ,freq_second_occ,level2_second_occ,level3_main_occ,bigperiod_birth,bigperiod_death,wiki_readers_2015_2018,non_missing_score,total_count_words_b,number_wiki_editions,total_noccur_links_b,sum_visib_ln_5criteria,ranking_visib_5criteria,all_geography_groups,string_citizenship_raw_d,citizenship_1_b,citizenship_2_b,list_areas_of_rattach,area1_of_rattachment,area2_of_rattachment,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
0,Q1000002,1932.0,1990.0,,,,1932.0,1932.0,1990.0,1990.0,Male,Culture,Claus_Hammel,Western Europe,1932.0,1990.0,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2949539,Culture-core,0.8,0.2,Culture-periphery,playwright,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1669,3,1777,1,11,18.083672,1058542.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,11.833333,12.42,53.416668,54.38139,0,D:_playwright_journalist_writer_screenwriter_P...,
1,Q1000005,1860.0,1927.0,,,,1860.0,1860.0,1927.0,1927.0,Male,Culture,Karel_MatÄj_Äapek-Chod,Western Europe,1860.0,1927.0,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,4217319,Culture-core,0.538462,0.307692,Culture-periphery,writer,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,25008,3,6491,9,15,23.98061,131428.0,Czech_Republic,'Czech_Republic',Czech_Republic,,D:_'Czech_Republic'_mismatchB2_P:_'Czech_Repub...,Old_(before_year_1993_AD)_Czech_Republic,Missing,dewiki|cswiki|enwiki|eowiki|itwiki|kkwiki|rowi...,Europe,grA,12.929798,14.421389,49.440605,50.087502,0,D:_writer_journalist_P:_naturalist_writer_jour...,
2,Q1000006,1971.0,,,,,1971.0,1971.0,,,Male,Culture,Florian_Eichinger,Western Europe,1971.0,2053.8447,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,5050967,Culture-core,1.0,,Missing,film,5.Contemporary period 1901-2020AD,Missing,27285,3,1573,1,10,20.666656,775768.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,9.191944,,48.897499,,0,D:_film_screenwriter_film_P:_regisseur_autor_f...,
3,Q1000015,1983.0,,,,,1983.0,1983.0,,,Male,Culture,Florian_Jahr,Western Europe,1983.0,2067.1899,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2588583,Culture-core,1.0,,Missing,actor,5.Contemporary period 1901-2020AD,Missing,37331,3,1931,1,10,21.18504,691735.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,13.383333,,52.516666,,0,D:_actor_P:_schauspiel_German,
4,Q1000023,1912.0,1977.0,,,,1912.0,1912.0,1977.0,1977.0,Female,Leadership,Wiltraut_Rupp-von_BrÃ¼nneck,Western Europe,1912.0,1977.0,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,922120,Administration/Law,0.833333,0.166667,Politics,judge,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2955,3,1578,1,6,17.99621,1103282.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Old_(before_year_1990_AD)_Germany,Missing,dewiki,Europe,grB,13.35,8.4,52.4333,49.016666,0,D:_judge_jurist_P:_ richter_verfassung_German,


In [43]:
from typing import *

def insert_date_of_birth(df, wikidata_client, failed_wikicodes: List):
    failed_wikicodes = []
    date_of_birth = client.get('P569')
    count = 0
    for index, row in df.iterrows():
        count += 1
        wikidata_code = row['wikidata_code']
        data = wikidata_client.get(wikidata_code)
        if count%1000 == 0:
            print('{0} data processed...', index)
            
        try:
            birthday = data[date_of_birth]
            df.at[index, 'date_of_birth'] = birthday
        except Exception as error:
            print('Failed to get date of birth at row {0}, with wikidata code {1}: {2}'
                  .format(index, wikidata_code, error))
            failed_wikicodes.append(wikidata_code)


In [29]:
failed_codes = []
insert_date_of_birth(test_data, client, failed_codes)

{0} data processed... 0


In [30]:
test_data

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,gender,level1_main_occ,name,un_subregion,birth_estimation,death_estimation,bigperiod_birth_graph_b,bigperiod_death_graph_b,curid,level2_main_occ,freq_main_occ,freq_second_occ,level2_second_occ,level3_main_occ,bigperiod_birth,bigperiod_death,wiki_readers_2015_2018,non_missing_score,total_count_words_b,number_wiki_editions,total_noccur_links_b,sum_visib_ln_5criteria,ranking_visib_5criteria,all_geography_groups,string_citizenship_raw_d,citizenship_1_b,citizenship_2_b,list_areas_of_rattach,area1_of_rattachment,area2_of_rattachment,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
0,Q1000002,1932.0,1990.0,,,,1932.0,1932.0,1990.0,1990.0,Male,Culture,Claus_Hammel,Western Europe,1932.0,1990.0,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2949539,Culture-core,0.8,0.2,Culture-periphery,playwright,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1669,3,1777,1,11,18.083672,1058542.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,11.833333,12.42,53.416668,54.38139,0,D:_playwright_journalist_writer_screenwriter_P...,1932-12-04
1,Q1000005,1860.0,1927.0,,,,1860.0,1860.0,1927.0,1927.0,Male,Culture,Karel_MatÄj_Äapek-Chod,Western Europe,1860.0,1927.0,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,4217319,Culture-core,0.538462,0.307692,Culture-periphery,writer,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,25008,3,6491,9,15,23.98061,131428.0,Czech_Republic,'Czech_Republic',Czech_Republic,,D:_'Czech_Republic'_mismatchB2_P:_'Czech_Repub...,Old_(before_year_1993_AD)_Czech_Republic,Missing,dewiki|cswiki|enwiki|eowiki|itwiki|kkwiki|rowi...,Europe,grA,12.929798,14.421389,49.440605,50.087502,0,D:_writer_journalist_P:_naturalist_writer_jour...,1860-02-21
2,Q1000006,1971.0,,,,,1971.0,1971.0,,,Male,Culture,Florian_Eichinger,Western Europe,1971.0,2053.8447,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,5050967,Culture-core,1.0,,Missing,film,5.Contemporary period 1901-2020AD,Missing,27285,3,1573,1,10,20.666656,775768.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,9.191944,,48.897499,,0,D:_film_screenwriter_film_P:_regisseur_autor_f...,1971-07-14
3,Q1000015,1983.0,,,,,1983.0,1983.0,,,Male,Culture,Florian_Jahr,Western Europe,1983.0,2067.1899,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2588583,Culture-core,1.0,,Missing,actor,5.Contemporary period 1901-2020AD,Missing,37331,3,1931,1,10,21.18504,691735.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,13.383333,,52.516666,,0,D:_actor_P:_schauspiel_German,1983-06-23
4,Q1000023,1912.0,1977.0,,,,1912.0,1912.0,1977.0,1977.0,Female,Leadership,Wiltraut_Rupp-von_BrÃ¼nneck,Western Europe,1912.0,1977.0,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,922120,Administration/Law,0.833333,0.166667,Politics,judge,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2955,3,1578,1,6,17.99621,1103282.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Old_(before_year_1990_AD)_Germany,Missing,dewiki,Europe,grB,13.35,8.4,52.4333,49.016666,0,D:_judge_jurist_P:_ richter_verfassung_German,1912-08-07


# Add date of birth the all data

Add another column to the dataframe

In [32]:
df["date_of_birth"] = ""

In [33]:
df

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,gender,level1_main_occ,name,un_subregion,birth_estimation,death_estimation,bigperiod_birth_graph_b,bigperiod_death_graph_b,curid,level2_main_occ,freq_main_occ,freq_second_occ,level2_second_occ,level3_main_occ,bigperiod_birth,bigperiod_death,wiki_readers_2015_2018,non_missing_score,total_count_words_b,number_wiki_editions,total_noccur_links_b,sum_visib_ln_5criteria,ranking_visib_5criteria,all_geography_groups,string_citizenship_raw_d,citizenship_1_b,citizenship_2_b,list_areas_of_rattach,area1_of_rattachment,area2_of_rattachment,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
0,Q1000002,1932.0,1990.0,,,,1932.0,1932.0,1990.0,1990.0,Male,Culture,Claus_Hammel,Western Europe,1932.0,1990.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2949539,Culture-core,0.800000,0.200000,Culture-periphery,playwright,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1669,3,1777,1,11,18.083672,1058542.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,11.833333,12.420000,53.416668,54.381390,0,D:_playwright_journalist_writer_screenwriter_P...,
1,Q1000005,1860.0,1927.0,,,,1860.0,1860.0,1927.0,1927.0,Male,Culture,Karel_MatÄj_Äapek-Chod,Western Europe,1860.0,1927.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,4217319,Culture-core,0.538462,0.307692,Culture-periphery,writer,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,25008,3,6491,9,15,23.980610,131428.0,Czech_Republic,'Czech_Republic',Czech_Republic,,D:_'Czech_Republic'_mismatchB2_P:_'Czech_Repub...,Old_(before_year_1993_AD)_Czech_Republic,Missing,dewiki|cswiki|enwiki|eowiki|itwiki|kkwiki|rowi...,Europe,grA,12.929798,14.421389,49.440605,50.087502,0,D:_writer_journalist_P:_naturalist_writer_jour...,
2,Q1000006,1971.0,,,,,1971.0,1971.0,,,Male,Culture,Florian_Eichinger,Western Europe,1971.0,2053.8447,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,5050967,Culture-core,1.000000,,Missing,film,5.Contemporary period 1901-2020AD,Missing,27285,3,1573,1,10,20.666656,775768.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,9.191944,,48.897499,,0,D:_film_screenwriter_film_P:_regisseur_autor_f...,
3,Q1000015,1983.0,,,,,1983.0,1983.0,,,Male,Culture,Florian_Jahr,Western Europe,1983.0,2067.1899,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2588583,Culture-core,1.000000,,Missing,actor,5.Contemporary period 1901-2020AD,Missing,37331,3,1931,1,10,21.185040,691735.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Germany,Missing,dewiki,Europe,grB,13.383333,,52.516666,,0,D:_actor_P:_schauspiel_German,
4,Q1000023,1912.0,1977.0,,,,1912.0,1912.0,1977.0,1977.0,Female,Leadership,Wiltraut_Rupp-von_BrÃ¼nneck,Western Europe,1912.0,1977.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,922120,Administration/Law,0.833333,0.166667,Politics,judge,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,2955,3,1578,1,6,17.996210,1103282.0,Germany,'Germany',Germany,,D:_'Germany'_matchB1_P:_'Germany',Old_(before_year_1990_AD)_Germany,Missing,dewiki,Europe,grB,13.350000,8.400000,52.433300,49.016666,0,D:_judge_jurist_P:_ richter_verfassung_German,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2291812,Q999994,1988.0,,,,,1988.0,1988.0,,,Male,Sports/Games,Mitja_MeÅ¾nar,Southern Europe,1988.0,2065.3259,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,20581328,Sports/Games,0.900000,,Missing,ski,5.Contemporary period 1901-2020AD,Missing,8896,3,5668,8,4,21.543238,350041.5,Slovenia,'Slovenia',Slovenia,,D:_'Slovenia'_matchB1_P:_'Slovenia',Slovenia,Missing,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwi...,Europe,grA,14.355610,,46.238869,,0,D:_ski_P:_ ski_jumper_olympic_English_auteur_s...,
2291813,Q999995,1987.0,,,circa,,1987.0,1987.0,,,Male,Sports/Games,Martin_Cikl,Western Europe,1987.0,2065.1482,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,16791925,Sports/Games,0.923077,,Missing,ski,5.Contemporary period 1901-2020AD,Missing,8807,3,6096,9,4,21.711330,339942.5,Czech_Republic,'Czech_Republic',Czech_Republic,,D:_'Czech_Republic'_matchB1_P:_'Czech_Republic',Czech_Republic,Missing,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwi...,Europe,grA,14.618354,,50.911613,,0,D:_ski_P:_ ski_jumper_olympic_English_auteur_s...,
2291814,Q999997,1984.0,,,,,1984.0,1984.0,,,Male,Sports/Games,Vincent_Descombes_Sevoie,Western Europe,1984.0,2061.8308,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,21384087,Sports/Games,0.909091,,Missing,ski,5.Contemporary period 1901-2020AD,Missing,51786,3,14372,7,6,24.453411,154890.0,France,'France',France,,D:_'France'_matchB1_P:_'France',France,Missing,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwiki,Europe,grA,6.868889,,45.922222,,0,D:_ski_skier_P:_ ski_jumper_English_auteur_ski...,
2291815,Q999998,1952.0,,,,,1952.0,1952.0,,,Male,Culture,JosÃ©_Massaroli,South America,1952.0,2032.8303,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,11512070,Culture-core,0.875000,,Missing,artist,5.Contemporary period 1901-2020AD,Missing,3772,3,5570,3,6,20.193954,453953.0,Argentina,'Argentina',Argentina,,D:_'Argentina'_matchB1_P:_'Argentina',Argentina,Missing,fiwiki|enwiki|eswiki,America,grA,-60.000000,,-33.483334,,0,D:_artist_P:_ comic_artist_comic_English_dibuj...,


In [34]:
dfs = []
row_count = df.shape[0]

In [38]:
row_count
chunk_count = 10
chunk_size = row_count // chunk_count

In [39]:
chunk_size

229181

In [45]:
df

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,...,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
0,Q1000002,1932.0,1990.0,,,,1932.0,1932.0,1990.0,1990.0,...,dewiki,Europe,grB,11.833333,12.420000,53.416668,54.381390,0,D:_playwright_journalist_writer_screenwriter_P...,
1,Q1000005,1860.0,1927.0,,,,1860.0,1860.0,1927.0,1927.0,...,dewiki|cswiki|enwiki|eowiki|itwiki|kkwiki|rowi...,Europe,grA,12.929798,14.421389,49.440605,50.087502,0,D:_writer_journalist_P:_naturalist_writer_jour...,
2,Q1000006,1971.0,,,,,1971.0,1971.0,,,...,dewiki,Europe,grB,9.191944,,48.897499,,0,D:_film_screenwriter_film_P:_regisseur_autor_f...,
3,Q1000015,1983.0,,,,,1983.0,1983.0,,,...,dewiki,Europe,grB,13.383333,,52.516666,,0,D:_actor_P:_schauspiel_German,
4,Q1000023,1912.0,1977.0,,,,1912.0,1912.0,1977.0,1977.0,...,dewiki,Europe,grB,13.350000,8.400000,52.433300,49.016666,0,D:_judge_jurist_P:_ richter_verfassung_German,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2291812,Q999994,1988.0,,,,,1988.0,1988.0,,,...,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwi...,Europe,grA,14.355610,,46.238869,,0,D:_ski_P:_ ski_jumper_olympic_English_auteur_s...,
2291813,Q999995,1987.0,,,circa,,1987.0,1987.0,,,...,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwi...,Europe,grA,14.618354,,50.911613,,0,D:_ski_P:_ ski_jumper_olympic_English_auteur_s...,
2291814,Q999997,1984.0,,,,,1984.0,1984.0,,,...,fiwiki|dewiki|enwiki|frwiki|nowiki|plwiki|ruwiki,Europe,grA,6.868889,,45.922222,,0,D:_ski_skier_P:_ ski_jumper_English_auteur_ski...,
2291815,Q999998,1952.0,,,,,1952.0,1952.0,,,...,fiwiki|enwiki|eswiki,America,grA,-60.000000,,-33.483334,,0,D:_artist_P:_ comic_artist_comic_English_dibuj...,


In [40]:
df_rank_2000 = df[df['ranking_visib_5criteria'] <= 2000]

In [41]:
df_rank_2000

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,gender,level1_main_occ,name,un_subregion,birth_estimation,death_estimation,bigperiod_birth_graph_b,bigperiod_death_graph_b,curid,level2_main_occ,freq_main_occ,freq_second_occ,level2_second_occ,level3_main_occ,bigperiod_birth,bigperiod_death,wiki_readers_2015_2018,non_missing_score,total_count_words_b,number_wiki_editions,total_noccur_links_b,sum_visib_ln_5criteria,ranking_visib_5criteria,all_geography_groups,string_citizenship_raw_d,citizenship_1_b,citizenship_2_b,list_areas_of_rattach,area1_of_rattachment,area2_of_rattachment,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
174,Q1001,1869.0,1948.0,,,,1869.0,1869.0,1948.0,1948.0,Male,Leadership,Mahatma_Gandhi,South Asia incl. Indian Peninsula,1869.0,1948.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,19379,Politics,0.409091,0.181818,Culture-core,politician,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,54379472,3,191606,172,90,39.638851,11.0,"Old_regimes_in_/_of_India,Dominion_of_India",'British_Raj'_'Dominion_of_India',India,,D:_'British_Raj'_'Dominion_of_India'_matchB1_P...,Old_(before_year_1947_AD)_India,Missing,guwiki|itwiki|alswiki|amwiki|anwiki|arwiki|arz...,Asia,grA,69.604721,77.214333,21.642500,28.601860,1,D:_politician_barrister_writer_journalist_phil...,
1232,Q100937,1899.0,1987.0,,,,1899.0,1899.0,1987.0,1987.0,Male,Culture,Fred_Astaire,Northern America,1899.0,1987.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,64962,Culture-core,0.953488,,Missing,actor,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,6224676,3,63143,78,67,35.286160,626.5,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,enwiki|dewiki|frwiki|eswiki|ruwiki|itwiki|jawi...,America,grA,-95.937500,-118.243683,41.258610,34.052231,1,D:_actor_actor_choreographer_singer_dancer_fil...,
1257,Q100948,1907.0,1964.0,,,,1907.0,1907.0,1964.0,1964.0,Female,Discovery/Science,Rachel_Carson,Northern America,1907.0,1964.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,81274,Academia,0.441176,0.323529,Culture-core,biologist,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1821105,3,70880,74,44,33.707863,1808.0,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,enwiki|dewiki|frwiki|eswiki|itwiki|jawiki|nlwi...,America,grA,-79.783356,-77.020790,40.546432,39.002441,1,D:_biologist_environmentalist_zoologist_essayi...,
1699,Q10132,1986.0,,,,,1986.0,1986.0,,,Male,Sports/Games,Rafael_Nadal,Southern Europe,1986.0,2063.1160,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1439517,Sports/Games,0.857143,,Missing,player,5.Contemporary period 1901-2020AD,Missing,32249952,3,347862,103,37,38.330566,681.0,Spain,'Spain',Spain,,D:_'Spain'_matchB1_P:_'Spain',Spain,Missing,enwiki|afwiki|anwiki|astwiki|azwiki|bswiki|brw...,Europe,grA,3.208889,,39.570000,,1,D:_player_model_P:_tennis_player_tennis_Englis...,
1769,Q101410,1954.0,,,,,1954.0,1954.0,,,Male,Leadership,FranÃ§ois_Fillon,Western Europe,1954.0,2030.0797,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,6111238,Politics,0.807692,0.153846,Administration/Law,politician,5.Contemporary period 1901-2020AD,Missing,5424258,3,106332,73,35,34.968307,1273.0,France,'France',France,,D:_'France'_matchB1_P:_'France',France,Missing,zhwiki|plwiki|euwiki|eswiki|afwiki|ocwiki|huwi...,Europe,grA,0.196944,,48.004166,,1,D:_politician_lawyer_legislative_P:_politician...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289106,Q989,1920.0,2005.0,,,,1920.0,1920.0,2005.0,2005.0,Male,Leadership,Pope_John_Paul_II,Eastern Europe,1920.0,2005.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,23805,Religious,0.470588,0.176471,Culture-core,priest,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,19123836,3,233416,135,64,38.214069,91.0,"Poland,Vatican_City,Poland",'Poland'_'Vatican_City'_'Second_Polish_Republic',Poland,Vatican_City,D:_'Poland'_'Vatican_City'_'Second_Polish_Repu...,Poland,Vatican_City,enwiki|dewiki|frwiki|eswiki|ruwiki|itwiki|jawi...,Europe,grA,19.500000,12.456388,49.883335,41.903610,1,D:_priest_poet_politician_esperantist_writer_a...,
2289464,Q991,1821.0,1881.0,,,,1821.0,1821.0,1881.0,1881.0,Male,Culture,Fyodor_Dostoevsky,Eastern Europe,1821.0,1881.0000,4.Mid Modern Period 1751-1900AD,4.Mid Modern Period 1751-1900AD,11625,Culture-core,0.578947,0.184211,Academia,novelist,4.Mid Modern Period 1751-1900AD,4.Mid Modern Period 1751-1900AD,12878684,3,127400,154,95,37.733952,85.0,Old_regimes_in_/_of_Russia,'Russian_Empire',Russia,,D:_'Russian_Empire'_matchB1_P:_'Russia',Old_(before_year_1990_AD)_Russia,Missing,afwiki|alswiki|amwiki|arwiki|arzwiki|astwiki|a...,Europe,grA,37.611946,30.316668,55.783611,59.950001,1,D:_translator_poet_novelist_essayist_writer_jo...,
2289600,Q9916,1890.0,1969.0,,,,1890.0,1890.0,1969.0,1969.0,Male,Leadership,Dwight_D._Eisenhower,Northern America,1890.0,1969.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,8182,Military,0.464286,0.392857,Politics,army,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,6613569,3,150067,124,79,36.833820,231.0,US,'US',US,,D:_'US'_mismatchB2_P:_'US'_'Italy',US,Missing,enwiki|dewiki|frwiki|eswiki|ruwiki|jawiki|nlwi...,America,grA,-96.557503,-77.029999,33.749699,38.974998,1,D:_politician_writer_officer_statesperson_P:_ ...,
2291069,Q9960,1911.0,2004.0,,,,1911.0,1911.0,2004.0,2004.0,Male,Culture,Ronald_Reagan,Northern America,1911.0,2004.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,25433,Culture-core,0.487805,0.414634,Politics,actor,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,37631824,3,227295,249,81,39.705551,20.0,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,afwiki|anwiki|frpwiki|bclwiki|bswiki|brwiki|ca...,America,grA,-89.785301,-118.447777,41.630600,34.083328,1,D:_actor_actor_politician_biographer_actor_scr...,


In [42]:
failed_codes = []
insert_date_of_birth(df_rank_2000, client, failed_codes)

Failed to get date of birth at row 2784, with wikidata code Q102140
Failed to get date of birth at row 3011, with wikidata code Q102272
Failed to get date of birth at row 30172, with wikidata code Q1048
Failed to get date of birth at row 37921, with wikidata code Q1067
Failed to get date of birth at row 43485, with wikidata code Q108316
Failed to get date of birth at row 53514, with wikidata code Q11104
Failed to get date of birth at row 76267, with wikidata code Q117253
Failed to get date of birth at row 81120, with wikidata code Q11806
Failed to get date of birth at row 81266, with wikidata code Q11812
Failed to get date of birth at row 89696, with wikidata code Q119702
Failed to get date of birth at row 92347, with wikidata code Q120180
Failed to get date of birth at row 100096, with wikidata code Q122553
Failed to get date of birth at row 102907, with wikidata code Q123034
Failed to get date of birth at row 110907, with wikidata code Q124617
Failed to get date of birth at row 11638

Failed to get date of birth at row 1252107, with wikidata code Q37388
Failed to get date of birth at row 1257504, with wikidata code Q37562
Failed to get date of birth at row 1258093, with wikidata code Q37577
Failed to get date of birth at row 1258491, with wikidata code Q37594
Failed to get date of birth at row 1280946, with wikidata code Q38370
Failed to get date of birth at row 1309152, with wikidata code Q39619
Failed to get date of birth at row 1310988, with wikidata code Q39837
Failed to get date of birth at row 1312589, with wikidata code Q39978
Failed to get date of birth at row 1320811, with wikidata code Q40787
Failed to get date of birth at row 1322017, with wikidata code Q40939
Failed to get date of birth at row 1324045, with wikidata code Q41155
Failed to get date of birth at row 1325411, with wikidata code Q41223
Failed to get date of birth at row 1325821, with wikidata code Q41264
Failed to get date of birth at row 1328092, with wikidata code Q41523
Failed to get date o

Failed to get date of birth at row 2229918, with wikidata code Q83229
Failed to get date of birth at row 2230372, with wikidata code Q83375
Failed to get date of birth at row 2230593, with wikidata code Q83428
Failed to get date of birth at row 2230807, with wikidata code Q83476
Failed to get date of birth at row 2230915, with wikidata code Q835
Failed to get date of birth at row 2231743, with wikidata code Q8409
Failed to get date of birth at row 2231776, with wikidata code Q8413
Failed to get date of birth at row 2232444, with wikidata code Q8462
Failed to get date of birth at row 2232779, with wikidata code Q8479
Failed to get date of birth at row 2233842, with wikidata code Q855
Failed to get date of birth at row 2234379, with wikidata code Q8581
Failed to get date of birth at row 2234500, with wikidata code Q859
Failed to get date of birth at row 2236584, with wikidata code Q868
Failed to get date of birth at row 2238002, with wikidata code Q8739
Failed to get date of birth at row

In [44]:
df_rank_2000

Unnamed: 0,wikidata_code,birth,death,updated_death_date,approx_birth,approx_death,birth_min,birth_max,death_min,death_max,gender,level1_main_occ,name,un_subregion,birth_estimation,death_estimation,bigperiod_birth_graph_b,bigperiod_death_graph_b,curid,level2_main_occ,freq_main_occ,freq_second_occ,level2_second_occ,level3_main_occ,bigperiod_birth,bigperiod_death,wiki_readers_2015_2018,non_missing_score,total_count_words_b,number_wiki_editions,total_noccur_links_b,sum_visib_ln_5criteria,ranking_visib_5criteria,all_geography_groups,string_citizenship_raw_d,citizenship_1_b,citizenship_2_b,list_areas_of_rattach,area1_of_rattachment,area2_of_rattachment,list_wikipedia_editions,un_region,group_wikipedia_editions,bplo1,dplo1,bpla1,dpla1,pantheon_1,level3_all_occ,date_of_birth
174,Q1001,1869.0,1948.0,,,,1869.0,1869.0,1948.0,1948.0,Male,Leadership,Mahatma_Gandhi,South Asia incl. Indian Peninsula,1869.0,1948.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,19379,Politics,0.409091,0.181818,Culture-core,politician,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,54379472,3,191606,172,90,39.638851,11.0,"Old_regimes_in_/_of_India,Dominion_of_India",'British_Raj'_'Dominion_of_India',India,,D:_'British_Raj'_'Dominion_of_India'_matchB1_P...,Old_(before_year_1947_AD)_India,Missing,guwiki|itwiki|alswiki|amwiki|anwiki|arwiki|arz...,Asia,grA,69.604721,77.214333,21.642500,28.601860,1,D:_politician_barrister_writer_journalist_phil...,1869-10-02
1232,Q100937,1899.0,1987.0,,,,1899.0,1899.0,1987.0,1987.0,Male,Culture,Fred_Astaire,Northern America,1899.0,1987.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,64962,Culture-core,0.953488,,Missing,actor,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,6224676,3,63143,78,67,35.286160,626.5,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,enwiki|dewiki|frwiki|eswiki|ruwiki|itwiki|jawi...,America,grA,-95.937500,-118.243683,41.258610,34.052231,1,D:_actor_actor_choreographer_singer_dancer_fil...,1899-05-10
1257,Q100948,1907.0,1964.0,,,,1907.0,1907.0,1964.0,1964.0,Female,Discovery/Science,Rachel_Carson,Northern America,1907.0,1964.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,81274,Academia,0.441176,0.323529,Culture-core,biologist,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1821105,3,70880,74,44,33.707863,1808.0,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,enwiki|dewiki|frwiki|eswiki|itwiki|jawiki|nlwi...,America,grA,-79.783356,-77.020790,40.546432,39.002441,1,D:_biologist_environmentalist_zoologist_essayi...,1907-05-27
1699,Q10132,1986.0,,,,,1986.0,1986.0,,,Male,Sports/Games,Rafael_Nadal,Southern Europe,1986.0,2063.1160,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,1439517,Sports/Games,0.857143,,Missing,player,5.Contemporary period 1901-2020AD,Missing,32249952,3,347862,103,37,38.330566,681.0,Spain,'Spain',Spain,,D:_'Spain'_matchB1_P:_'Spain',Spain,Missing,enwiki|afwiki|anwiki|astwiki|azwiki|bswiki|brw...,Europe,grA,3.208889,,39.570000,,1,D:_player_model_P:_tennis_player_tennis_Englis...,1986-06-03
1769,Q101410,1954.0,,,,,1954.0,1954.0,,,Male,Leadership,FranÃ§ois_Fillon,Western Europe,1954.0,2030.0797,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,6111238,Politics,0.807692,0.153846,Administration/Law,politician,5.Contemporary period 1901-2020AD,Missing,5424258,3,106332,73,35,34.968307,1273.0,France,'France',France,,D:_'France'_matchB1_P:_'France',France,Missing,zhwiki|plwiki|euwiki|eswiki|afwiki|ocwiki|huwi...,Europe,grA,0.196944,,48.004166,,1,D:_politician_lawyer_legislative_P:_politician...,1954-03-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289106,Q989,1920.0,2005.0,,,,1920.0,1920.0,2005.0,2005.0,Male,Leadership,Pope_John_Paul_II,Eastern Europe,1920.0,2005.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,23805,Religious,0.470588,0.176471,Culture-core,priest,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,19123836,3,233416,135,64,38.214069,91.0,"Poland,Vatican_City,Poland",'Poland'_'Vatican_City'_'Second_Polish_Republic',Poland,Vatican_City,D:_'Poland'_'Vatican_City'_'Second_Polish_Repu...,Poland,Vatican_City,enwiki|dewiki|frwiki|eswiki|ruwiki|itwiki|jawi...,Europe,grA,19.500000,12.456388,49.883335,41.903610,1,D:_priest_poet_politician_esperantist_writer_a...,1920-05-18
2289464,Q991,1821.0,1881.0,,,,1821.0,1821.0,1881.0,1881.0,Male,Culture,Fyodor_Dostoevsky,Eastern Europe,1821.0,1881.0000,4.Mid Modern Period 1751-1900AD,4.Mid Modern Period 1751-1900AD,11625,Culture-core,0.578947,0.184211,Academia,novelist,4.Mid Modern Period 1751-1900AD,4.Mid Modern Period 1751-1900AD,12878684,3,127400,154,95,37.733952,85.0,Old_regimes_in_/_of_Russia,'Russian_Empire',Russia,,D:_'Russian_Empire'_matchB1_P:_'Russia',Old_(before_year_1990_AD)_Russia,Missing,afwiki|alswiki|amwiki|arwiki|arzwiki|astwiki|a...,Europe,grA,37.611946,30.316668,55.783611,59.950001,1,D:_translator_poet_novelist_essayist_writer_jo...,
2289600,Q9916,1890.0,1969.0,,,,1890.0,1890.0,1969.0,1969.0,Male,Leadership,Dwight_D._Eisenhower,Northern America,1890.0,1969.0000,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,8182,Military,0.464286,0.392857,Politics,army,4.Mid Modern Period 1751-1900AD,5.Contemporary period 1901-2020AD,6613569,3,150067,124,79,36.833820,231.0,US,'US',US,,D:_'US'_mismatchB2_P:_'US'_'Italy',US,Missing,enwiki|dewiki|frwiki|eswiki|ruwiki|jawiki|nlwi...,America,grA,-96.557503,-77.029999,33.749699,38.974998,1,D:_politician_writer_officer_statesperson_P:_ ...,1890-10-14
2291069,Q9960,1911.0,2004.0,,,,1911.0,1911.0,2004.0,2004.0,Male,Culture,Ronald_Reagan,Northern America,1911.0,2004.0000,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,25433,Culture-core,0.487805,0.414634,Politics,actor,5.Contemporary period 1901-2020AD,5.Contemporary period 1901-2020AD,37631824,3,227295,249,81,39.705551,20.0,US,'US',US,,D:_'US'_matchB1_P:_'US',US,Missing,afwiki|anwiki|frpwiki|bclwiki|bswiki|brwiki|ca...,America,grA,-89.785301,-118.447777,41.630600,34.083328,1,D:_actor_actor_politician_biographer_actor_scr...,1911-02-06


In [45]:
henry_III = client.get('Q53448')

In [47]:
henry_III.description

m'King of France from 1574 to 1589'

In [49]:
henry_III.attributes["claims"]

{'P39': [{'mainsnak': {'snaktype': 'value',
    'property': 'P39',
    'hash': '10bba1d624b6e568d675cbe954f35d750ed86fce',
    'datavalue': {'value': {'entity-type': 'item',
      'numeric-id': 18384454,
      'id': 'Q18384454'},
     'type': 'wikibase-entityid'},
    'datatype': 'wikibase-item'},
   'type': 'statement',
   'qualifiers': {'P580': [{'snaktype': 'value',
      'property': 'P580',
      'hash': '03faea1ccd949d0230882de0a5180cedd1518fef',
      'datavalue': {'value': {'time': '+1574-06-09T00:00:00Z',
        'timezone': 0,
        'before': 0,
        'after': 0,
        'precision': 11,
        'calendarmodel': 'http://www.wikidata.org/entity/Q1985786'},
       'type': 'time'},
      'datatype': 'time'}],
    'P582': [{'snaktype': 'value',
      'property': 'P582',
      'hash': 'aaa59ac44220318e79c79bca3569397a248456af',
      'datavalue': {'value': {'time': '+1589-08-02T00:00:00Z',
        'timezone': 0,
        'before': 0,
        'after': 0,
        'precision': 11,


In [50]:
date_of_birth = client.get('P569')

In [52]:
henry_III[date_of_birth]

DatavalueError: 'http://www.wikidata.org/entity/Q1985786' is unsupported calendarmodel for time datavalue: {'type': 'time', 'value': {'time': '+1551-09-19T00:00:00Z', 'timezone': 0, 'before': 0, 'after': 0, 'precision': 11, 'calendarmodel': 'http://www.wikidata.org/entity/Q1985786'}}

In [54]:
len(failed_codes)

0

In [65]:
df_rank_2000_cleaned = df_rank_2000[df_rank_2000['date_of_birth'] != ""]

In [66]:
df_rank_2000_cleaned.shape

(1732, 50)

In [69]:
df_rank_2000_cleaned.to_sql('individuals_2000', engine)

732