In [1]:
import sys

sys.path.append("../")

import pandas as pd
from dotenv import load_dotenv

load_dotenv()
import os
import sqlite3

# Path of the SQLite database, taken from the DB_PATH environment variable
# (load_dotenv() has already run at this point).
DB_PATH = os.getenv("DB_PATH")
if not DB_PATH:
    # Fail fast with an actionable message: sqlite3.connect(None) raises an
    # opaque TypeError, and sqlite3.connect("") silently opens an empty
    # temporary database whose queries only fail later with "no such table".
    raise RuntimeError(
        "DB_PATH is not set; define it in the environment or in a .env file."
    )

# Directory (relative to the working directory) receiving the CSV exports.
DATA_PATH = "data"

# Connection handed to the pd.read_sql_query calls of this notebook.
conn = sqlite3.connect(DB_PATH)

# exist_ok=True keeps the cell re-runnable and removes the check-then-create race.
os.makedirs(DATA_PATH, exist_ok=True)

In [2]:
# Individuals and their regions: load the scored individuals table.
# An individual can appear on several rows, one per region it is attached to
# (see the Hesiod spot check at the end of this cell).
df = pd.read_csv("../immaterial_index/results/df_individuals_score.csv")

# How many distinct individuals does the file contain?
unique_individual_ids = set(df["individual_wikidata_id"])
print(len(unique_individual_ids))

# Spot check: display every row recorded for one well-known individual.
is_hesiod = df["individual_name"] == "Hesiod"
df[is_hesiod]

159340


Unnamed: 0.1,Unnamed: 0,individual_wikidata_id,productive_year,decade,individual_name,score,region_name
297556,312663,Q44233,-715.0,-720,Hesiod,147,Eastern Europe
297557,312664,Q44233,-715.0,-720,Hesiod,147,Balkans
297558,312665,Q44233,-715.0,-720,Hesiod,147,mediterranean World
297559,312666,Q44233,-715.0,-720,Hesiod,147,Greek World


In [3]:


# Load works of individuals and attach a per-individual work count to `df`.

# Raw rows of the individual_created_work table.
df_ind_works = pd.read_sql_query("SELECT * FROM individual_created_work", conn)

# Number of non-null work ids recorded for each individual.
df_count_work = (
    df_ind_works.groupby("individual_wikidata_id")["work_wikidata_id"]
    .count()
    .rename("count_works")
    .reset_index()
)

# Left join: every row of `df` is kept; individuals that never appear in the
# works table come out with a NaN count.
df_final = pd.merge(df, df_count_work, on="individual_wikidata_id", how="left")

# When there is no works we add 0 -- but only in `count_works`. A blanket
# DataFrame.fillna(0) would also overwrite missing names, regions, scores or
# decades with 0 and silently corrupt the export.
df_final = df_final.assign(count_works=df_final["count_works"].fillna(0))

# Drop the stale index column carried over from the CSV and the raw year.
df_final = df_final.drop(columns=["Unnamed: 0", "productive_year"])
df_final.to_csv(DATA_PATH + "/df_indi_works.csv")


In [7]:
# Gender recorded for each individual in the database.
df_gender = pd.read_sql_query("SELECT * FROM individual_gender", conn)

# Default (inner) join on the Wikidata id: only individuals present in both
# the gender table and `df_final` are kept; the gender columns come first.
gender_columns = ["individual_wikidata_id", "gender"]
df_final_gender = df_gender[gender_columns].merge(
    df_final,
    on="individual_wikidata_id",
)

gender_csv_path = DATA_PATH + "/df_indi_works_gender.csv"
df_final_gender.to_csv(gender_csv_path)

Unnamed: 0,individual_wikidata_id,gender,decade,individual_name,score,region_name,count_works
0,Q29436295,male,1660,Rombout van den Hoeye,21,Low countries,3.0
1,Q29436295,male,1660,Rombout van den Hoeye,21,Netherlands,3.0
2,Q29436295,male,1660,Rombout van den Hoeye,21,Northwestern Europe,3.0
3,Q29436295,male,1660,Rombout van den Hoeye,21,Western Europe,3.0
4,Q18546722,male,1680,Peter Cross,21,Northwestern Europe,7.0
...,...,...,...,...,...,...,...
540115,Q432585,male,1700,Jean-Baptiste Dubos,58,France,4.0
540116,Q16063175,female,1840,Jane Martha St. John,9,Northwestern Europe,12.0
540117,Q16063175,female,1840,Jane Martha St. John,9,Western Europe,12.0
540118,Q16063175,female,1840,Jane Martha St. John,9,British Islands,12.0


In [9]:
# Restrict the individuals to the regions and the period covered by the
# gdp_clean table, then export them with their work counts.
df_clean_gdp = pd.read_sql_query("SELECT * FROM gdp_clean", conn)
regions_clean = list(set(df_clean_gdp["region_code"]))

# NOTE(review): the preview of `df` shown earlier in this notebook lists a
# `region_name` column and no `region_code`; confirm the CSV really provides
# `region_code`, otherwise the next line raises KeyError on a fresh run.
# NOTE(review): `df` and `df_final` are rebound below, so re-running an earlier
# cell after this one works on the GDP-filtered data.
df = df[df["region_code"].isin(regions_clean)]

# Series.min() skips NaN; the builtin min() over a Series gives an
# order-dependent result when a year is missing.
first_gdp_year = df_clean_gdp["year"].min()
df = df[df["decade"] >= first_gdp_year]

df_final = pd.merge(df, df_count_work, on="individual_wikidata_id", how="left")

# When there is no works we add 0 -- only in `count_works`. This frame still
# carries `productive_year`, so a blanket fillna(0) would turn a missing year
# into year 0 (and missing names/regions into 0 as well).
df_final = df_final.assign(count_works=df_final["count_works"].fillna(0))
df_final.to_csv(DATA_PATH + "/df_indi_works_clean_gdp.csv")