In [3]:
# Plot the number of individuals per region and decade (log-scaled and min-max normalized within each region) as a facet graph

In [4]:
import sys

sys.path.append("../")

import pandas as pd
import numpy as np

from dotenv import load_dotenv

load_dotenv()
import os

import sqlite3

# Path of the SQLite database, read from the environment (populated by
# load_dotenv() above when a .env file is present).
DB_PATH = os.getenv("DB_PATH")
if not DB_PATH:
    # Fail fast with an actionable message instead of the opaque TypeError
    # that sqlite3.connect(None) would raise.
    raise RuntimeError(
        "Environment variable DB_PATH is not set; define it in the environment or in a .env file."
    )
DATA_PATH = "data"

# Single connection shared by every query and write in this notebook.
conn = sqlite3.connect(DB_PATH)

import numpy as np

In [5]:
# Individuals Regions: full `individuals_regions` table; it is joined further
# down on `individual_wikidata_id` and supplies `region_name`.
df_ind_regions = pd.read_sql_query("SELECT * FROM individuals_regions", conn)

# Years: one (individual, birthyear) pair per row, dropping rows where either
# the identifier or the birth year is missing.
df_ind = pd.read_sql_query("SELECT * FROM individuals_main_information", conn)
df_ind_year = df_ind[["individual_wikidata_id", "birthyear"]].drop_duplicates()
df_ind_year = df_ind_year.dropna()

# Width, in years, of the time bins stored in the `decade` column.
temporal_resolution = 10

# Snap each birth year to the nearest multiple of `temporal_resolution`.
# Ties are rounded half-up with floor(x + 0.5): the built-in round() rounds
# halves to the nearest even value (1845 -> 1840 but 1855 -> 1860), which
# yields alternating 11-year and 9-year bins and a sawtooth artefact in any
# per-bin count.
df_ind_year["decade"] = (
    np.floor(pd.to_numeric(df_ind_year["birthyear"]) / temporal_resolution + 0.5).astype(int)
    * temporal_resolution
)

In [6]:
# Every row of `individual_identifiers`; later cells join it to the catalogs
# on `identifiers_wikidata_id` and read the individual's id and name from it.
df_catalogs_id = pd.read_sql_query(
    sql="SELECT * FROM individual_identifiers",
    con=conn,
)

In [7]:
# Catalog table reduced to (catalog id, country); rows missing either value
# are discarded.
df_catalogs = (
    pd.read_sql_query("SELECT * FROM identifiers", conn)
    .loc[:, ['identifiers_wikidata_id', 'country_name']]
    .dropna()
)

In [8]:
# Score each individual by the number of distinct catalog countries that
# reference them, highest score first.
df_cat = (
    df_catalogs_id
    .merge(df_catalogs, on='identifiers_wikidata_id')
    .loc[:, ['individual_wikidata_id', 'individual_name', 'country_name']]
    # One row per (individual, country) so that count() counts distinct countries.
    .drop_duplicates()
    .groupby(['individual_name', 'individual_wikidata_id'])['country_name']
    .count()
    .rename('score')
    .reset_index()
    .sort_values('score', ascending=False)
    .reset_index(drop=True)
)

In [9]:
#df_cat.head(20)

In [10]:
# Count the individuals per (region, decade). The inner joins keep only
# individuals that have a region, a birth decade and at least one row in
# `df_cat`.
df = (
    df_ind_regions
    .merge(df_ind_year, on='individual_wikidata_id')
    .merge(df_cat, on='individual_wikidata_id')
    .loc[:, ['individual_wikidata_id', 'region_name', 'decade']]
    # Each individual contributes at most once to a given (region, decade).
    .drop_duplicates()
    .assign(score=1)
    .groupby(['region_name', 'decade'])['score']
    .sum()
    .reset_index()
)

# Persist the aggregate, overwriting any previous `region_score` table; as the
# cell's last expression, the return value of to_sql is echoed as output.
df.to_sql(name='region_score', con=conn, if_exists='replace', index=False)

5363

In [11]:
# Work on a copy so the aggregated `df` stays untouched.
df_fig = df.copy()

# log(1 + count) compresses the range of the raw counts; log1p is the
# numerically stable form of np.log(1 + x).
df_fig['log_cultural_score'] = np.log1p(df_fig['score'])

# Min-max scale the log score within each region so every facet spans [0, 1].
log_by_region = df_fig.groupby('region_name')['log_cultural_score']
region_min = log_by_region.transform('min')
region_span = log_by_region.transform('max') - region_min
df_fig['normalized_cultural_score'] = (
    (df_fig['log_cultural_score'] - region_min)
    # A region with a single decade or a constant score has a zero span;
    # masking it avoids 0/0, and such regions are mapped to 0.0 instead of NaN
    # so they are not silently dropped from the plot.
    .div(region_span.where(region_span > 0))
    .fillna(0.0)
)

# NOTE(review): the cut-off is applied after normalization, so min/max also
# reflect decades later than 1840 and a plotted curve may never reach 1 —
# confirm this ordering is intended.
df_fig = df_fig[df_fig['decade'] <= 1840]

In [12]:
import plotly.express as px
import plotly

# One line chart per region, laid out on a grid six facets wide.
fig = px.line(df_fig,
              x="decade",
              y="normalized_cultural_score",
              facet_col="region_name",
              facet_col_wrap=6,
              width = 3000,
              height = 3000,
              template = 'simple_white')

# Called without row/col, update_xaxes / update_yaxes apply to every facet:
# a tick every 200 years, visible tick labels and a 'decade' title on each
# x axis. (The `col` argument of update_xaxes is a 1-based integer grid column,
# so passing a region name there matches no axis and sets no title.)
fig.update_xaxes(tickmode='linear', dtick=200, showticklabels=True, title_text='decade')
fig.update_yaxes(showticklabels=True)

# Write the figure to a standalone HTML file; the call returns the file name,
# which is echoed as the cell output.
plotly.offline.plot(fig)

'temp-plot.html'