# Workshop task

Imagine now that you are interested in the relationship between two of Seshat's social complexity variables: for example, a transport infrastructure variable such as "Road" and a profession variable such as "Professional soldier". Your hypothesis could be that polities with professional soldiers need roads for those soldiers to travel on, so in any given year there should be a strong correlation between the number of polities with professional soldiers and the number of polities with roads.

In [24]:
from seshat_api import SeshatAPI
import pandas as pd
import matplotlib.pyplot as plt

In [25]:
# Client object handed to every Seshat endpoint wrapper created in this notebook.
client = SeshatAPI(base_url="https://seshatdata.com/api")
# Alternative base URL, left disabled. NOTE(review): confirm whether it is still needed.
# client = SeshatAPI(base_url="https://seshat-db.com/api")

In [26]:
# Years to tabulate: range(500, 1501) yields 500 through 1500 inclusive.
years = range(500, 1501)

In [None]:
# Snake_case names of the variables to compare. get_frequencies() uses each one
# both to derive an API class name and as the column holding the recorded value.
variables = ['road', 'professional_soldier']

In [36]:
def get_class_names(variables):
    """Derive a pluralised PascalCase class name from each variable name.

    Underscores are treated as word breaks, every word is title-cased, the
    words are joined back together and an ``'s'`` is appended, so
    ``'professional_soldier'`` becomes ``'ProfessionalSoldiers'``.

    Args:
        variables: Iterable of variable-name strings.

    Returns:
        A list of class-name strings, in the same order as ``variables``.
    """
    def _class_name(variable):
        # Title-case word by word, then glue the words back together.
        words = variable.replace('_', ' ').title().split(' ')
        pascal = ''.join(words)
        # Upper-case the first character and add the naive plural suffix.
        return pascal[0].upper() + pascal[1:] + 's'

    return [_class_name(variable) for variable in variables]

In [56]:
import importlib
def get_frequencies(years, variables):
    """Count, per year, the records in which each variable is 'present'.

    For every variable the matching endpoint class is looked up in
    ``seshat_api.sc`` (class name derived by ``get_class_names``),
    instantiated with the module-level ``client``, and all of its records
    are fetched. A record is counted for a year when its polity's
    ``start_year``/``end_year`` span includes that year and the variable's
    value equals ``'present'``.

    Args:
        years: Iterable of years; used as the index of the result.
        variables: List of snake_case variable names; used as the columns
            of the result and as the key holding each record's value.

    Returns:
        A DataFrame indexed by year with one integer column per variable.
        A variable whose endpoint returns no records keeps a count of 0.

    Raises:
        AttributeError: If ``seshat_api.sc`` has no class with the derived
            name.
        KeyError: If the fetched records lack the ``'polity'`` key, the
            variable's key, or the polity's ``'start_year'``/``'end_year'``.
    """
    # Materialise once: the years are needed for the index and for counting.
    years = list(years)
    # Passing the fill value to the constructor gives integer columns
    # directly, instead of an all-NaN object frame that fillna() must downcast.
    frequency_df = pd.DataFrame(0, index=years, columns=variables)
    # The endpoint classes all live in one module, so import it a single time.
    module = importlib.import_module('seshat_api.sc')
    for var, class_name in zip(variables, get_class_names(variables)):
        instance = getattr(module, class_name)(client)
        df = pd.DataFrame(instance.get_all())
        if df.empty:
            # Nothing recorded for this variable: leave its column at zero.
            continue
        # Expand each record's 'polity' entry into columns (start_year,
        # end_year, ...) and keep the variable's value next to it.
        polities_with_var_df = pd.DataFrame(df['polity'].tolist())
        polities_with_var_df[var] = df[var]
        # The 'present' filter does not depend on the year, so apply it once.
        present_df = polities_with_var_df[polities_with_var_df[var] == 'present']
        start_years = present_df['start_year']
        end_years = present_df['end_year']
        frequency_df[var] = [
            int(((start_years <= year) & (end_years >= year)).sum())
            for year in years
        ]
    return frequency_df

In [None]:
# roads_df = pd.DataFrame(roads.get_all())
# polities_with_roads_df = pd.DataFrame(roads_df['polity'].tolist())
# polities_with_roads_df['road'] = roads_df['road']

In [57]:
# Build the per-year counts with the generic helper, for comparison with the
# step-by-step version in the cells that follow.
example_frequency_df = get_frequencies(years, variables)

In [27]:
from seshat_api.sc import Roads
from seshat_api.sc import ProfessionalSoldiers
# One endpoint wrapper per variable, both built on the notebook's client.
roads, professional_soldiers = Roads(client), ProfessionalSoldiers(client)
# Fetch every record from each endpoint into its own DataFrame (roads first).
roads_df, professional_soldiers_df = (
    pd.DataFrame(endpoint.get_all())
    for endpoint in (roads, professional_soldiers)
)

In [28]:
# Expand each record's 'polity' entry into its own columns, then attach the
# variable's recorded value alongside it (aligned on the row index).
polities_with_roads_df = pd.DataFrame(roads_df['polity'].tolist()).assign(
    road=roads_df['road']
)
polities_with_professional_soldiers_df = pd.DataFrame(
    professional_soldiers_df['polity'].tolist()
).assign(professional_soldier=professional_soldiers_df['professional_soldier'])

In [29]:
# Per-year counts start at zero. Passing the fill value to the constructor
# yields integer columns directly, rather than building an all-NaN
# object-dtype frame and relying on fillna() to downcast it afterwards.
road_soldier_frequency_df = pd.DataFrame(0, index=years, columns=['professional_soldier', 'road'])

In [30]:
# For every year, count the rows whose polity spans that year
# (start_year <= year <= end_year) and whose variable is recorded as 'present'.
polity_tables = {
    'professional_soldier': polities_with_professional_soldiers_df,
    'road': polities_with_roads_df,
}
for year in years:
    for column, polities in polity_tables.items():
        spans_year = (polities['start_year'] <= year) & (polities['end_year'] >= year)
        is_present = polities[column] == 'present'
        road_soldier_frequency_df.loc[year, column] = len(polities[spans_year & is_present])

In [None]:
# Spot-check five randomly chosen years from the step-by-step table.
road_soldier_frequency_df.sample(5)
# len(road_soldier_frequency_df)

In [None]:
# Spot-check five randomly chosen years from the table built by get_frequencies().
example_frequency_df.sample(5)

In [None]:
# Line chart: one series per variable, yearly counts drawn on a shared axis.
plt.figure(figsize=(13, 7))
year_axis = road_soldier_frequency_df.index
for column, legend_label in (
    ('professional_soldier', 'Professional Soldiers'),
    ('road', 'Roads'),
):
    plt.plot(year_axis, road_soldier_frequency_df[column], label=legend_label)
plt.title('Number of Polities with Professional Soldiers and Roads Over Time')
plt.xlabel('Year')
plt.ylabel('Number of Polities')
plt.legend()
plt.show()

In [None]:
# Scatter of the two yearly counts against each other: one point per year,
# coloured by the year it belongs to.
soldier_counts = road_soldier_frequency_df['professional_soldier']
road_counts = road_soldier_frequency_df['road']
plt.figure(figsize=(13, 7))
scatter = plt.scatter(
    soldier_counts,
    road_counts,
    c=road_soldier_frequency_df.index,
    cmap='viridis',
)
plt.title('Polities recorded as having Professional Soldiers vs Roads: 500CE to 1500CE')
plt.xlabel('Number of polities with Professional Soldiers')
plt.ylabel('Number of polities with Roads')
plt.colorbar(scatter, label='Year')
plt.show()