# Workshop task

Imagine now that you are interested in the relationship between two of Seshat's social complexity variables: for example, a transport infrastructure variable such as "Road" and a profession variable such as "Professional soldier". Your hypothesis could be that polities with professional soldiers need roads for those soldiers to move along, so there should be a strong correlation between the number of polities with soldiers and the number of polities with roads in any given year.

In [2]:
from seshat_api import SeshatAPI
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Base URL of the Seshat API used by this notebook.
# (The original cell also carried a commented-out alternative:
#  "https://seshat-db.com/api".)
SESHAT_API_URL = "https://seshatdata.com/api"

# Single shared client; the variable wrappers created later are built from it.
client = SeshatAPI(base_url=SESHAT_API_URL)

In [4]:
# Study window in years CE. range() excludes its stop value, so one is added
# to keep the final year inside the window.
FIRST_YEAR = 500
LAST_YEAR = 1500
years = range(FIRST_YEAR, LAST_YEAR + 1)

In [5]:
from seshat_api.sc import ProfessionalSoldiers, Roads

# One wrapper per social-complexity variable, both backed by the shared client.
roads = Roads(client)
professional_soldiers = ProfessionalSoldiers(client)

# Pull every record for each variable into its own DataFrame.
# NOTE(review): get_all() presumably returns an iterable of dict-like records
# (later cells read 'polity', 'road' and 'professional_soldier' columns) —
# confirm against the seshat_api documentation.
roads_df = pd.DataFrame(roads.get_all())
professional_soldiers_df = pd.DataFrame(professional_soldiers.get_all())

In [6]:
# Each 'polity' cell holds a nested record. Expanding the list of those records
# yields one column per polity field (the counting cell below relies on the
# resulting 'start_year' and 'end_year' columns). The variable's own value is
# then attached as an extra column, aligned on the shared row index.
polities_with_professional_soldiers_df = pd.DataFrame(
    professional_soldiers_df['polity'].to_list()
).assign(professional_soldier=professional_soldiers_df['professional_soldier'])

polities_with_roads_df = pd.DataFrame(
    roads_df['polity'].to_list()
).assign(road=roads_df['road'])

In [7]:
# Per-year tally table: one row per year in `years`, one integer column per
# variable, every count starting at zero.
# Building the frame from the scalar 0 gives int64 columns directly. The
# previous form (an empty frame followed by .fillna(0)) started from all-NaN
# object columns and relied on pandas silently downcasting them, which newer
# pandas versions deprecate and which can leave the counts as object dtype.
road_soldier_frequency_df = pd.DataFrame(
    0,
    index=years,
    columns=['professional_soldier', 'road'],
)

In [8]:
def _count_present_per_year(polities_df, variable, years):
    """Count, for each year, the rows whose `variable` is 'present' that year.

    Parameters
    ----------
    polities_df : pandas.DataFrame
        Frame with 'start_year' and 'end_year' columns plus a column named
        `variable`.
    variable : str
        Name of the column holding the coded value (e.g. 'road').
    years : iterable of int
        Years to evaluate; both year bounds are treated as inclusive.

    Returns
    -------
    pandas.Series
        Integer counts indexed by year.

    Notes
    -----
    NOTE(review): this counts rows, not distinct polities. If the API can
    return several records for the same polity, a polity would be counted more
    than once — confirm against the data before interpreting the counts as
    "number of polities".
    """
    # The 'present' filter does not depend on the year, so apply it once
    # instead of re-evaluating it on every loop iteration.
    present_df = polities_df[polities_df[variable] == 'present']
    start_years = present_df['start_year']
    end_years = present_df['end_year']

    year_list = list(years)
    # Summing the boolean mask counts matching rows without materialising a
    # filtered DataFrame for every year.
    counts = [
        int(((start_years <= year) & (end_years >= year)).sum())
        for year in year_list
    ]
    return pd.Series(counts, index=year_list, dtype='int64')


# Fill each column in one assignment (aligned on the year index) rather than
# writing one cell at a time; this also keeps the columns integer-typed.
road_soldier_frequency_df['professional_soldier'] = _count_present_per_year(
    polities_with_professional_soldiers_df, 'professional_soldier', years
)
road_soldier_frequency_df['road'] = _count_present_per_year(
    polities_with_roads_df, 'road', years
)

In [None]:
# Spot-check five randomly chosen years of the per-year counts.
road_soldier_frequency_df.sample(n=5)

In [None]:
# Scatter plot of the per-year counts in road_soldier_frequency_df:
# x = rows with professional soldiers present, y = rows with roads present.
# Each point is one year, coloured by that year (the frame's index).
fig, ax = plt.subplots(figsize=(13, 7))
scatter = ax.scatter(
    road_soldier_frequency_df['professional_soldier'],
    road_soldier_frequency_df['road'],
    c=road_soldier_frequency_df.index,
    cmap='viridis',
)
ax.set_xlabel('Number of polities with Professional Soldiers')
ax.set_ylabel('Number of polities with Roads')
ax.set_title('Polities recorded as having Professional Soldiers vs Roads: 500CE to 1500CE')
# The colorbar maps point colour back to the year.
fig.colorbar(scatter, ax=ax, label='Year')
plt.show()