In [None]:
from seshat_api import SeshatAPI
import pandas as pd
import matplotlib.pyplot as plt
from ollama import chat, ChatResponse

# Questions for LLMs

1. Can LLMs predict where polities fall on a complexity scale such as "Settlement hierarchy", given a definition of the complexity scale and the name of the polity (and its dates)?
2. Can LLMs predict specific absent/present variables for polities, given a definition of that variable and the name of the polity (and its dates)?

In [None]:
# Single API client instance; it is handed to each endpoint wrapper in the cells below.
client = SeshatAPI(
    base_url="https://seshat-db.com/api",
)

In [None]:
from seshat_api.sc import SettlementHierarchies

# Wrap the shared client in the settlement-hierarchy endpoint helper,
# then load everything it returns into a dataframe.
settlement_hierarchies = SettlementHierarchies(client)
settlement_hierarchies_df = pd.DataFrame(
    data=settlement_hierarchies.get_all(),
)

In [None]:
# Build the per-polity table in one chain:
#   1. expand the 'polity' column (assumed to hold dict-like records - TODO confirm)
#      into its own dataframe,
#   2. attach both settlement hierarchy columns from the source frame,
#   3. drop rows whose settlement_hierarchy_to is missing.
polities_with_settlement_hierarchies_df = (
    pd.DataFrame(settlement_hierarchies_df['polity'].tolist())
    .assign(
        settlement_hierarchy_to=settlement_hierarchies_df['settlement_hierarchy_to'],
        settlement_hierarchy_from=settlement_hierarchies_df['settlement_hierarchy_from'],
    )
    .dropna(subset=['settlement_hierarchy_to'])
)

# Show a handful of random rows as a sanity check
polities_with_settlement_hierarchies_df.sample(5)

### Using `polities_with_settlement_hierarchies_df`, plot the relationship between `end_year` and `settlement_hierarchy_to`

In [None]:
# Correlation between end_year and settlement_hierarchy_to (Series.corr default method)
correlation_coefficient = polities_with_settlement_hierarchies_df['end_year'].corr(
    polities_with_settlement_hierarchies_df['settlement_hierarchy_to']
)

# Scatter plot; keep the returned Axes so the annotation targets it explicitly
ax = polities_with_settlement_hierarchies_df.plot.scatter(
    x='end_year',
    y='settlement_hierarchy_to',
)

# Print the coefficient in the top-left corner of the axes
ax.annotate(
    f'Correlation: {correlation_coefficient:.2f}',
    xy=(0.05, 0.95),
    xycoords='axes fraction',
    fontsize=12,
    color='red',
)

plt.show()

This shows there's only a weak positive correlation between the end year of a polity and its recorded settlement hierarchy.

In [None]:
# Definition text for the "Settlement hierarchy" complexity scale.
# NOTE: the variable name is misspelled ("hierarcy"); it is kept as-is so that any
# other cell referring to it keeps working. The text itself is reproduced verbatim.
settlement_hierarcy_definition = (
    "Talking about Hierarchical Complexity, Settlement hierarchy records (in levels) "
    "the hierarchy of not just settlement sizes, but also their complexity as reflected "
    "in different roles they play within the (quasi)polity. "
    "As settlements become more populous they acquire more complex functions: "
    "transportational (e.g. port); economic (e.g. market); "
    "administrative (e.g. storehouse, local government building); "
    "cultural (e.g. theatre); religious (e.g. temple), utilitarian (e.g. hospital), "
    "monumental (e.g. statues, plazas). "
    "Example: (1) Large City (monumental structures, theatre, market, hospital, "
    "central government buildings) (2) City (market, theatre, regional government buildings) "
    "(3) Large Town (market, administrative buildings) "
    "(4) Town (administrative buildings, storehouse)) (5) Village (shrine) "
    "(6) Hamlet (residential only). "
    "In the narrative paragraph explain the different levels and list their functions. "
    "Provide a (crude) estimate of population sizes. "
    "For example, Large Town (market, temple, administrative buildings): "
    "2,000-5,000 inhabitants."
)

# Scientific literature

In [None]:
from seshat_api.sc import ScientificLiteratures

# Same pattern as for settlement hierarchies: endpoint helper around the shared
# client, then everything it returns loaded into a dataframe.
scientific_literatures = ScientificLiteratures(client)
scientific_literatures_df = pd.DataFrame(
    data=scientific_literatures.get_all(),
)

In [None]:
# Preview the first five records to see which columns the endpoint returns
scientific_literatures_df.head(n=5)

In [None]:
# Keep only expert-reviewed records.
# The explicit `== True` comparison is intentional: it yields a clean boolean mask
# even if the column holds missing or non-boolean values.
is_expert_reviewed = scientific_literatures_df['expert_reviewed'] == True
scientific_literatures_df = scientific_literatures_df.loc[is_expert_reviewed]

# Number of records that survive the filter
len(scientific_literatures_df)

In [None]:
# Expand the 'polity' column (assumed to hold dict-like polity records - TODO confirm)
# into its own dataframe. The result gets a fresh 0..n-1 index.
polities_with_scientific_literatures_df = pd.DataFrame(scientific_literatures_df['polity'].tolist())

# Add the scientific_literature column *positionally*.
# scientific_literatures_df keeps its original (non-contiguous) index labels after the
# expert_reviewed filter, whereas the frame above was just re-indexed from 0. Assigning
# the Series directly would align on index labels and attach values to the wrong
# polities (or fill NaN); .to_numpy() drops the index so row i is paired with row i.
polities_with_scientific_literatures_df['scientific_literature'] = (
    scientific_literatures_df['scientific_literature'].to_numpy()
)

# Remove all rows from the dataframe where the scientific_literature column is NaN
polities_with_scientific_literatures_df = polities_with_scientific_literatures_df[polities_with_scientific_literatures_df['scientific_literature'].notna()]

polities_with_scientific_literatures_df.sample(5)

In [None]:
# Definition text for the "Scientific literature" variable (reproduced verbatim)
scientific_literature_definition = (
    "Talking about Kinds of Written Documents, "
    "Scientific literature includes mathematics, natural sciences, social sciences"
)

In [None]:
def prompt_func(polity, variable, variable_definition):
    """Build a present/absent question about one variable for one polity.

    Args:
        polity: Name of the historical polity; inserted inside single quotes.
        variable: Name of the variable being asked about; appears twice in the prompt.
        variable_definition: Definition text placed after "is defined as: ".
            A period and a space are always appended after it.

    Returns:
        The assembled prompt string, ending with the instruction to answer
        'present' or 'absent'.
    """
    fragments = (
        "Use your knowledge of world history to answer the following question. ",
        f"Given your knowledge of the historical polity '{polity}'",
        f", do you expect that {variable} was present or absent? ",
        f"{variable} is defined as: {variable_definition}. ",
        "Answer 'present' if you expect it to be present, and 'absent' if you expect it to be absent.",
    )
    # Fragments already carry their own spacing, so join without a separator
    return "".join(fragments)
    