<a href="https://colab.research.google.com/github/jbloewencolon/Creating-Dataset-for-The-Demographics-of-Faerun/blob/main/data_engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
import pandas as pd
import numpy as np
import random
import ast

In [None]:
# File paths (absolute locations under /content/drive/MyDrive)
df_file_path = '/content/drive/MyDrive/Colab Notebooks/DnD/fictional_data.xlsx'
idf_file_path = '/content/drive/MyDrive/Colab Notebooks/DnD/idf_dataset.csv'

# Read the Excel workbook into the DataFrame `df`
df = pd.read_excel(df_file_path)

# Read the CSV file into the DataFrame `idf`
idf = pd.read_csv(idf_file_path)

DataFrame exported to: /content/drive/MyDrive/Colab Notebooks/DnD/fictional_data.xlsx


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   settlement                  648 non-null    object 
 1   region_kingdom              648 non-null    object 
 2   capital                     648 non-null    object 
 3   settlement_population       648 non-null    int64  
 4   settlement_economy          648 non-null    int64  
 5   rumored_treasure_value      648 non-null    int64  
 6   demographic_breakdown       648 non-null    object 
 7   average_age                 648 non-null    int64  
 8   most_likely_cause_of_death  648 non-null    object 
 9   government_type             648 non-null    object 
 10  class_density               648 non-null    object 
 11  tax_rate                    648 non-null    int64  
 12  exports                     647 non-null    object 
 13  imports                     623 non

# Data Understanding

In [None]:
df.head()

Unnamed: 0,settlement,region_kingdom,capital,settlement_population,settlement_economy,rumored_treasure_value,demographic_breakdown,average_age,most_likely_cause_of_death,government_type,...,languages,religions,region_population,region_economy,area,average_temperature,annual_rainfall,magical_climate,dragon_sightings,sources
0,Athkatla,Amn,Athkatla,116163,34272222,526941,"{'humans': 116046, 'halflings': 116, 'half-orc...",39,Work-related accidents,['Confederacy'],...,"['Chondathan', 'Common']","['Bane', 'Chauntea', 'Cyric', 'Selûne', 'Sune'...",2920094,403450,,58.0,36.6,68,1,"Empires of the Sands - Scott Haring, 1988, Lan..."
1,Amnwater,Amn,Athkatla,7035,1092226,1122,"{'humans': 2335, 'halflings': 4699, 'half-orcs...",55,Bandit attacks,['Matriarchy'],...,"['Chondathan', 'Common']","['Bane', 'Chauntea', 'Cyric', 'Selûne', 'Sune'...",2920094,403450,,67.6,38.2,82,0,"Empires of the Sands - Scott Haring, 1988, Lan..."
2,Citadel Amnur,Amn,Athkatla,636,81796,81,"{'humans': 580, 'halflings': 55, 'half-orcs': ...",43,Assassination,['Patriarchy'],...,"['Chondathan', 'Common']","['Bane', 'Chauntea', 'Cyric', 'Selûne', 'Sune'...",2920094,403450,,69.1,44.6,56,0,"Empires of the Sands - Scott Haring, 1988, Lan..."
3,Citadel Rashturl,Amn,Athkatla,710,102127,102,"{'humans': 487, 'halflings': 222, 'half-orcs':...",49,Poison,['Monarchy'],...,"['Chondathan', 'Common']","['Bane', 'Chauntea', 'Cyric', 'Selûne', 'Sune'...",2920094,403450,,52.5,49.4,76,0,"Empires of the Sands - Scott Haring, 1988, Lan..."
4,Coryllvol,Amn,Athkatla,23000,4192755,9199,"{'humans': 5842, 'halflings': 17158, 'half-orc...",58,Starvation,['Kleptocracy'],...,"['Chondathan', 'Common']","['Bane', 'Chauntea', 'Cyric', 'Selûne', 'Sune'...",2920094,403450,,57.1,43.3,56,1,"Empires of the Sands - Scott Haring, 1988, Lan..."


In [None]:
# Columns that must hold whole numbers (they may arrive as floats)
columns_to_convert = ['settlement_population', 'region_population', 'settlement_economy', 'rumored_treasure_value', 'region_economy']

# Treat +/-infinity as missing *before* filling, then fill every missing value
# with 0. Replacing infinity after fillna(0) would put NaN back into the column
# and make astype(int) raise. One astype(int) per column is enough.
for column in columns_to_convert:
    df[column] = df[column].replace([np.inf, -np.inf], np.nan).fillna(0).astype(int)

# Determining Class Density and Generating Economy

In [None]:
# Daily cost of living for each lifestyle tier, expressed in gold pieces (gp);
# silver-piece amounts are entered as tenths of a gp (1 sp -> .1).
lifestyle_costs = {
    'Wretched': 0,          # No cost
    'Squalid': .1,           # 1 silver piece (sp) per day
    'Poor': .2,              # 2 silver pieces (sp) per day
    'Modest': 1,            # 1 gold piece (gp) per day
    'Comfortable': 2,       # 2 gold pieces (gp) per day
    'Wealthy': 4,           # 4 gold pieces (gp) per day
    'Aristocratic': 10      # 10 gold pieces (gp) minimum per day
}

# For every settlement size: the (min, max) bounds, in percent of the
# population, between which assign_lifestyle_demographics draws each tier's
# share. Each tier is drawn on its own, so the shares of one settlement are
# not forced to add up to 100%.
lifestyle_ranges = {
    'tiny_settlement': {'Aristocratic': (0, 0.0001), 'Wealthy': (0, .05), 'Comfortable': (1, 10), 'Modest': (1, 20), 'Poor': (30, 35), 'Squalid': (10, 30), 'Wretched': (0, 0.001)},
    'small_settlement': {'Aristocratic': (0, .005), 'Wealthy': (.05, 1), 'Comfortable': (4, 7), 'Modest': (7, 20),'Poor': (30, 35), 'Squalid': (15, 30), 'Wretched': (1, 2)},
    'medium_settlement': {'Aristocratic': (0, .05), 'Wealthy': (1, 2), 'Comfortable': (6, 8), 'Modest': (10, 25),'Poor': (30, 35), 'Squalid': (18, 30), 'Wretched': (3, 8)},
    'large_settlement': {'Aristocratic': (0, .5), 'Wealthy': (1, 3), 'Comfortable': (7, 13), 'Modest': (10, 20),'Poor': (30, 35), 'Squalid': (20, 40), 'Wretched': (5, 10)},
    'mega_settlement': {'Aristocratic': (0, .8), 'Wealthy': (2, 3), 'Comfortable': (10, 15), 'Modest': (15, 30),'Poor': (30, 35), 'Squalid': (35, 50), 'Wretched': (8, 10)}
}

In [None]:
def assign_lifestyle_demographics(population, rng=None):
    """Randomly split a settlement's population into lifestyle tiers.

    The population selects a size category (< 100 tiny, < 1,000 small,
    < 10,000 medium, < 100,000 large, otherwise mega). For every tier listed
    in ``lifestyle_ranges`` for that category, a percentage is drawn uniformly
    between the tier's bounds and converted to a head-count (truncated to an
    int). Tiers are drawn independently, so the counts are not forced to add
    up to ``population``.

    Args:
        population: Number of inhabitants of the settlement.
        rng: Optional object exposing ``uniform(a, b)`` (for example a seeded
            ``random.Random``) so results can be reproduced. Defaults to the
            module-level ``random`` generator, as before.

    Returns:
        A ``(class_density, total_economy)`` tuple: a dict mapping each tier
        to its head-count, and the sum over tiers of
        ``count * lifestyle_costs[tier] * 365`` (daily cost times 365 days).
    """
    if rng is None:
        rng = random

    # Step 1: Determine city_size. Each branch is only reached when the
    # previous upper bound failed, so no lower-bound check is needed.
    if population < 100:
        city_size = 'tiny_settlement'
    elif population < 1000:
        city_size = 'small_settlement'
    elif population < 10000:
        city_size = 'medium_settlement'
    elif population < 100000:
        city_size = 'large_settlement'
    else:
        city_size = 'mega_settlement'

    # Step 2: Calculate class_density and total_economy
    class_density = {}
    total_economy = 0
    for lifestyle, (start, end) in lifestyle_ranges[city_size].items():
        percentage = rng.uniform(start, end)
        count = int(population * (percentage / 100))
        class_density[lifestyle] = count
        total_economy += count * lifestyle_costs[lifestyle] * 365

    return class_density, total_economy

# Run the generator once per settlement, then unpack the resulting
# (class_density, total_economy) pairs into their two columns
lifestyle_pairs = df['settlement_population'].apply(assign_lifestyle_demographics)
class_densities, settlement_economies = zip(*lifestyle_pairs)
df['class_density'] = class_densities
df['settlement_economy'] = settlement_economies

NameError: ignored

In [None]:
# Total head-count of each lifestyle category across all settlements.
# The values already stored in df['class_density'] are summed. Calling
# assign_lifestyle_demographics() again here would draw fresh random numbers
# and report totals that do not match the DataFrame.
total_class_density = {}

for class_density in df['class_density']:
    # The column may still hold the text form of each dict (it is parsed with
    # ast.literal_eval elsewhere in this notebook), so accept both forms.
    if isinstance(class_density, str):
        class_density = ast.literal_eval(class_density)

    # Add the counts to the total_class_density dictionary
    for lifestyle, count in class_density.items():
        total_class_density[lifestyle] = total_class_density.get(lifestyle, 0) + count

# Print the total sum of each lifestyle category across all settlements
for lifestyle, total_count in total_class_density.items():
    print(f"Total {lifestyle} count: {total_count}")

NameError: ignored

In [None]:
df['class_density'].head()

0    {'Aristocratic': 332, 'Wealthy': 2809, 'Comfor...
1    {'Aristocratic': 2, 'Wealthy': 113, 'Comfortab...
2    {'Aristocratic': 0, 'Wealthy': 1, 'Comfortable...
3    {'Aristocratic': 0, 'Wealthy': 3, 'Comfortable...
4    {'Aristocratic': 71, 'Wealthy': 631, 'Comforta...
Name: class_density, dtype: object

In [None]:
# Sum of every settlement's economy
economy_by_settlement = df['settlement_economy']
total_economy = economy_by_settlement.sum()
print(f"Total Economy: {total_economy}")

# The ten rows with the largest settlement economy, largest first
top_10_settlements = df.nlargest(10, 'settlement_economy')

# Show only the columns needed to read the ranking
report_columns = ['settlement', 'settlement_economy', 'settlement_population']
print("Top 10 Settlements by Economy:")
print(top_10_settlements[report_columns])


Total Economy: 1411247388.0
Top 10 Settlements by Economy:
        settlement  settlement_economy  settlement_population
450      Waterdeep         350050841.0                1347840
625     Gheldaneth          47012036.5                 170413
441      Unthalass          46052488.0                 158047
622          Skuld          45662777.5                 193245
0         Athkatla          34272222.5                 116163
456  Baldur's Gate          31422813.5                 125000
59          Suzail          30664124.5                 129311
362      Zazesspur          29910326.5                 114508
31       Calimport          16168405.0                  86268
55        Soorenar          15090888.5                  75665


In [None]:
# Per-region total of 'settlement_economy', kept as its own small table
region_economy_sum = df.groupby('region_kingdom')['settlement_economy'].sum().reset_index()
region_economy_sum.columns = ['region_kingdom', 'new_region_economy']

# Write each region's total back onto its settlements. transform('sum')
# returns a Series aligned with df's index, so no merge, temporary column or
# in-place drop is needed, and `df` is updated rather than replaced by a new
# merged frame.
df['region_economy'] = df.groupby('region_kingdom')['settlement_economy'].transform('sum')


In [None]:
def adjust_settlement_economy(row):
    """Scale a settlement's economy down according to its size category.

    Divisors: tiny 10000, small 1000, medium 100, large 10. Any other size
    (including mega, missing or non-string values) leaves the economy
    unchanged.

    The size may be written either as the bare category ('tiny') or with the
    '_settlement' suffix ('tiny_settlement') that the rest of this notebook
    uses for settlement sizes; previously only the bare form matched, so
    suffixed labels were never scaled.

    Args:
        row: Mapping (e.g. a DataFrame row) with 'settlement_size' and
            'settlement_economy'.

    Returns:
        The scaled economy, or the original 'settlement_economy' value when
        no divisor applies.
    """
    divisors = {'tiny': 10000, 'small': 1000, 'medium': 100, 'large': 10}
    suffix = '_settlement'

    size = row['settlement_size']
    if isinstance(size, str) and size.endswith(suffix):
        size = size[:-len(suffix)]

    # Only strings can name a category; anything else falls through unchanged
    divisor = divisors.get(size) if isinstance(size, str) else None
    if divisor is None:
        return row['settlement_economy']
    return row['settlement_economy'] / divisor

# Create 'settlement_size' column by applying 'determine_settlement_size'
# NOTE(review): determine_settlement_size is not defined in the visible part of
# this notebook before this cell; confirm it exists before running on a fresh
# kernel.
idf['settlement_size'] = idf['settlement_population'].apply(determine_settlement_size)

# Save this column as a separate Series for future use
settlement_size_series = idf['settlement_size'].copy()

# Adjust the 'settlement_economy' column
# NOTE: the column is overwritten with the adjusted values, so any row that is
# scaled here is scaled again each time this cell is re-run.
idf['settlement_economy'] = idf.apply(adjust_settlement_economy, axis=1)


NameError: ignored

In [None]:
idf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   settlement                 648 non-null    object 
 1   region_kingdom             648 non-null    object 
 2   capital                    648 non-null    object 
 3   settlement_population      648 non-null    int64  
 4   military                   622 non-null    object 
 5   exports                    647 non-null    object 
 6   imports                    623 non-null    object 
 7   population_breakdown       648 non-null    object 
 8   region_population          648 non-null    int64  
 9   settlement_economy         648 non-null    int64  
 10  class_density              648 non-null    object 
 11  demographic_breakdown      648 non-null    object 
 12  average_age                648 non-null    int64  
 13  rumored_treasure_value     648 non-null    int64  

# Calculating Hidden Economy

In [None]:
def calculate_hidden_economy(row):
    """Estimate a settlement's hidden economy from its lifestyle head-counts.

    Each relevant lifestyle count is multiplied by a small fixed weight.
    Wealthy, Aristocratic and Squalid add to the multiplier; Wretched and Poor
    subtract from it. The result is 'settlement_economy' times that
    multiplier, or 'settlement_economy' itself when the multiplier is 0.

    Args:
        row: Mapping with 'class_density' (a dict, or its text form) and
            'settlement_economy'.

    Returns:
        The hidden-economy value, or None when anything goes wrong (the error
        is printed).
    """
    try:
        raw_density = row['class_density']
        class_density = ast.literal_eval(raw_density) if isinstance(raw_density, str) else raw_density

        def weighted(lifestyle, weight):
            # Head-count of one lifestyle (0 when absent) times its weight
            return class_density.get(lifestyle, 0) * weight

        # Weights are deliberately tiny to keep hidden_economy values small
        hidden_multiplier = (
            weighted('Wealthy', 0.000004)
            + weighted('Aristocratic', 0.00001)
            + weighted('Squalid', 0.0000005)
            - weighted('Wretched', 0.0000003)
            - weighted('Poor', 0.0000001)
        )

        if hidden_multiplier == 0:
            return row['settlement_economy']
        return row['settlement_economy'] * hidden_multiplier
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Recalculate 'hidden_economy' using the updated function
# NOTE(review): the function is applied to the rows of `df`, but the result is
# stored on `idf` (pandas matches the values by index label). Confirm that `df`
# rather than `idf` is the intended source of 'class_density' and
# 'settlement_economy'.
idf['hidden_economy'] = df.apply(calculate_hidden_economy, axis=1)

# Convert the 'hidden_economy' column to integers, handling None values
# (rows whose calculation failed become 0)
idf['hidden_economy'] = idf['hidden_economy'].fillna(0).astype(int)


In [None]:
# Hidden economy per capita.
# Zero populations are masked on a temporary Series, so the division yields
# NaN for them (reported as 0 below) without overwriting
# idf['settlement_population'] itself with NaN.
nonzero_population = idf['settlement_population'].replace(0, np.nan)
idf['hidden_economy_per_capita'] = (idf['hidden_economy'] / nonzero_population).fillna(0)

# Get the top 10 settlements with the highest hidden economy per capita
top_10_hidden_economies = idf.nlargest(10, 'hidden_economy_per_capita')[['settlement', 'hidden_economy_per_capita']]

# Print the top 10 hidden economies per capita
print("Top 10 Settlements with Highest Hidden Economy Per Capita:")
print(top_10_hidden_economies)

Top 10 Settlements with Highest Hidden Economy Per Capita:
     settlement  hidden_economy_per_capita
450   Waterdeep                  27.467449
441   Unthalass                  20.663258
625  Gheldaneth                  18.703168
362   Zazesspur                  14.140078
59       Suzail                  13.173365
294        Surd                  12.235608
295     Tulbegh                  10.960058
291       Saerb                   8.495393
0      Athkatla                   8.482374
622       Skuld                   8.141851


In [None]:
idf['hidden_economy'].head()

0    985338
1       298
2         0
3         0
4     25034
Name: hidden_economy, dtype: int64

In [None]:
# Store the visible economy as whole numbers
idf['settlement_economy'] = idf['settlement_economy'].astype(int)

# Total economy = visible economy plus hidden economy, over all settlements
visible_total = idf['settlement_economy'].sum()
hidden_total = idf['hidden_economy'].sum()
total_economy = visible_total + hidden_total

print(f"The total economy of all settlements is {total_economy}")


The total economy of all settlements is 1465417469


In [None]:
# Function to adjust hidden economy based on the number of unique imports and exports
def adjust_hidden_economy(row):
    """Add a trade-based bonus to a settlement's hidden economy.

    The bonus is ``(unique exports + unique imports) / 100`` times
    'settlement_economy'.

    'exports' and 'imports' may be real lists or the text form of a list
    (e.g. "['grain', 'wine']"), which is how list columns may arrive after
    being read from a file. Previously only real lists were counted, so text
    values silently contributed nothing. Missing or unparseable values count
    as zero items.

    Args:
        row: Mapping with 'exports', 'imports', 'settlement_economy' and
            'hidden_economy'.

    Returns:
        'hidden_economy' plus the trade bonus.
    """
    def _unique_count(value):
        # Parse text into a Python object first; give up quietly on bad text
        if isinstance(value, str):
            try:
                value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                return 0
        if isinstance(value, (list, tuple, set)):
            return len(set(value))
        return 0

    # Get the number of unique exports and imports
    unique_exports = _unique_count(row['exports'])
    unique_imports = _unique_count(row['imports'])

    # Calculate adjustment value
    adjustment_value = (unique_exports + unique_imports) / 100 * row['settlement_economy']

    # Update hidden_economy value
    return row['hidden_economy'] + adjustment_value

# Assuming 'hidden_economy' is already initialized. If not, initialize it first.
if 'hidden_economy' not in idf.columns:
    idf['hidden_economy'] = 0.0  # Initialize to 0

# Now adjust the 'hidden_economy' column
# NOTE: adjust_hidden_economy adds its bonus to the current 'hidden_economy',
# so re-running this cell adds the bonus again on top of the previous result.
idf['hidden_economy'] = idf.apply(adjust_hidden_economy, axis=1)
# Store whole numbers only
idf['hidden_economy'] = idf['hidden_economy'].astype(int)

# Demographics

In [None]:
import ast  # For parsing the string representation of a dictionary

# Custom function to calculate demographic_breakdown
def calculate_demographics(row):
    """Convert a row's percentage breakdown by species into head-counts.

    Args:
        row: Mapping with 'population_breakdown' (a dict of species ->
            percentage, or its text form) and 'settlement_population'.

    Returns:
        A dict of species -> head-count, each being
        ``settlement_population * percentage / 100`` truncated to an int, or
        None when anything goes wrong (the error is printed).
    """
    try:
        shares = row['population_breakdown']
        if isinstance(shares, str):
            # Text form of a dictionary: turn it back into a dict
            shares = ast.literal_eval(shares)

        # Percentages -> absolute populations
        return {
            species: int(row['settlement_population'] * (percentage / 100))
            for species, percentage in shares.items()
        }
    except Exception as e:
        print(f"Error calculating demographics for row: {e}")
        return None

# Apply the function across the DataFrame rows (axis=1 passes one row at a
# time); a row whose calculation fails is stored as None
df['demographic_breakdown'] = df.apply(calculate_demographics, axis=1)


In [None]:
# Calculate the total population of all settlements
total_population = df['settlement_population'].sum()
print(f"The total population of all settlements is {total_population}")


The total population of all settlements is 6527146


# Average Age

In [None]:
# Custom function to calculate average_age with lifestyle modifiers
def calculate_average_age(row):
    """Estimate a settlement's average age from its species and lifestyle mix.

    For every species in 'demographic_breakdown', the midpoint of its age
    range (from ``species_age_ranges``, falling back to [1, 100]) is weighted
    by that species' share of 'settlement_population'. A lifestyle adjustment
    is then added: each lifestyle's share of the population times its entry
    in ``lifestyle_modifier`` (0 when the lifestyle is not listed).

    Args:
        row: Mapping with 'settlement_population', 'demographic_breakdown'
            and 'class_density' (the latter two as dicts or their text form).

    Returns:
        The estimated average age, or None when anything goes wrong (the
        error is printed).
    """
    def _as_dict(value):
        # Text form of a dict -> dict; anything else is passed through
        return ast.literal_eval(value) if isinstance(value, str) else value

    try:
        residents = row['settlement_population']
        species_counts = _as_dict(row['demographic_breakdown'])
        lifestyle_counts = _as_dict(row['class_density'])

        # Population-weighted midpoint of each species' age range
        species_component = 0
        for species_name, headcount in species_counts.items():
            low_high = species_age_ranges.get(species_name, [1, 100])  # unknown species use [1, 100]
            midpoint = sum(low_high) / 2
            species_component += midpoint * (headcount / residents)

        # Population-weighted lifestyle modifier
        lifestyle_component = 0
        for lifestyle_name, headcount in lifestyle_counts.items():
            modifier = lifestyle_modifier.get(lifestyle_name, 0)
            lifestyle_component += (headcount / residents) * modifier

        return species_component + lifestyle_component
    except Exception as e:
        print(f"Error calculating average age for row: {e}")
        return None

# Constants used by calculate_average_age. They are defined *before* the
# function is applied: calculate_average_age catches every exception, so
# applying it while these names are still undefined only prints an error per
# row and yields None for every settlement.
species = ["humans", "halflings", "half-orcs", "half-drow", "half-elves", "elves", "dwarves", "gnomes", "goblins", "lizardfolk", "yuan-ti", "orcs", "ogres", "asabi", "gnolls",
           "pterafolk", "drow", "centaurs", "wemics", "humanoids"]

# [minimum, maximum] age per species; calculate_average_age uses the midpoint
species_age_ranges = {
    "humans": [1, 100],
    "halflings": [1, 150],
    "half-elves": [1, 180],
    "half-orcs": [1, 75],
    "orcs": [1, 50],
    "elves": [1, 750],
    "dwarves": [1, 350],
    "gnomes": [1, 400],
    "goblins": [1, 60],  # Goblins generally have short lifespans.
    "lizardfolk": [1, 80],  # Lizardfolk have lifespans comparable to or slightly longer than humans.
    "yuan-ti": [1, 120],  # Yuan-ti have longer lifespans due to their snake-like nature.
    "ogres": [1, 100],  # Ogres have lifespans similar to humans but can occasionally live longer.
    "asabi": [1, 60],  # Also known as 'ashworms', Asabis have shorter lifespans.
    "gnolls": [1, 30],  # Gnolls have relatively short lifespans due to their chaotic and violent lives.
    "pterafolk": [1, 60],  # Pterafolk have lifespans similar to other humanoid species.
    "drow": [1, 750],  # Drow, or dark elves, have lifespans similar to other elves.
    "centaurs": [1, 120],  # Centaurs tend to live longer than humans but not as long as elves.
    "wemics": [1, 60],  # Wemics have lifespans similar to or slightly longer than humans.
    "half-drow": [1, 180],  # Half-drow might have lifespans in between humans and drow.
    "humanoids": [1, 80],  # Generic humanoid lifespan, individual species may vary.
    "misc.": [1, 80],  # Generic humanoid lifespan, individual species may vary.
}


# Lifestyle modifiers: added to the average age in proportion to each
# lifestyle's share of the population
lifestyle_modifier= {
    'Wretched': -40,
    'Squalid': -15,
    'Poor': -1,
    'Modest': 2,
    'Comfortable': 3,
    'Wealthy': 6,
    'Aristocratic': 10
}

# Apply the function across the DataFrame rows (once, now that the constants exist)
df['average_age'] = df.apply(calculate_average_age, axis=1)

# Rows that could not be calculated come back as None; turn them into 0 before
# casting, because astype(int) cannot convert missing values
df['average_age'] = df['average_age'].fillna(0).astype(int)

In [None]:
df['average_age'].head(25)

# Rumored Treasure Value

In [None]:
def calculate_rumored_treasure_value(row):
    """Estimate the rumored treasure value of a settlement.

    The value is 'settlement_economy' / 1000 plus 'hidden_economy' scaled by
    a factor that depends on 'settlement_size'. Sizes that are None or not
    listed in the table leave 'hidden_economy' unscaled.

    Args:
        row: Mapping with 'settlement_economy', 'settlement_size' and
            'hidden_economy'.

    Returns:
        The treasure value, or None when anything goes wrong (the error is
        printed).
    """
    # Share of the hidden economy that counts as treasure, per settlement size
    size_modifier = {
        'tiny_settlement': 0.09,
        'small_settlement': 0.09,
        'medium_settlement': 0.1,
        'large_settlement': .2,
        'mega_settlement': .5
    }
    try:
        base_value = row['settlement_economy'] / 1000

        settlement_size = row['settlement_size']
        multiplier = 1 if settlement_size is None else size_modifier.get(settlement_size, 1)

        return base_value + row['hidden_economy'] * multiplier
    except Exception as e:
        print(f"Error calculating rumored treasure value for row: {e}")
        return None

# Apply the function across the DataFrame rows
# (each row must provide 'settlement_economy', 'settlement_size' and
# 'hidden_economy')
idf['rumored_treasure_value'] = idf.apply(calculate_rumored_treasure_value, axis=1)

# Handle None values before casting to int (rows that failed become 0)
idf['rumored_treasure_value'] = idf['rumored_treasure_value'].fillna(0).astype(int)

In [None]:
comparison_df = idf[['rumored_treasure_value', 'settlement_economy', 'hidden_economy']].head()
print(comparison_df)

   rumored_treasure_value  settlement_economy  hidden_economy
0                  526941            34272222          985338
1                    1122             1092226             298
2                      81               81796               0
3                     102              102127               0
4                    9199             4192755           25034


# Government Types

In [None]:
# List of government types
government_types = ["Autocracy", "Bureaucracy", "Confederacy", "Democracy", "Dictatorship", "Feudalism", "Gerontocracy", "Hierarchy",
                    "Monarchy", "Magocracy", "Militocracy", "Matriarchy", "Oligarchy", "Patriarchy", "Meritocracy", "Plutocracy",
                    "Republic", "Kleptocracy", "Satrapy", "Theocracy"]

# Overwrite "government_type" with one randomly chosen type per row. Every
# settlement gets its own draw, so settlements of the same region can differ.
# NOTE(review): no random seed is set in the visible cells, so the values
# change on every run.
df['government_type'] = [random.choice(government_types) for _ in range(len(df))]

# Print the DataFrame to verify the new column
print(df[['region_kingdom', 'government_type']])


      region_kingdom government_type
0                Amn        Monarchy
1                Amn       Autocracy
2                Amn     Militocracy
3                Amn    Gerontocracy
4                Amn       Hierarchy
..               ...             ...
643          Narfell    Dictatorship
644          Narfell       Democracy
645          Narfell       Magocracy
646          Narfell     Bureaucracy
647  The Sword Coast       Magocracy

[648 rows x 2 columns]


# Cause of Death

In [None]:
# Define causes of death for each settlement size
# (keys are the settlement-size labels; assign_cause_of_death picks one entry
# of the matching list at random)
causes_of_death = {
    'tiny_settlement': ['Old age', 'Wildlife attacks', 'Starvation', 'Disease', 'Exposure to the elements', 'Accidental falls', 'Lack of medical care', 'Poisonous plants', 'Isolation', 'Magic mishaps'],
    'small_settlement': ['Disease', 'Assassination', 'Poison', 'Old age', 'Starvation', 'Bandit attacks', 'Exposure to the elements', 'Magic mishaps', 'Work-related accidents', 'Wildlife attacks'],
    'medium_settlement': ['Disease', 'Old age', 'Assassination', 'Poison', 'Work-related accidents', 'Starvation', 'Exposure to the elements', 'Bandit attacks', 'Magic mishaps', 'Dueling accidents'],
    'large_settlement': ['Disease', 'Assassination', 'Poison', 'Old age', 'Magic mishaps', 'Work-related accidents', 'Starvation', 'Dueling accidents', 'Wagon Accident', 'Exposure to the elements'],
    'mega_settlement': ['Assassination', 'Disease', 'Poison', 'Magic mishaps', 'Dueling accidents', 'Crime', 'Work-related accidents', 'Starvation', 'Wagon Accident', 'Exposure to the elements']
}

# Function to assign cause of death based on settlement size
def assign_cause_of_death(row):
    """Pick a random cause of death suited to the row's settlement size.

    Args:
        row: Mapping with 'settlement_size', which must be a key of
            ``causes_of_death``.

    Returns:
        One entry, chosen at random, from the list for that size.

    Raises:
        KeyError: if the size is not a key of ``causes_of_death``.
    """
    candidate_causes = causes_of_death[row['settlement_size']]
    return random.choice(candidate_causes)

# Add a new column "settlement_size" based on population
# NOTE(review): determine_settlement_size is not defined in the visible part of
# this notebook; confirm it exists before this cell runs on a fresh kernel.
df['settlement_size'] = df['settlement_population'].apply(determine_settlement_size)

# Overwrite "most_likely_cause_of_death" with a randomly assigned cause
df['most_likely_cause_of_death'] = df.apply(assign_cause_of_death, axis=1)

# Print the DataFrame to verify the new columns
print(df[['settlement', 'settlement_population', 'settlement_size', 'most_likely_cause_of_death']])


           settlement  settlement_population    settlement_size  \
0            Athkatla                 116163    mega_settlement   
1            Amnwater                   7035  medium_settlement   
2       Citadel Amnur                    636   small_settlement   
3    Citadel Rashturl                    710   small_settlement   
4           Coryllvol                  23000   large_settlement   
..                ...                    ...                ...   
643            N’Jast                     71    tiny_settlement   
644          Peltarch                    313   small_settlement   
645           Selmast                    392   small_settlement   
646           Snowcap                    495   small_settlement   
647            Luskan                  16000   large_settlement   

    most_likely_cause_of_death  
0       Work-related accidents  
1               Bandit attacks  
2                Assassination  
3                       Poison  
4                   Starvation

# Inequality Score

In [None]:
# Define weights for factors (you can adjust these)
# Each weight multiplies one component of the inequality score; the four
# values below add up to 1.0.
weights = {
    'wealth_distribution': 0.4,
    'trade_balance': 0.1,
    'hidden_economy': 0.4,
    'average_age': 0.1
}

# Define a function to parse the 'class_density' column into dictionaries
def parse_class_density(x):
    """Return a 'class_density' value as a dictionary.

    Args:
        x: A dict, or the text form of a dict (e.g. "{'Poor': 3}").

    Returns:
        * ``x`` itself when it is already a dict. Previously such values were
          replaced by ``{}``, which wiped the column if the cell was run twice.
        * the parsed dict when ``x`` is valid text for one;
        * ``{}`` for anything that cannot be parsed or does not parse to a
          dict, so callers can always use ``.get``.
    """
    if isinstance(x, dict):
        return x
    try:
        parsed = ast.literal_eval(x)
    except (SyntaxError, ValueError, TypeError):
        return {}
    return parsed if isinstance(parsed, dict) else {}

# Turn the stored values into dictionaries so per-class counts can be read
idf['class_density'] = idf['class_density'].apply(parse_class_density)


def _lifestyle_counts(lifestyle):
    # One count per settlement for the given lifestyle (0 when it is absent)
    return idf['class_density'].apply(lambda density: density.get(lifestyle, 0))


# Ratio of the two richest tiers to the two most deprived ones; the +1 keeps
# the denominator away from zero
affluent_counts = _lifestyle_counts('Aristocratic') + _lifestyle_counts('Wealthy')
deprived_counts = _lifestyle_counts('Squalid') + _lifestyle_counts('Wretched') + 1
idf['normalized_wealth'] = affluent_counts / deprived_counts

# Assuming normalized_age is already scaled between 0 and 1
idf['inverted_normalized_age'] = 1 - idf['normalized_age']

# Weighted sum of the four components (age enters through its inverted value)
wealth_term = weights['wealth_distribution'] * idf['normalized_wealth']
trade_term = weights['trade_balance'] * idf['normalized_trade_balance']
hidden_term = weights['hidden_economy'] * idf['normalized_hidden_economy']
age_term = weights['average_age'] * idf['inverted_normalized_age']
idf['inequality_score'] = wealth_term + trade_term + hidden_term + age_term

# One random factor between 0.95 and 1.05 per row of idf
num_rows = len(idf.index)
random_factors = pd.Series([random.uniform(0.95, 1.05) for _ in range(num_rows)])

# Scale every score by its factor (positional, via the underlying array)
idf['inequality_score'] = idf['inequality_score'] * random_factors.to_numpy()


In [None]:
idf['inequality_score'].describe()

count      648.000000
mean        69.587349
std        615.832001
min          0.028436
25%          0.112051
50%          0.183052
75%          0.318514
max      11224.047147
Name: inequality_score, dtype: float64

# Tax Rate

In [None]:
# Parse 'class_density' only where it is still text. Values that are already
# dicts (freshly generated ones, or the column after this cell has run once)
# are kept as they are; passing them to ast.literal_eval would raise.
df['class_density'] = df['class_density'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

# Define the calculate_tax_rate function
def calculate_tax_rate(row):
    """Return the tax rate (in percent) for one settlement row.

    Rules, as implemented:
      * 0 when the settlement has no aristocrats, or when its whole region
        (looked up in the module-level ``df``) has fewer than 100 aristocrats;
      * for a region's capital (``capital == settlement``): a random integer
        from 10 to 20 when 'military' is present, otherwise from 1 to 10;
      * for any other settlement: a random integer from 1 to 20.
    The rate is then multiplied by ``1 + (imports + exports) / 1000``, where
    imports/exports are the number of items in those columns.

    'imports' and 'exports' may be real lists or the text form of a list.
    The previous check ``pd.notna(value) and isinstance(value, list)`` raised
    ValueError for lists with two or more items (``pd.notna`` returns an array
    for them) and counted text values as 0.

    Args:
        row: Mapping with 'imports', 'exports' and 'class_density' (a dict);
            rows with aristocrats also need 'region_kingdom', 'capital',
            'settlement' and 'military'.

    Returns:
        The adjusted tax rate as a float.
    """
    def _item_count(value):
        # Number of entries in a list, given either the list or its text form
        if isinstance(value, str):
            try:
                value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                return 0
        return len(value) if isinstance(value, list) else 0

    def _is_present(value):
        # Containers count as present; scalars are present unless NaN/None
        return (not pd.api.types.is_scalar(value)) or bool(pd.notna(value))

    region_imports = _item_count(row['imports'])
    region_exports = _item_count(row['exports'])

    # Check if there are aristocrats in the settlement
    if row['class_density'].get('Aristocratic', 0) > 0:
        region_aristocrats = df[df['region_kingdom'] == row['region_kingdom']]['class_density'].apply(lambda x: x.get('Aristocratic', 0)).sum()

        # Check if the region has 100 or more aristocrats
        if region_aristocrats >= 100:
            # Check if there is a military in the region's capital
            if row['capital'] == row['settlement']:
                if _is_present(row['military']):
                    tax_rate = np.random.randint(10, 21)  # Tax rate between 10% and 20% with military
                else:
                    tax_rate = np.random.randint(1, 11)  # Tax rate between 1% and 10% without military
            else:
                tax_rate = np.random.randint(1, 21)  # Tax rate between 1% and 20% outside the capital
        else:
            tax_rate = 0  # Tax rate is 0 if region has aristocrats but <100 total aristocrats
    else:
        tax_rate = 0  # Tax rate is 0 if no aristocrats in the settlement

    # Calculate the multiplier based on imports and exports
    multiplier = 1 + (region_imports + region_exports) / 1000

    # Apply the multiplier to the tax rate
    return tax_rate * multiplier

# Apply the calculate_tax_rate function to create the tax_rate column
# (axis=1 hands one settlement row at a time to the function; non-zero rates
# come from np.random.randint, so they can differ between runs)
df['tax_rate'] = df.apply(calculate_tax_rate, axis=1)


In [None]:
# Total 'inequality_score' of each region (one entry per 'region_kingdom')
scores_by_region = idf.groupby('region_kingdom')['inequality_score']
region_inequality_sum = scores_by_region.sum()

# Highest totals first; keep the first ten
ranked_regions = region_inequality_sum.sort_values(ascending=False)
top_10_inequal_regions = ranked_regions.iloc[:10]

# Show the ranking
print("Top 10 Most Unequal Regions:")
print(top_10_inequal_regions)


Top 10 Most Unequal Regions:
region_kingdom
Moonshae    123.313278
Tethyr       61.221127
Cormyr       50.428565
Halruaa      50.144494
Damara       38.056324
Turmish      37.485639
Amn          32.962329
Thay         31.804846
Lantan       30.502219
Impiltur     29.119687
Name: inequality_score, dtype: float64


In [None]:
# Group by 'settlement' and sum the 'inequality_score' for each settlement name
# NOTE(review): this reuses the names region_inequality_sum and
# top_10_inequal_regions from the per-region ranking, so after this cell they
# hold per-settlement values.
region_inequality_sum = idf.groupby('settlement')['inequality_score'].sum()

# Sort the settlements by the summed inequality score in descending order and take the top 10
top_10_inequal_regions = region_inequality_sum.sort_values(ascending=False).head(10)

# Print the top 10 most unequal settlements
print("Top 10 Most Unequal Settlements:")
print(top_10_inequal_regions)


Top 10 Most Unequal Settlements:
settlement
Tsan           21.000000
Harloch         2.231275
Dorset          2.220974
Hickorydale     2.218901
Llewellyn       2.216759
Highhome        2.215710
Wyngate         2.215024
Dynnatt         2.207920
Elyssyrr        2.205746
Borth           2.182493
Name: inequality_score, dtype: float64


# Adjusting Average Age

In [None]:
# Define a function to adjust average_age based on inequality_score
def adjust_average_age(row, scaling_factor=50, minimum_age=1):
    """Lower a settlement's average age in proportion to its inequality score.

    Args:
        row: Mapping with 'average_age' and 'inequality_score'.
        scaling_factor: Amount subtracted from the age per unit of inequality
            score. Defaults to 50, the value that used to be hard-coded.
        minimum_age: Lower bound for the result. Defaults to 1, as before.

    Returns:
        ``average_age - scaling_factor * inequality_score``, but never less
        than ``minimum_age``.
    """
    # Calculate the adjusted average age
    adjusted_age = row['average_age'] - scaling_factor * row['inequality_score']

    # Ensure the adjusted age does not fall below the floor
    return max(adjusted_age, minimum_age)

# Apply the adjustment row by row and store the result in a new column;
# the existing 'average_age' column is only read, not modified.
idf['adjusted_average_age'] = idf.apply(adjust_average_age, axis=1)

In [None]:
idf['adjusted_average_age'].head()

0    30.828081
1    44.986469
2    35.820596
3    41.580476
4    36.711909
Name: adjusted_average_age, dtype: float64

# Temperature and Climate

In [None]:
# Lookup tables used by assign_climate_attributes.
# region_climate maps a region/kingdom name to one of the climate labels that key
# the two range tables below ('Arid', 'Cold', 'Temperate', 'Jungle/Marsh').
region_climate = {
    'Amn': 'Temperate',  # Coastal and fertile
    'Calimshan': 'Arid',  # Desert and semi-arid regions
    'Chessenta': 'Temperate',  # Mediterranean climate
    'Cormyr': 'Temperate',  # Forested and fertile
    'Damara': 'Cold',  # Northern, colder climate
    'Halruaa': 'Temperate',  # Known for magical barriers, likely moderate climate
    'Impiltur': 'Temperate',  # Coastal with some mountainous terrain
    'Moonshae': 'Jungle/Marsh',  # Isles with varying climates, generally temperate — NOTE(review): value is 'Jungle/Marsh' although this note says temperate; confirm which is intended
    'Nimbral': 'Temperate',  # Island with moderate climate
    'Sembia': 'Temperate',  # Trading nation with fertile lands
    'Sossal': 'Cold',  # Far to the northeast, likely very cold
    'Tethyr': 'Temperate',  # Coastal and forested
    'Thay': 'Arid',  # Plateau with harsh climate
    'Thesk': 'Temperate',  # Known for trade, likely moderate climate
    'Turmish': 'Temperate',  # Coastal and forested
    'Unther': 'Arid',  # Desert-like with ancient ruins
    'The Sword Coast': 'Temperate',  # Varying climates, mostly temperate
    'Aglarond': 'Temperate',  # Forested peninsula
    'Evereska': 'Temperate',  # Elven city-state in a valley
    'Evermeet': 'Temperate',  # Elven island, magically preserved
    'Gundarlun': 'Cold',  # Northern islands
    'Ruathym': 'Cold',  # Island with a harsh, cold climate
    'Trisk (Kingdom of the Purple Rocks)': 'Temperate',  # Island, uncertain climate
    'Tuern': 'Cold',  # Volcanic island to the north
    'Hartsvale': 'Cold',  # Located in a valley within mountains
    'Lantan': 'Temperate',  # Island nation known for technology
    'Lapaliiya': 'Jungle/Marsh',  # Coastal with some jungles
    'Luiren': 'Temperate',  # Homeland of the halflings, likely moderate climate
    'The Silver Marches': 'Cold',  # Northern frontier with colder climate
    'Dwarfholds of the North': 'Cold',  # Mountainous and northern
    'Orlumbor': 'Temperate',  # Wooded island
    'Mulhorand': 'Arid',  # Desert with some fertile river valleys
    'Narfell': 'Cold',  # Plains with harsh winters
}
# [low, high] bounds per climate label; a value is drawn uniformly between them.
climate_temperature_range = {
    'Arid': [65, 100],  # Temperature range in Fahrenheit
    'Cold': [0, 40],
    'Temperate': [45, 70],
    'Jungle/Marsh': [65, 90]
}

# [low, high] bounds per climate label, same keys as climate_temperature_range.
average_rainfall = {
    'Arid': [0, 10],  # Rainfall range in inches per year
    'Cold': [20, 40],
    'Temperate': [30, 50],
    'Jungle/Marsh': [45, 65]
}
import random

def assign_climate_attributes(row):
    """Draw a random average temperature and annual rainfall for one settlement.

    The row's region is looked up in ``region_climate`` (regions that are not
    listed fall back to 'Temperate'); the resulting climate label selects the
    ranges in ``climate_temperature_range`` and ``average_rainfall`` that the
    two values are sampled from.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'region_kingdom'.

    Returns
    -------
    tuple of (float, float)
        ``(average_temperature, annual_rainfall)``. When a lookup fails the
        pair is ``(nan, nan)`` rather than ``None``, so callers that unpack
        two values per row keep working.
    """
    try:
        # Default to 'Temperate' if the region is not found
        climate = region_climate.get(row['region_kingdom'], 'Temperate')
        temp_range = climate_temperature_range[climate]
        rainfall_range = average_rainfall[climate]
        temp_low, temp_high = temp_range[0], temp_range[1]
        rain_low, rain_high = rainfall_range[0], rainfall_range[1]
    except (KeyError, IndexError, TypeError) as e:
        # Missing column, unhashable region value, or a climate label with no
        # (complete) range entry: report it and hand back a placeholder pair.
        print(f"Error assigning climate attributes for row: {e}")
        return float('nan'), float('nan')

    # Temperature is drawn before rainfall, matching the order of the returned pair.
    average_temp = random.uniform(temp_low, temp_high)
    annual_rainfall = random.uniform(rain_low, rain_high)
    return average_temp, annual_rainfall

# Apply the function to the DataFrame.
# zip(*...) transposes the per-row results into two sequences, one per target
# column, so every row must yield exactly two values for the unpacking to work.
df['average_temperature'], df['annual_rainfall'] = zip(*df.apply(assign_climate_attributes, axis=1))


# Magic Academies

In [None]:
# Lookup table: place name -> name of the magic school associated with it.
# NOTE(review): 'Waterdeep', 'Huzuz' and 'Evereska' appear more than once. In a
# Python dict literal only the last value given for a repeated key is kept, so
# the schools listed earlier for those places are silently discarded.
# NOTE(review): ' Waterdeep' and ' Lake of Steam' begin with a space and are
# therefore different keys from the un-prefixed names — looks unintended; confirm.
# NOTE(review): "Briel's School of the Arcane" and "Briels School of the Arcane"
# are spelled differently and are treated as two distinct schools.
wizard_schools = {
    'Arrabar': 'Academia Vilhonus',
    'Chondath': 'Academia Vilhonus',
    'Thay': 'Academy of Shapers and Binders',
    ' Waterdeep': 'Academy of Stargazers',
    'Mintar': 'Academy of the Drawn Sword',
    ' Lake of Steam': 'Academy of the Drawn Sword',
    'Seventon': "Alimon's Mental Academy",
    'Netheril': "Alimon's Mental Academy",
    'Sundabar': 'Anstruth',
    'Gheldaneth': 'Arcanum of Magic',
    'Mulhorand': 'Arcanum of Magic',
    'Silverymoon': "Arkhen's Invocatorium",
    'Huzuz': "Al-Bidir Sallah (Home of the Seekers of Knowledge)",
    'Zakhara': "Al-Bidir Sallah (Home of the Seekers of Knowledge)",
    'Waterdeep': 'Blackstaff Academy',
    'Luskan': 'Blackstaff Academy',
    'Baldurs Gate': "Briel's School of the Arcane",
    'The Sword Coast': "Briels School of the Arcane",
    'Zazesspur': 'Canaith',
    'Tethyr': 'Canaith',
    'Evereska': 'College of Magic and Arms',
    'The Sword Coast Faerûn': 'College of Magic and Arms',
    'Heralds Holdfast': 'College of the Herald',
    'Berdusk': 'College of Doss',
    'Myth Nantar': "Dukars' Academy",
    'Waterdeep': 'Dweomercore',
    'Waterdeep': 'Eltorchul Academy',
    'Loudwater': "Enalin's Shop of Adventurers' Supplies",
    'Delimbiyr Vale': "Enalin's Shop of Adventurers' Supplies",
    'Heliogabalus': 'Ends Rest',
    'Heldapan': 'Gathering of Magicians',
    'Durpar': 'Gathering of Magicians',
    'Akkaido': 'Golden Fox Martial Arts School',
    'Huzuz': 'Grand University of Huzuz',
    'Medina al-Afyal': 'Great Mosque of Selan',
    'Island of Afyal': 'Great Mosque of Selan',
    'Almorel': 'Great School of Deneir',
    'Waterdeep': 'Halasters Heirs',
    'Evereska': 'Hall of the Kaliesherai',
    'Waterdeep': 'House of Healing'
}

def assign_magic_academy(row):
    """Return the school that wizard_schools lists for the row's settlement.

    Settlements without an entry get the text 'None' (a string placeholder,
    not the None object).
    """
    return wizard_schools.get(row['settlement'], 'None')

# Look up each settlement's school; rows without a match get the 'None' placeholder.
df['magic_academy'] = df.apply(assign_magic_academy, axis=1)

# To keep track of schools that do not get placed: the set of placed schools is
# built once (instead of once per school), and every missing school is reported
# a single time, in the order it first appears in wizard_schools.
placed_schools = set(df['magic_academy'].unique())
unplaced_schools = [
    school for school in dict.fromkeys(wizard_schools.values())
    if school not in placed_schools
]

# Print unplaced schools
print("Unplaced Schools:", unplaced_schools)


In [None]:
def add_magic_academy(df, min_population=15000, placeholder='guilds'):
    """Mark large settlements that have no magic school as having guilds instead.

    Every row whose 'settlement_population' is strictly greater than
    ``min_population`` and whose 'magic_academy' equals the text 'None' gets
    ``placeholder`` written into 'magic_academy'. All other rows are left as
    they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'settlement_population' and 'magic_academy' columns. The frame is
        modified in place.
    min_population : int, default 15000
        Population that must be exceeded for the replacement to apply.
    placeholder : str, default 'guilds'
        Value written for qualifying settlements.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    is_large = df['settlement_population'] > min_population
    lacks_school = df['magic_academy'] == 'None'

    # One vectorised assignment replaces the per-row apply.
    df.loc[is_large & lacks_school, 'magic_academy'] = placeholder
    return df
df = add_magic_academy(df)

In [None]:
# Tally how often each distinct value occurs in the 'magic_academy' column
academy_counts = df['magic_academy'].value_counts()

# Report whether the 'guilds' placeholder was written, and if so how many times
if 'guilds' not in academy_counts.index:
    print("'guilds' was not added.")
else:
    guilds_count = academy_counts['guilds']
    print(f"'guilds' was added {guilds_count} times.")


'guilds' was added 78 times.


# Magical Climate

In [None]:
# Copy the demographic breakdown from df into idf (pandas aligns the values on index labels).
idf['demographic_breakdown'] = df['demographic_breakdown']
# Reference temperature that temperature_score measures the distance from
# (presumably degrees Fahrenheit, like the climate ranges — confirm).
moderate_temp = 70
def parse_demographic_breakdown(x):
    """Turn a stored demographic breakdown into a Python object.

    Parameters
    ----------
    x : str or dict
        Usually the text form of a dict literal, e.g. "{'human': 120}".

    Returns
    -------
    object
        ``x`` itself when it is already a dict; otherwise the literal parsed
        from ``x``; or an empty dict when ``x`` cannot be parsed.
    """
    # ast.literal_eval only accepts text (or AST nodes) and raises ValueError
    # for a dict, so an already-parsed value must be passed through untouched
    # or re-running the cell would replace the data with {}.
    if isinstance(x, dict):
        return x
    try:
        return ast.literal_eval(x)
    except (SyntaxError, ValueError):
        return {}

# Parse the 'demographic_breakdown' column into dictionaries
idf['demographic_breakdown'] = idf['demographic_breakdown'].apply(parse_demographic_breakdown)

# Calculate the count of non-humans in each settlement.
# Both the singular and the plural spelling of the human key are excluded, so a
# 'humans' entry is not counted as non-human.
idf['non_human_count'] = idf['demographic_breakdown'].apply(lambda x: sum(v for k, v in x.items() if k.lower() not in ('human', 'humans')) if isinstance(x, dict) else 0)

# Calculate the total population based on demographic breakdown
idf['total_population_from_demo'] = idf['demographic_breakdown'].apply(lambda x: sum(x.values()) if isinstance(x, dict) else 0)

# Calculate non-human proportion; handle division by zero by checking if total_population_from_demo is not zero
idf['non_human_proportion'] = idf.apply(lambda row: row['non_human_count'] / row['total_population_from_demo'] if row['total_population_from_demo'] != 0 else 0, axis=1)

# Score for magic academy presence.
# A settlement without a school carries the text placeholder 'None' (and may be
# NaN after a CSV round trip), never the None object, so an `is not None` test
# would award the 30 points to every row.
# NOTE(review): the 'guilds' placeholder still counts as an academy here — confirm.
idf['academy_score'] = idf['magic_academy'].apply(lambda x: 30 if pd.notna(x) and x != 'None' else 0)

# Define mean_inequality
mean_inequality = idf['inequality_score'].mean()

# Step 1: Randomly multiply each factor (one independent multiplier in [1, 5) per row and factor)
idf['random_age_multiplier'] = np.random.uniform(1, 5, len(idf))
idf['random_inequality_multiplier'] = np.random.uniform(1, 5, len(idf))
idf['random_temperature_multiplier'] = np.random.uniform(1, 5, len(idf))
idf['random_non_human_multiplier'] = np.random.uniform(1, 5, len(idf))

idf['age_score'] = idf['random_age_multiplier'] * (idf['average_age'] - idf['average_age'].min()) / (idf['average_age'].max() - idf['average_age'].min())
# NOTE(review): 'inequality_score' is overwritten in place here and again in
# Step 2, so the original scores are lost and re-running the cell transforms
# already-transformed values — confirm this is intended.
idf['inequality_score'] = idf['random_inequality_multiplier'] * (np.abs(idf['inequality_score'] - mean_inequality) / idf['inequality_score'].std())
# idf.get falls back to 0 when idf has no 'average_temperature' column, in which
# case every row is scored as if its temperature were 0.
idf['temperature_score'] = idf['random_temperature_multiplier'] * (np.abs(idf.get('average_temperature', 0) - moderate_temp))
idf['non_human_score'] = idf['random_non_human_multiplier'] * (idf['non_human_proportion'] - idf['non_human_proportion'].min()) / (idf['non_human_proportion'].max() - idf['non_human_proportion'].min())

# Step 2: Min-max rescale each factor so it starts at 1; the upper bound is the
# factor's weight: age 1-6, inequality 1-21, temperature 1-36, non-human 1-31.
# (A factor whose values are all equal yields NaN because max - min is 0.)
idf['age_score'] = 1 + 5 * (idf['age_score'] - idf['age_score'].min()) / (idf['age_score'].max() - idf['age_score'].min())
idf['inequality_score'] = 1 + 20 * (idf['inequality_score'] - idf['inequality_score'].min()) / (idf['inequality_score'].max() - idf['inequality_score'].min())
idf['temperature_score'] = 1 + 35 * (idf['temperature_score'] - idf['temperature_score'].min()) / (idf['temperature_score'].max() - idf['temperature_score'].min())
idf['non_human_score'] = 1 + 30 * (idf['non_human_score'] - idf['non_human_score'].min()) / (idf['non_human_score'].max() - idf['non_human_score'].min())

# Step 3: Sum up the normalized scores to get the magical_climate
idf['magical_climate'] = idf['academy_score'] + idf['age_score'] + idf['inequality_score'] + idf['temperature_score'] + idf['non_human_score']

# Round to whole points and clip to the 4-100 range (the unclipped sum can exceed 100)
idf['magical_climate'] = idf['magical_climate'].round().clip(lower=4, upper=100)


# Dragon Sightings

In [None]:
# Step 1: Calculate Region Totals
region_totals = idf.groupby('region_kingdom').agg({
    'hidden_economy': 'sum',
    'magical_climate': 'sum'
}).reset_index()

# Step 2: Allocate Dragon Sightings to Regions
# Each region's share of the total is proportional to its summed hidden economy
# plus its summed magical climate.
# NOTE(review): the two terms are added unscaled; magical_climate is clipped to
# at most 100 per settlement, so a hidden_economy measured in gold would
# presumably dominate the allocation — confirm this is intended.
total_dragon_sightings = 1000
region_totals['total_score'] = region_totals['hidden_economy'] + region_totals['magical_climate']
region_totals['allocated_dragon_sightings'] = (region_totals['total_score'] / region_totals['total_score'].sum()) * total_dragon_sightings
region_totals = region_totals[['region_kingdom', 'allocated_dragon_sightings']]

# Drop an allocation left over from an earlier run so the merge below does not
# produce a suffixed duplicate column
if 'allocated_dragon_sightings' in idf.columns:
    idf = idf.drop(columns=['allocated_dragon_sightings'])

# Step 3: Merge the allocated_dragon_sightings to the original DataFrame
idf = pd.merge(idf, region_totals, on='region_kingdom', how='left', suffixes=('', '_to_drop'))

# Drop the extra columns if they are created
idf = idf.drop(columns=[col for col in idf if 'to_drop' in col])

# Step 4: Allocate Dragon Sightings to Settlements
# A per-settlement random factor perturbs magical_climate, and each settlement
# then receives its region's allocation in proportion to the perturbed value.
# (An un-perturbed allocation used to be computed first, but it was overwritten
# immediately, so it has been removed.)
random_factor = np.random.uniform(0.1, 2, size=len(idf))
idf['adjusted_magical_climate'] = idf['magical_climate'] * random_factor
regional_adjusted_total = idf.groupby('region_kingdom')['adjusted_magical_climate'].transform('sum')
idf['settlement_dragon_sightings'] = (idf['adjusted_magical_climate'] / regional_adjusted_total) * idf['allocated_dragon_sightings']

# Sightings are countable: round to whole numbers. Rows with an undefined share
# (NaN) get 0 so the integer cast cannot fail.
# Because of the rounding, regional sums need not add up to total_dragon_sightings.
idf['settlement_dragon_sightings'] = idf['settlement_dragon_sightings'].fillna(0).round().astype(int)

In [None]:
# Sum 'settlement_dragon_sightings' within each region, then list the regions
# from most to fewest sightings with a fresh 0..n-1 index
total_dragon_sightings_by_region = (
    idf.groupby('region_kingdom')['settlement_dragon_sightings']
    .sum()
    .reset_index()
    .sort_values(by='settlement_dragon_sightings', ascending=False)
    .reset_index(drop=True)
)

# Display the result
print(total_dragon_sightings_by_region)

                         region_kingdom  settlement_dragon_sightings
0                       The Sword Coast                          690
1                             Mulhorand                           91
2                                Unther                           59
3                                Tethyr                           39
4                                Cormyr                           29
5                                   Amn                           22
6                             Calimshan                            9
7                                Sembia                            7
8                             Lapaliiya                            6
9                              Aglarond                            4
10                                 Thay                            2
11                            Chessenta                            1
12                              Halruaa                            0
13              Dwarfholds of the 

In [None]:
# Step 1
# Average household size per settlement-size category.
# np.random.uniform is called once per key while the dict is built, so every
# settlement of a given size class shares the same household size.
settlement_size_mapping = {
    'tiny_settlement': np.random.uniform(5, 15),
    'small_settlement': np.random.uniform(5, 11),
    'medium_settlement': np.random.uniform(4, 10),
    'large_settlement': np.random.uniform(2, 7),
    'mega_settlement': np.random.uniform(1, 8)
}

# Map settlement_size to a household size; sizes missing from the mapping get
# the mean of the mapped values
idf['average_household_size'] = idf['settlement_size'].map(settlement_size_mapping)
idf['average_household_size'] = idf['average_household_size'].fillna(idf['average_household_size'].mean())

# Truncate to whole households, but never fewer than one: a settlement smaller
# than the average household would otherwise get 0 households and cause a
# division by zero when debt is spread over households.
idf['total_households'] = (idf['settlement_population'] / idf['average_household_size']).astype(int).clip(lower=1)


# Step 2
# Keep only the rows whose class_density is a dictionary; every other row is
# dropped from idf. The test is held in a standalone boolean mask, so no helper
# column has to be added to the frame and removed again.
has_density_dict = idf['class_density'].apply(lambda value: isinstance(value, dict)).astype(bool)
idf = idf[has_density_dict].copy()

# Pull the three class shares out of a class_density dictionary
def parse_class_density(density):
    """Return the (Wealthy, Poor, Squalid) entries of ``density``; absent keys count as 0."""
    wealthy = density.get('Wealthy', 0)
    poor = density.get('Poor', 0)
    squalid = density.get('Squalid', 0)
    return wealthy, poor, squalid

# Expand each class_density dict into three temporary numeric columns
class_shares = idf['class_density'].apply(parse_class_density).apply(pd.Series)
idf[['Wealthy', 'Poor', 'Squalid']] = class_shares

# Step 3
# Settlement-wide debt: poorer classes add to it, the wealthy class reduces it,
# the tax rate scales it, and it is floored at zero (no negative debt).
debt_base = idf['Poor'] * 7 + idf['Squalid'] * 15 - idf['Wealthy'] * 5
total_debt = (1 + debt_base * idf['tax_rate']).clip(lower=0)

# Spread the settlement-wide figure over its households; undefined results become 0
idf['average_debt_by_household'] = (total_debt / idf['total_households']).fillna(0)

# Ensure the total debt doesn't exceed hidden_economy: where it would, use the
# hidden economy divided evenly over the households instead
within_hidden_economy = idf['average_debt_by_household'] * idf['total_households'] <= idf['hidden_economy']
capped_debt = idf['hidden_economy'] / idf['total_households']
idf['average_debt_by_household'] = idf['average_debt_by_household'].where(within_hidden_economy, capped_debt)

# Drop temporary columns if necessary
idf = idf.drop(columns=['Wealthy', 'Poor', 'Squalid', 'total_households'])


In [None]:
idf['average_debt_by_household'].describe()

count    648.000000
mean       1.346091
std        4.783225
min        0.000000
25%        0.000000
50%        0.004707
75%        0.286589
max       55.049743
Name: average_debt_by_household, dtype: float64

In [None]:
idf['average_debt_by_household'].head()

0    16.575067
1     0.001408
2     0.000000
3     0.000000
4     4.895190
Name: average_debt_by_household, dtype: float64

In [None]:
# Function to determine development_index based on conditions
def assign_development_index(row):
    """Pick a random development focus for one settlement.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'military'.

    Returns
    -------
    str
        'region improvement' or 'administration'; 'military support' is an
        additional candidate only when the row actually has a military.
    """
    options = ['region improvement', 'administration']

    military = row['military']
    # "No military" can show up as the text 'None', the None object, or NaN
    # (pandas.read_csv turns the text 'None' into NaN by default).
    # `military != military` is true only for NaN.
    no_military = military is None or military == 'None' or military != military
    if not no_military:
        options.append('military support')

    return random.choice(options)

# Draw one candidate development focus per settlement
per_settlement_choice = idf.apply(assign_development_index, axis=1)

# Give every settlement of a region the same development_index: the choice drawn
# for the first settlement of that region is broadcast to the whole group
idf['development_index'] = per_settlement_choice.groupby(idf['region_kingdom']).transform('first')

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   settlement                  648 non-null    object 
 1   region_kingdom              648 non-null    object 
 2   capital                     648 non-null    object 
 3   settlement_population       648 non-null    int64  
 4   settlement_economy          648 non-null    int64  
 5   rumored_treasure_value      648 non-null    int64  
 6   demographic_breakdown       648 non-null    object 
 7   average_age                 648 non-null    int64  
 8   most_likely_cause_of_death  648 non-null    object 
 9   government_type             648 non-null    object 
 10  class_density               648 non-null    object 
 11  tax_rate                    648 non-null    int64  
 12  exports                     647 non-null    object 
 13  imports                     623 non

In [None]:
import ast
import numpy as np

# Function to safely evaluate literals or return an empty dictionary on error
def safe_literal_eval(text):
    """Parse the text form of a Python literal.

    Parameters
    ----------
    text : str or dict
        Usually a string such as "{'Wealthy': 10}".

    Returns
    -------
    object
        ``text`` itself when it is already a dict; otherwise the parsed
        literal; or an empty dict when parsing fails.
    """
    # ast.literal_eval raises ValueError for a dict, so without this guard an
    # already-converted column would be replaced by empty dicts on a re-run.
    if isinstance(text, dict):
        return text
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        return {}

# Every conversion below leaves already-converted values alone, so the cell can
# be run more than once without corrupting the columns.

# Convert 'demographic_breakdown' column to dictionaries: missing entries become
# {}, unparsable text becomes {}, existing dicts are kept as they are
df['demographic_breakdown'] = df['demographic_breakdown'].apply(
    lambda x: x if isinstance(x, dict) else (safe_literal_eval(x) if pd.notna(x) else {})
)

# Convert 'class_density' column to dictionaries (existing dicts are kept)
df['class_density'] = df['class_density'].apply(
    lambda x: x if isinstance(x, dict) else safe_literal_eval(x)
)

# Wrap 'government_type' in a one-element list, unless it already is a list
df['government_type'] = df['government_type'].apply(lambda x: x if isinstance(x, list) else [x])

# Split 'religions' on ', ' (comma followed by a space); non-text values such as
# NaN or an existing list are passed through unchanged
df['religions'] = df['religions'].apply(lambda x: x.split(', ') if isinstance(x, str) else x)

# Split 'languages' the same way
df['languages'] = df['languages'].apply(lambda x: x.split(', ') if isinstance(x, str) else x)


0    36.6
1    38.2
2    44.6
3    49.4
4    43.3
Name: annual_rainfall, dtype: float64

In [None]:
# Mapping dictionary to standardize demographic categories.
# Keys are the spellings to replace, values the canonical (plural) names.
# standardize_demographics looks keys up exactly, so matching is case-sensitive
# and names that are not listed here are kept unchanged.
demographic_mapping = {
    'half-orcs': 'half-orcs',  # identity entry: already in canonical form
    'half-orc': 'half-orcs',
    'dwarf': 'dwarves',
    'half-elf': 'half-elves',
    'elf': 'elves',
    'gnoll': 'gnolls',
    'giant': 'giants',
    'tiefling': 'tieflings',
    'goblin': 'goblins',
    'human': 'humans',
    'orc': 'orcs',
}


# Define a function to safely convert strings to dictionaries
def safe_dict_conversion(value):
    """Parse the text form of a dict literal.

    Parameters
    ----------
    value : str or dict
        Usually a string such as "{'humans': 120}".

    Returns
    -------
    object
        ``value`` itself when it is already a dict; otherwise the parsed
        literal; or an empty dict when parsing fails.
    """
    # ast.literal_eval raises ValueError for a dict, so a column that has
    # already been parsed would otherwise be replaced by empty dicts.
    if isinstance(value, dict):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return {}

# Parse 'demographic_breakdown' entries that are still text. Entries that are
# already dictionaries are kept as they are; only text needs parsing.
df['demographic_breakdown'] = df['demographic_breakdown'].apply(
    lambda d: d if isinstance(d, dict) else safe_dict_conversion(d)
)

# Normalise any remaining falsy entry (e.g. None) to an empty dict; no rows are removed
df['demographic_breakdown'] = df['demographic_breakdown'].apply(lambda d: {} if not d else d)

# Standardize demographic categories using the mapping dictionary
def standardize_demographics(d):
    """Rename the keys of a demographic breakdown to their canonical names.

    Each key is replaced by its entry in ``demographic_mapping``; keys without
    an entry are kept. When several keys collapse to the same canonical name
    (e.g. 'half-orc' and 'half-orcs') their values are added together instead
    of the later one overwriting the earlier one.

    Parameters
    ----------
    d : dict
        Category name -> value (assumed numeric — TODO confirm).

    Returns
    -------
    dict
        New dict keyed by canonical category names; ``d`` is not modified.
    """
    merged = {}
    for key, value in d.items():
        name = demographic_mapping.get(key, key)
        if name in merged:
            merged[name] = merged[name] + value
        else:
            merged[name] = value
    return merged

df['demographic_breakdown'] = df['demographic_breakdown'].apply(standardize_demographics)

# Create a counter for demographic categories
from collections import Counter

# Flatten the category names of every settlement into one list, then tally how
# many settlements mention each category
all_demographics = [
    category
    for breakdown in df['demographic_breakdown']
    for category in breakdown.keys()
]
demographics_counter = Counter(all_demographics)

# Print the unique keys and their counts
for category, occurrences in demographics_counter.items():
    print(f'{category}: {occurrences}')

In [None]:
# Export DataFrame to Excel.
# NOTE: df_file_path is the same file df is read from at the top of the
# notebook, so this overwrites the input data with the transformed frame.
df.to_excel(df_file_path, index=False)

# Export the idf dataset to a CSV file, reusing the idf_file_path defined at the
# top of the notebook instead of repeating the literal path
idf.to_csv(idf_file_path, index=False)

print(f'DataFrame exported to: {df_file_path}')
print(f'idf dataset exported to: {idf_file_path}')

In [None]:
    # Draft set of extra attributes: each entry is a single random integer drawn
    # when this statement runs (np.random.randint excludes the upper bound).
    # NOTE(review): the statement is indented as if lifted from inside a function
    # or loop, and `data` is not referenced again in the visible part of the
    # notebook — confirm whether it is still needed.
    data = {
            "Number of Landowners": np.random.randint(1, 200),
            "Number of Homeowners": np.random.randint(10, 10000),
            "Average Rent": np.random.randint(1, 5000),
            "Likelihood of Getting Cursed": np.random.randint(0, 10),
            "Danger Level": np.random.randint(0, 10),
            "Openness to Strangers": np.random.randint(0, 100),
            "Political Climate": np.random.randint(0, 100),
            "Fate Ranking": np.random.randint(0, 10),
        }


In [None]:
# NOTE(review): this cell addresses df through title-case column names
# ('Danger Level', 'Region', 'Population of City', ...) and calls helpers
# (calculate_dragon_sightings, calculate_average_age, species_age_ranges,
# calculate_openness_to_strangers) that are not defined in the visible part of
# the notebook. It appears to target an earlier schema — confirm before running.

# Adjusting Danger Level
# First draft: sum of several risk-related columns plus a flat 10 when openness
# is at either extreme (<= 2.5 or >= 7.5).
# NOTE(review): this value and the two +/-1 adjustments below are discarded,
# because 'Danger Level' is reassigned from fresh random numbers further down.
df['Danger Level'] = (
    df['Dragon Sightings']
    + df['Political Climate']
    + df['Magical Climate']
    + df['Likelihood of Getting Cursed']
    + df['Tax Rate']
    + df['Average Debt by Household']
    + df['Rumored Treasure Value']
    + df['Openness to Strangers'].apply(lambda x: 10 if x <= 2.5 or x >= 7.5 else 0)
).astype(int)

# Increase "Danger Level" if Orcs are present
df.loc[df['Percentage of population Orcs'] > 0, 'Danger Level'] += 1

# Decrease "Danger Level" if Elves are present
df.loc[df['Percentage of population Elves'] > 0, 'Danger Level'] -= 1

# Adjusting Danger Level based on the presence of Orcs and Elves
# Replaces the value computed above: random integer 1-10, +3 with Orcs, -2 with Elves.
df['Danger Level'] = df.apply(lambda row: np.random.randint(1, 11)
                                          + (3 if row['Percentage of population Orcs'] > 0 else 0)
                                          - (2 if row['Percentage of population Elves'] > 0 else 0), axis=1)

# Adjusting Openness to Strangers
# Random integer 0-10, reduced by 3 when Elves or Orcs are present (can go negative).
# This column is reassigned again at the end of this block.
df['Openness to Strangers'] = df.apply(lambda row: np.random.randint(0, 11)
                                                   - (3 if row['Percentage of population Elves'] > 0
                                                       or row['Percentage of population Orcs'] > 0 else 0), axis=1)

# Adjusting Magical Climate
# Adds to the existing value, so each execution increases 'Magical Climate' further.
df['Magical Climate'] = df.apply(lambda row: row['Magical Climate']
                                             + (row['Percentage of population Elves'] / 100 * 2)
                                             + (row['Dragon Sightings'] / 10)
                                             + row['Number of Active Wizards'], axis=1)

# Pre-calculation of sum values
# Per region: active + hidden gold summed over all of its rows, then mapped back
# onto every row of that region.
grouped = df.groupby('Region')
region_gold_mapping = grouped[['Active Economy (Gold)', 'Hidden Economy (Gold)']].sum(numeric_only=True).sum(axis=1)
df['Total Gold in Region'] = df['Region'].map(region_gold_mapping)

# Main DataFrame Manipulations
# Dragon sightings are recomputed and min-max scaled to integers in 0-50
# (NaN, and a failing integer cast, if all values are equal).
df['Dragon Sightings'] = df.apply(calculate_dragon_sightings, axis=1)
df['Dragon Sightings'] = ((df['Dragon Sightings'] - df['Dragon Sightings'].min()) /
                          (df['Dragon Sightings'].max() - df['Dragon Sightings'].min()) * 50).astype(int)

# Average age: helper result minus half a year per danger point, floored at 1.
df['Average Age of Population'] = df.apply(lambda row: calculate_average_age(row, species_age_ranges), axis=1)
df['Average Age of Population'] -= df['Danger Level'] * 0.5
df['Average Age of Population'] = df['Average Age of Population'].clip(lower=1)

# Hidden economy is 10,000 gold per noble; openness is recomputed by a helper,
# replacing the random value assigned above.
df['Hidden Economy (Gold)'] = df['Number of Nobility'] * 1e4
df['Openness to Strangers'] = df.apply(calculate_openness_to_strangers, axis=1)

# Adjusting population distribution for specific species across cities
def adjust_population_distribution(df, species, percentage, city_count, random_state=None):
    """Spread a species' target population evenly over randomly chosen cities.

    The target head count is ``percentage`` percent of the summed
    'Population of City'. It is divided equally (integer division) among
    ``city_count`` sampled cities, and each of those cities gets its share
    written to 'Percentage of population {species}' as a percentage of that
    city's own population.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'Population of City' column. Modified in place.
    species : str
        Species name used to build the target column name.
    percentage : float
        Share of the total population the species should make up, in percent.
    city_count : int
        Number of cities to sample. Nothing happens when it is not positive.
    random_state : optional
        Passed to ``DataFrame.sample`` to make the city selection repeatable.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        From ``DataFrame.sample`` when ``city_count`` exceeds the number of rows.
    """
    # No cities requested (or none available): avoid dividing by zero below.
    if city_count <= 0 or df.empty:
        return

    column = f'Percentage of population {species}'
    total_population = df['Population of City'].sum()
    target_population = int(total_population * (percentage / 100))
    each_city_population = target_population // city_count

    selected_cities = df.sample(city_count, random_state=random_state)
    for i, city in selected_cities.iterrows():
        city_population = city['Population of City']
        # A city without inhabitants cannot host a share; skipping it avoids an
        # infinite percentage.
        if city_population <= 0:
            continue
        df.at[i, column] = (each_city_population / city_population) * 100

# Adjusting Economy Distribution
# Splits the summed active economy into 50% / 25% / 25% budgets.
# NOTE(review): these four values are not used again in the visible code.
total_active_economy = np.sum(df['Active Economy (Gold)'])
top_10_cities_active_economy = total_active_economy * 0.50
next_100_cities_active_economy = total_active_economy * 0.25
remaining_cities_active_economy = total_active_economy * 0.25

# Known Treasure and Rumored Treasure Calculations
# NOTE(review): these columns are named 'Known Treasure' / 'Rumored Treasure',
# while the formulas below read 'Known Treasure Value' / 'Rumored Treasure Value'.
df['Known Treasure'] = df['Active Economy (Gold)'] * 0.50
df['Rumored Treasure'] = (df['Active Economy (Gold)'] * 0.50) + df['Hidden Economy (Gold)']

# Random placeholder inputs for the Political Climate and Fate Ranking formulas.
# NOTE(review): num_cities is not defined in the visible part of the notebook.
df['Wizarding School'] = np.random.choice(['yes', 'no'], num_cities)
# A single random ownership rate (0.5-0.7) is drawn and applied to every city.
df['Number of Homeowners'] = (df['Population of City'] * np.random.uniform(0.5, 0.7)).astype(int)
df['Average Rent'] = np.random.uniform(500, 2000, num_cities)
# Replaces any previously computed 'Average Age of Population' with uniform random values.
df['Average Age of Population'] = np.random.uniform(25, 70, num_cities)
df['Known Treasure Value'] = np.random.uniform(0, 1e6, num_cities)

# Adjust the tax rate based on the number of nobility: no nobility means no tax
df['Tax Rate'] = df.apply(lambda row: 0 if row['Number of Nobility'] == 0 else row['Tax Rate'], axis=1)

# (You need to define 'Average Debt by Household' and 'Rumored Treasure Value' before using them in the next formula)
# Political Climate is the plain average of eight terms, each divided by a fixed
# scale (2000, 1e4, 70, 1e6) except the tax rate, which is used as is.
df['Political Climate'] = (df['Tax Rate'] + df['Number of Homeowners']/df['Population of City'] +
                              df['Average Rent']/2000 + df['Average Debt by Household']/1e4 +
                              df['Average Age of Population']/70 + df['Hidden Economy (Gold)']/1e6 +
                              df['Known Treasure Value']/1e6 + df['Rumored Treasure Value']/1e6) / 8

# NOTE(review): the column is written as 'Fate ranking' (lower-case r), which
# differs from the 'Fate Ranking' spelling used elsewhere — confirm.
df['Fate ranking'] = (df['Rumored Treasure Value'] / 1e6) + np.where(df['Wizarding School'] == 'yes', 1, 0) + df['Political Climate']