In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Pin the global NumPy RNG so every run produces the same synthetic table.
np.random.seed(0)

# Number of rows (conflict records) to generate.
n_samples = 10

# Battle / siege names used as row labels; each is drawn at most once below.
conflict_names = [
    'Battle of Austerlitz', 'Battle of Jena–Auerstedt',
    'Battle of Borodino', 'Battle of Leipzig',
    'Battle of Waterloo', 'Siege of Toulon',
    'Battle of the Pyramids', 'Battle of Marengo',
    'Battle of Ulm', 'Battle of Trafalgar',
]

# Labels for the two sides. Only the 'Coalition' list is sampled from below;
# the French side is always recorded as the literal string 'France'.
combatants = {
    'France': ['France', 'French Empire', 'Napoleon'],
    'Coalition': [
        'Russia', 'Austria', 'Prussia', 'United Kingdom',
        'Spain', 'Portugal', 'Sweden', 'Ottoman Empire',
    ],
}

# Value pools for the remaining categorical columns.
candidate_years = range(1803, 1815)  # upper bound is exclusive: 1803..1814
coalition_members = combatants['Coalition']
theaters = ['Europe', 'Middle East', 'North Africa', 'Atlantic Ocean']
resolutions = ['French Victory', 'Coalition Victory', 'Stalemate']

# Build the columns one at a time. The order of the np.random calls matters:
# they all consume the same seeded stream, so reordering changes the data.
columns = {}
columns['Conflict_ID'] = range(1, n_samples + 1)
columns['Conflict_Name'] = np.random.choice(conflict_names, size=n_samples, replace=False)
columns['Year'] = np.random.choice(candidate_years, size=n_samples)  # sampled with replacement
columns['Duration_Days'] = 10 * np.random.randint(1, 10, size=n_samples)  # 10, 20, ..., 90 days
columns['Combatant1'] = 'France'  # scalar, broadcast to every row
columns['Combatant2'] = np.random.choice(coalition_members, size=n_samples)
columns['Theater'] = np.random.choice(theaters, size=n_samples)
columns['Casualties'] = np.random.randint(1000, 70000, size=n_samples)  # 1000..69999
columns['Resolution'] = np.random.choice(resolutions, size=n_samples)

# Assemble the DataFrame and show it.
df_napoleonic_wars = pd.DataFrame(columns)

print(df_napoleonic_wars)


   Conflict_ID             Conflict_Name  Year  Duration_Days Combatant1  \
0            1        Battle of Borodino  1807             90     France   
1            2             Battle of Ulm  1810             20     France   
2            3        Battle of Waterloo  1809             60     France   
3            4       Battle of Trafalgar  1811             90     France   
4            5  Battle of Jena–Auerstedt  1811             50     France   
5            6    Battle of the Pyramids  1813             40     France   
6            7         Battle of Marengo  1804             10     France   
7            8         Battle of Leipzig  1809             40     France   
8            9      Battle of Austerlitz  1810             60     France   
9           10           Siege of Toulon  1810             10     France   

       Combatant2         Theater  Casualties         Resolution  
0         Prussia          Europe       12723     French Victory  
1  United Kingdom     Middle 

In [7]:
# EDA

# Data types: print each column's dtype and non-null count.
# The call was previously commented out, so the cell could not regenerate
# the .info() report recorded as its output.
df_napoleonic_wars.info()

# Manual annotation of the .info() table: the measurement level assigned to
# each column (this replaces the "Non-Null Count" column of the raw report).
#RangeIndex: 10 entries, 0 to 9
#Data columns (total 9 columns):
# #   Column         Level           Dtype
#---  ------         -----           -----
# 0   Conflict_ID    nominal         int64
# 1   Conflict_Name  categorical     object
# 2   Year           discrete        int32
# 3   Duration_Days  discrete        int32
# 4   Combatant1     categorical     object
# 5   Combatant2     categorical     object
# 6   Theater        categorical     object
# 7   Casualties     continuous      int32
# 8   Resolution     categorical     object
#dtypes: int32(3), int64(1), object(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Conflict_ID    10 non-null     int64 
 1   Conflict_Name  10 non-null     object
 2   Year           10 non-null     int32 
 3   Duration_Days  10 non-null     int32 
 4   Combatant1     10 non-null     object
 5   Combatant2     10 non-null     object
 6   Theater        10 non-null     object
 7   Casualties     10 non-null     int32 
 8   Resolution     10 non-null     object
dtypes: int32(3), int64(1), object(5)
memory usage: 728.0+ bytes


In [9]:
# Estimates of location for the 'Casualties' column.

casualties_series = df_napoleonic_wars['Casualties']

# Fraction of observations cut from EACH tail by the trimmed estimators.
trim_fraction = 0.1

# mean: uses every observation, so it is sensitive to outliers
mean_casualties = np.mean(casualties_series)
print('mean Casualeties: ',mean_casualties) # mean

# trimmed mean: mean after dropping the lowest and highest `trim_fraction`
trimmed_mean_casualties = stats.trim_mean(casualties_series, trim_fraction)
print('trimmed mean Casualties: ',trimmed_mean_casualties) # trimmed mean

# median
median_casualties = np.median(casualties_series)
print('median Casualties: ',median_casualties) 

# trimmed median
# scipy.stats has no `trim_median` (calling it raises AttributeError), so trim
# both tails with `stats.trimboth` and take the median of what remains.
# Because the trimming is symmetric, this equals the plain median, which
# illustrates that the median is a robust estimate: it is barely affected
# by outliers.
trimmed_median_casualties = np.median(stats.trimboth(casualties_series, trim_fraction))





mean Casualeties:  37789.5
trimmed mean Casualties:  38827.75
median Casualties:  45667.0
