In [1]:

%pprint
import sys
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from notebook_utils import NotebookUtilities
import humanize
import os.path as osp
import pandas as pd
import re

# Shared helper object used throughout the notebook; both folders are
# resolved to absolute paths from the current working directory.
data_folder = osp.abspath('../data')
saves_folder = osp.abspath('../saves')
nu = NotebookUtilities(data_folder_path=data_folder, saves_folder_path=saves_folder)


# Build a Dataset of India vs China Wars and Armed Conflicts
Most of the data comes from the first table on Wikipedia's [List_of_anthropogenic_disasters_by_death_toll](https://en.wikipedia.org/wiki/List_of_anthropogenic_disasters_by_death_toll) page. The dataset build pipeline is at [https://github.com/dbabbitt/StatsByCountry/blob/master/builders/Build%20a%20Dataset%20of%20India%20vs%20China%20Wars%20and%20Armed%20Conflicts.ipynb](https://github.com/dbabbitt/StatsByCountry/blob/master/builders/Build%20a%20Dataset%20of%20India%20vs%20China%20Wars%20and%20Armed%20Conflicts.ipynb). The geometric mean estimate is used in the visualization pipeline at [https://github.com/dbabbitt/StatsByCountry/blob/master/lineplots/Visualize%20a%20Timeline%20of%20India%20vs%20China%20Wars%20and%20Armed%20Conflicts.ipynb](https://github.com/dbabbitt/StatsByCountry/blob/master/lineplots/Visualize%20a%20Timeline%20of%20India%20vs%20China%20Wars%20and%20Armed%20Conflicts.ipynb).

In [3]:

from datetime import datetime

# Current calendar year as a plain int (displayed as the cell's output)
present_year = datetime.today().year
present_year

2023


## Get Anthropogenic Disasters by Death Toll

In [4]:

# Pull the tables from the Wikipedia page through the project helper;
# with verbose=True the helper printed (table index, shape) pairs when this notebook was run.
wikipedia_url = 'https://en.wikipedia.org/wiki/List_of_anthropogenic_disasters_by_death_toll'
tables_df_list = nu.get_page_tables(wikipedia_url, verbose=True)

[(1, (119, 9)), (7, (107, 9)), (9, (73, 9)), (3, (44, 9)), (15, (59, 5)), (17, (30, 9)), (11, (28, 9)), (5, (21, 9)), (13, (18, 9)), (19, (13, 9)), (21, (8, 9)), (22, (7, 2)), (0, (1, 2)), (2, (1, 2)), (4, (1, 2)), (6, (1, 2)), (8, (1, 2)), (10, (1, 2)), (12, (1, 2)), (14, (1, 2)), (16, (1, 2)), (18, (1, 2)), (20, (1, 2))]


In [5]:

# Start from the scraped table and strip footnote markers ("[12]") from headers and cells.
# NOTE: the applymap below turns every cell into a string, so a missing value becomes the
# text 'nan' and null checks only work again after the numeric conversion further down.
anthropogenic_disasters_df = tables_df_list[1].copy()
anthropogenic_disasters_df.columns = anthropogenic_disasters_df.columns.map(lambda x: str(x).split('[')[0].split(',')[0])
anthropogenic_disasters_df = anthropogenic_disasters_df.applymap(lambda x: str(x).split('[')[0])

# Fix the BCEs and Presents
mask_series = anthropogenic_disasters_df.End.map(lambda x: 'Present' in str(x))
anthropogenic_disasters_df.loc[mask_series, 'End'] = present_year

# Events given explicit signed (negative = BCE) start/end years; the numeric cleanup below
# keeps only digits and hyphens, so any era text in the scraped cell would otherwise be lost.
bce_years_dict = {
    "Qin's wars of unification": (-230, -221),
    'Punic Wars': (-264, -146),
    'Gallic Wars': (-58, -50),
    'Kalinga War': (-321, -261),
}
for event, (start_year, end_year) in bce_years_dict.items():
    mask_series = (anthropogenic_disasters_df.Event == event)
    anthropogenic_disasters_df.loc[mask_series, 'Start'] = start_year
    anthropogenic_disasters_df.loc[mask_series, 'End'] = end_year

# Events whose end year is pinned to the current year
ongoing_events_list = [
    'Syrian civil war', 'War on terror', 'Colombian conflict', 'Somali Civil War', 'Boko Haram insurgency',
    'Yemeni Civil War (2014–present)', 'War in Darfur', 'Genocide of Nuba Peoples', 'Papua conflict',
    "Lord's Resistance Army insurgency", 'Internal conflict in Myanmar', 'Moro conflict', 'Arab–Israeli conflict',
    'Mexican drug war', 'Kurdish rebellions in Turkey', 'Kivu conflict', 'Kashmir conflict'
]
mask_series = anthropogenic_disasters_df.Event.isin(ongoing_events_list)
anthropogenic_disasters_df.loc[mask_series, 'End'] = present_year

# Compute percentages
mask_series = anthropogenic_disasters_df.Location.map(lambda x: 'China' in str(x))
anthropogenic_disasters_df.loc[mask_series, 'percent_china'] = 100.0
anthropogenic_disasters_df.loc[mask_series, 'percent_india'] = 0.0
mask_series = anthropogenic_disasters_df.Location.map(lambda x: 'India' in str(x)) & (anthropogenic_disasters_df.Event != 'Napoleonic Wars')
anthropogenic_disasters_df.loc[mask_series, 'percent_china'] = 0.0
anthropogenic_disasters_df.loc[mask_series, 'percent_india'] = 100.0

# Convert numeric columns
columns_list = ['Lowest estimate', 'Highest estimate', 'Geometric mean estimate', 'Start', 'End']
cleaning_func = lambda x: re.sub(r'[^0-9-]+', '', str(x))
for cn in columns_list:
    anthropogenic_disasters_df[cn] = anthropogenic_disasters_df[cn].map(cleaning_func)
    anthropogenic_disasters_df[cn] = pd.to_numeric(anthropogenic_disasters_df[cn], errors='coerce', downcast='integer')

# Recompute Lowest estimates
# This has to run AFTER the numeric conversion: before it every cell is a string, so
# isnull() could never be True and the estimates could not be used in arithmetic.
# The geometric mean is sqrt(lowest*highest), hence lowest = geometric_mean**2 / highest.
mask_series = (
    (anthropogenic_disasters_df.Event == 'Punic Wars')
    & anthropogenic_disasters_df['Lowest estimate'].isnull()
    & anthropogenic_disasters_df['Geometric mean estimate'].notnull()
    & (anthropogenic_disasters_df['Highest estimate'] > 0)
)
if mask_series.any():
    # Work in float64: the columns may have been downcast to int32, where squaring would overflow
    gme_series = anthropogenic_disasters_df.loc[mask_series, 'Geometric mean estimate'].astype('float64')
    he_series = anthropogenic_disasters_df.loc[mask_series, 'Highest estimate'].astype('float64')
    anthropogenic_disasters_df.loc[mask_series, 'Lowest estimate'] = (gme_series*gme_series/he_series).astype('int64')

    # The column held NaN until now (so it is float); downcast again once the gap is filled
    anthropogenic_disasters_df['Lowest estimate'] = pd.to_numeric(
        anthropogenic_disasters_df['Lowest estimate'], errors='coerce', downcast='integer'
    )

In [6]:

# Events assigned a 0% share for both China and India; this overrides any value
# already present in the two percent columns for these rows.
events_list = [
    'European colonization of the Americas', 'Russian Civil War', "Thirty Years' War", 'Second Congo War', 'French Wars of Religion', "Hundred Years' War",
    'Crusades', 'Nigerian Civil War', 'Mfecane', 'Punic Wars', 'Second Sudanese Civil War', 'Soviet–Afghan War', 'Japanese invasions of Korea',
    'Mexican Revolution', 'Wars of the Three Kingdoms', 'Conquests of Mehmed II', 'Ethiopian Civil War', 'Jewish–Roman wars', 'American Civil War',
    'Algerian War', 'War of the Spanish Succession', 'Spanish Civil War', 'Gallic Wars', 'Spanish American wars of independence',
    'Iran–Iraq War', 'Syrian civil war', 'French invasion of Russia', 'English Civil War', 'Angolan Civil War', 'First Sudanese Civil War', 'Colombian conflict',
    'Albigensian Crusade', 'First Congo War', 'Continuation War', 'Somali Civil War', 'South Sudanese Civil War', 'Crimean War', 'Cuban War of Independence',
    'Iraq War', 'Boko Haram insurgency', 'Great Northern War', 'Italian Wars', 'Tigray War', 'French conquest of Algeria', 'Burundian Civil War', 'Yemeni Civil War (2014–present)',
    'War in Darfur', 'Second Italo-Ethiopian War', 'Paraguayan War', 'Genocide of Nuba Peoples', 'Papua conflict', "Ten Years' War", 'Philippine–American War',
    'Venezuelan War of Independence', 'Ugandan Bush War', "Lord's Resistance Army insurgency", 'Franco-Dutch War', 'War in Iraq (2013–2017)', 'Iraqi–Kurdish conflict',
    'Campaigns of Suleiman the Magnificent', 'Franco-Spanish War (1635–1659)', 'Carlist Wars', 'La Violencia', 'War in Afghanistan (2001–2021)', 'Internal conflict in Myanmar',
    'Winter War', 'Guatemalan Civil War', 'Greek Civil War', '2022 Russian invasion of Ukraine', 'North Yemen Civil War', '1991 Iraqi uprisings', 'Balkan Wars',
    'Anglo-Spanish War (1585–1604)', 'Saint-Domingue Expedition', 'Yugoslav Wars', 'Lebanese Civil War', 'Sierra Leone Civil War', 'Great Turkish War', "Thousand Days' War",
    'Moro conflict', 'Arab–Israeli conflict', 'Mexican drug war', 'Aceh War', 'Bosnian War', "German Peasants' War", 'Kurdish rebellions in Turkey', 'Congo Crisis', 'Insurgency in Laos',
    'Kivu conflict', 'Algerian Civil War', 'Angolan War of Independence', 'Napoleonic Wars', 'Korean War', "Seven Years' War", 'French Revolutionary Wars', "Eighty Years' War",
    'War on terror', 'First Indochina War'
]

# One membership mask covers every listed event at once
mask_series = anthropogenic_disasters_df.Event.isin(events_list)
for percent_column in ('percent_china', 'percent_india'):
    anthropogenic_disasters_df.loc[mask_series, percent_column] = 0.0


## Compute WWII Percentages

In [7]:

wikipedia_url = 'https://en.wikipedia.org/wiki/World_War_II_casualties'

# The page is read twice: once as a BeautifulSoup tree, keeping the raw
# "wikitable" elements, ...
page_soup = nu.get_page_soup(wikipedia_url, verbose=False)
table_soups_list = page_soup.find_all('table', class_='wikitable')

# ... and once as a list of DataFrames
tables_df_list = nu.get_page_tables(wikipedia_url, verbose=True)

[(0, (62, 9)), (5, (37, 7)), (3, (21, 7)), (2, (17, 7)), (4, (19, 4)), (6, (22, 2)), (1, (5, 8)), (9, (8, 2)), (8, (5, 2)), (7, (4, 2))]


In [8]:

# Collect the text of every bold tag found inside the left-aligned cells of the first wikitable
table_soup = table_soups_list[0]
nations_list = [
    b_soup.text.strip()
    for td_soup in table_soup.find_all('td', attrs={'style': 'text-align:left'})
    for b_soup in td_soup.find_all('b')
]

# Show the labels together with the two summary rows appended (nations_list itself is unchanged)
nations_list + ['Other nations', 'Approximate totals']

['Albania', 'Australia', 'Austria', 'Belgium', 'Brazil', 'Bulgaria', 'Burma', 'Canada', 'China', 'Cuba', 'Czechoslovakia', 'Denmark', 'Dutch East Indies', 'Egypt', 'Estonia', 'Ethiopia', 'Finland', 'France', 'French Indochina', 'Germany', 'Greece', 'Guam', 'Hungary', 'Iceland', 'India', 'Iraq', 'Ireland', 'Italy', 'Japan', 'Korea', 'Latvia', 'Lithuania', 'Luxembourg', 'Malaya & Singapore', 'Malta', 'Mexico', 'Mongolia', 'Nauru', 'Nepal', 'Netherlands', 'Newfoundland', 'New Zealand', 'Norway', 'Papua and New Guinea', 'Philippines', 'Poland', 'Portuguese Timor', 'Romania', 'Ruanda-Urundi', 'South Africa', 'South Seas Mandate', 'Soviet Union', 'Spain', 'Sweden', 'Switzerland', 'Thailand', 'Turkey', 'United Kingdom', 'United States', 'Yugoslavia', 'Other nations', 'Approximate totals']

In [9]:

# Replace the scraped Country column with the labels recovered from the HTML, plus the two summary rows
wwii_casualties_df = tables_df_list[0].copy()
wwii_casualties_df['Country'] = pd.Series(nations_list + ['Other nations', 'Approximate totals'])

# Text cleanup, one pass per step:
#   1. drop everything from the first "[" and from the first "(" onward (all columns, Country included)
#   2. keep only the leading run of digits, dots and commas
#   3. strip the remaining non-numeric characters (the commas)
# and finally coerce each column to a number; anything unparseable becomes NaN.
wwii_casualties_df = (
    wwii_casualties_df
    .applymap(lambda x: str(x).split('[')[0].strip().split('(')[0].strip())
    .set_index('Country')
    .applymap(lambda x: re.split('[^0-9.,]+', str(x), maxsplit=0)[0].strip())
    .applymap(lambda x: re.sub('[^0-9.]+', '', str(x)))
    .apply(pd.to_numeric, errors='coerce', downcast='integer')
)
wwii_casualties_df

Unnamed: 0_level_0,Total population 1/1/1939,Military deaths from all causes,Civilian deaths due to military activity and crimes against humanity,Civilian deaths due to war-related famine and disease,Total deaths,Deaths as % of 1939 population,Average Deaths as % of 1939 population,Military wounded
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Albania,1073000,30000.0,,,30000.0,2.80,2.80,
Australia,6968000,39700.0,700.0,,40400.0,0.58,0.58,39803.0
Austria,6653000,,,,,,,
Belgium,8387000,12000.0,76000.0,,88000.0,1.05,1.05,55513.0
Brazil,40289000,1000.0,1000.0,,2000.0,0.00,0.00,4222.0
...,...,...,...,...,...,...,...,...
United Kingdom,47760000,383700.0,67200.0,,450900.0,0.94,0.94,376239.0
United States,131028000,407300.0,12100.0,,419400.0,0.32,0.32,671801.0
Yugoslavia,15490000,300000.0,581000.0,,1027000.0,6.63,8.80,425000.0
Other nations,300000000,,,,,,,


In [10]:

# Compute WWII percentages
# Show the three rows the calculation relies on
focus_rows_list = ['China', 'India', 'Approximate totals']
display(wwii_casualties_df[wwii_casualties_df.index.isin(focus_rows_list)])

# Pull each row's total deaths out as a scalar
total_deaths_series = wwii_casualties_df['Total deaths']
china_deaths = total_deaths_series[wwii_casualties_df.index == 'China'].squeeze()
india_deaths = total_deaths_series[wwii_casualties_df.index == 'India'].squeeze()
total_deaths = total_deaths_series[wwii_casualties_df.index == 'Approximate totals'].squeeze()

# Each country's share of the overall total, written onto the World War II row
mask_series = anthropogenic_disasters_df.Event.eq('World War II')
for percent_column, deaths in [('percent_china', china_deaths), ('percent_india', india_deaths)]:
    anthropogenic_disasters_df.loc[mask_series, percent_column] = 100*deaths/total_deaths

Unnamed: 0_level_0,Total population 1/1/1939,Military deaths from all causes,Civilian deaths due to military activity and crimes against humanity,Civilian deaths due to war-related famine and disease,Total deaths,Deaths as % of 1939 population,Average Deaths as % of 1939 population,Military wounded
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
China,517568000,3000000.0,7357000.0,5000000.0,15000000.0,2.9,3.38,1761335.0
India,377800000,87000.0,,2100000.0,2200000.0,0.58,0.58,64354.0
Approximate totals,2300000000,21000000.0,29000000.0,19000000.0,70000000.0,3.0,3.35,



## Compute Mongol Percentages

In [11]:

# Compute Mongol percentages

# China: estimates vary, but around 30% of the deaths from the Mongol invasions
# and conquests are thought to have happened in what is modern day China,
# i.e. around 12.4 million of the 41.4 million deaths.
china_share = 30.0

# India: estimates vary, but between 10 and 20 million people are believed to have died
# in India as a result of the Mongol invasions and conquests, i.e. between 24% and 48%
# of the estimated 41.4 million deaths; the lower bound is the value used here.
india_share = 24.0

mask_series = anthropogenic_disasters_df.Event.eq('Mongol invasions and conquests')
anthropogenic_disasters_df.loc[mask_series, 'percent_china'] = china_share
anthropogenic_disasters_df.loc[mask_series, 'percent_india'] = india_share


## Compute WWI Percentages

In [12]:

wikipedia_url = 'https://en.wikipedia.org/wiki/World_War_I_casualties'

# The page is read twice: once as a BeautifulSoup tree, keeping the raw
# "wikitable" elements, ...
page_soup = nu.get_page_soup(wikipedia_url, verbose=False)
table_soups_list = page_soup.find_all('table', class_='wikitable')

# ... and once as a list of DataFrames
tables_df_list = nu.get_page_tables(wikipedia_url, verbose=True)

[(0, (34, 9)), (9, (48, 2)), (1, (36, 2)), (5, (14, 2)), (15, (14, 2)), (4, (9, 2)), (17, (8, 2)), (19, (7, 2)), (2, (5, 2)), (7, (5, 2)), (8, (4, 2)), (21, (4, 2)), (11, (3, 2)), (13, (3, 2)), (22, (3, 2)), (24, (3, 2)), (3, (2, 2)), (10, (2, 2)), (16, (2, 2)), (6, (1, 2)), (12, (1, 2)), (14, (1, 2)), (18, (1, 2)), (20, (1, 2)), (23, (1, 2))]


In [13]:

# Build one label per row of the first wikitable. A label is the space-joined text of:
#   - titled links and "plainlist" list items inside left-aligned <td> cells, and
#   - full-width (colspan=9) <th> header cells: their titled links, or the cell text if it has none.
# Rows that yield no text are skipped.
table_soup = table_soups_list[0]
nations_list = []

# recursive=False restricts the search to direct children, i.e. the table's own body rows
for tbody_soup in table_soup.find_all('tbody', recursive=False):
    for tr_soup in tbody_soup.find_all('tr', recursive=False):
        nation_list = []
        for td_soup in tr_soup.find_all('td', attrs={'align': 'left'}):
            nation_list.extend(a_soup.text.strip() for a_soup in td_soup.find_all('a', attrs={'title': True}))
            for div_soup in td_soup.find_all('div', attrs={'class': 'plainlist'}):
                nation_list.extend(li_soup.text.strip() for li_soup in div_soup.find_all('li'))
        for th_soup in tr_soup.find_all('th', attrs={'colspan': '9'}):
            a_soups_list = th_soup.find_all('a', attrs={'title': True})
            if a_soups_list:
                nation_list.extend(a_soup.text.strip() for a_soup in a_soups_list)
            else:
                nation_list.append(th_soup.text.strip())
        if nation_list:
            nations_list.append(' '.join(nation_list))
nations_list

['Allies and co-belligerents of World War I', 'Australia', 'Canada', 'India', 'New Zealand', 'Newfoundland', 'South Africa', 'United Kingdom', 'Sub-total British Empire', 'Belgium', 'France', 'Greece', 'Italy', 'Japan', 'Montenegro', 'Portugal', 'Romania', 'Russia', 'Serbia', 'United States', 'Total Allied Powers', 'Central Powers', 'Austria-Hungary', 'Bulgaria', 'Germany', 'Ottoman Empire', 'Total Central Powers', 'Neutral nations', 'Denmark', 'Luxembourg', 'Norway', 'Persia', 'Sweden', 'Albania']

In [14]:

# Give the scraped table flat, readable column names
wwi_casualties_df = tables_df_list[0].copy()
wwi_casualties_df.columns = [
    'Nation',
    'Population (millions)',
    'Combat deaths and missing in action (included in total military deaths)',
    'Total military deaths (from all causes)',
    'Civilian deaths (military action and crimes against humanity)',
    'Increase in civilian deaths (malnutrition and disease excluding Influenza pandemic)',
    'Total deaths',
    'Deaths as % of population',
    'Military wounded'
]

# Row labels come from the HTML scrape; its first entry is left out and a closing 'Grand total' label is added
wwi_casualties_df['Nation'] = pd.Series(nations_list[1:] + ['Grand total'])

# Text cleanup on the data columns, one pass per step:
#   1. drop everything from the first "[" and from the first "(" onward
#   2. keep only the leading run of digits, dots and commas
#   3. strip the remaining non-numeric characters (the commas)
# and finally coerce each column to a number; anything unparseable becomes NaN.
wwi_casualties_df = (
    wwi_casualties_df
    .set_index('Nation')
    .applymap(lambda x: str(x).split('[')[0].strip().split('(')[0].strip())
    .applymap(lambda x: re.split('[^0-9.,]+', str(x), maxsplit=0)[0].strip())
    .applymap(lambda x: re.sub('[^0-9.]+', '', str(x)))
    .apply(pd.to_numeric, errors='coerce', downcast='integer')
)
wwi_casualties_df

Unnamed: 0_level_0,Population (millions),Combat deaths and missing in action (included in total military deaths),Total military deaths (from all causes),Civilian deaths (military action and crimes against humanity),Increase in civilian deaths (malnutrition and disease excluding Influenza pandemic),Total deaths,Deaths as % of population,Military wounded
Nation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Australia,5.0,61527.0,59330.0,,,59330.0,1.2,152171.0
Canada,7.2,56638.0,56639.0,1963.0,,58639.0,0.8,149732.0
India,315.1,64449.0,64449.0,,,64449.0,0.0,69214.0
New Zealand,1.1,18166.0,16711.0,,,16711.0,1.5,41317.0
Newfoundland,0.2,1204.0,1204.0,,,1204.0,0.6,2314.0
South Africa,6.0,7121.0,7121.0,,,7121.0,0.1,12029.0
United Kingdom,45.4,744000.0,887858.0,16829.0,107000.0,867829.0,1.9,1675000.0
Sub-total British Empire,380.0,953104.0,949454.0,18829.0,107000.0,1077283.0,0.3,2101077.0
Belgium,7.4,38170.0,38170.0,23700.0,62000.0,123870.0,1.7,44686.0
France,39.6,1150000.0,1357000.0,40000.0,300000.0,1697000.0,4.3,4266000.0


In [15]:

# Compute WWI percentages
# Show whichever of the relevant rows exist in the table
focus_rows_list = ['China', 'India', 'Grand total']
display(wwi_casualties_df[wwi_casualties_df.index.isin(focus_rows_list)])

total_deaths_series = wwi_casualties_df['Total deaths']

# China counts as zero deaths unless the table has exactly one row for it
china_rows_series = total_deaths_series[wwi_casualties_df.index == 'China']
china_deaths = china_rows_series.squeeze() if (china_rows_series.shape[0] == 1) else 0

india_deaths = total_deaths_series[wwi_casualties_df.index == 'India'].squeeze()
total_deaths = total_deaths_series[wwi_casualties_df.index == 'Grand total'].squeeze()

# Each country's share of the grand total, written onto the World War I row
mask_series = anthropogenic_disasters_df.Event.eq('World War I')
for percent_column, deaths in [('percent_china', china_deaths), ('percent_india', india_deaths)]:
    anthropogenic_disasters_df.loc[mask_series, percent_column] = 100*deaths/total_deaths

Unnamed: 0_level_0,Population (millions),Combat deaths and missing in action (included in total military deaths),Total military deaths (from all causes),Civilian deaths (military action and crimes against humanity),Increase in civilian deaths (malnutrition and disease excluding Influenza pandemic),Total deaths,Deaths as % of population,Military wounded
Nation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
India,315.1,64449.0,64449.0,,,64449.0,0.0,69214.0
Grand total,959.7,8042189.0,8573054.0,2250099.0,5411000.0,15000000.0,1.7,22101100.0



## Compute Vietnam War Percentages

In [16]:

# Hard-coded figures; reference page: https://en.wikipedia.org/wiki/Vietnam_War_casualties
total_deaths = 2_450_000
china_deaths, india_deaths = 1_446, 0

# Each country's share of the total, written onto the Vietnam War row
mask_series = anthropogenic_disasters_df.Event.eq('Vietnam War')
for percent_column, deaths in [('percent_china', china_deaths), ('percent_india', india_deaths)]:
    anthropogenic_disasters_df.loc[mask_series, percent_column] = 100*deaths/total_deaths


## Compute Sri Lankan Civil War Percentages

In [17]:

# The whole toll of this event is attributed to India, none to China
mask_series = anthropogenic_disasters_df.Event.eq('Sri Lankan Civil War')
for percent_column, share in {'percent_china': 0.0, 'percent_india': 100.0}.items():
    anthropogenic_disasters_df.loc[mask_series, percent_column] = share


## Compute Bangladesh Liberation War Percentages

In [18]:

# The whole toll of this event is attributed to India, none to China
mask_series = anthropogenic_disasters_df.Event.eq('Bangladesh Liberation War')
for percent_column, share in {'percent_china': 0.0, 'percent_india': 100.0}.items():
    anthropogenic_disasters_df.loc[mask_series, percent_column] = share


----

In [19]:

# List the events still lacking a China or India share
percent_columns_list = ['percent_china', 'percent_india']
mask_series = anthropogenic_disasters_df[percent_columns_list].isnull().any(axis='columns')
anthropogenic_disasters_df[mask_series]

Unnamed: 0,Event,Lowest estimate,Highest estimate,Geometric mean estimate,Location,Start,End,Duration,Notes,percent_china,percent_india


In [20]:

# Weight each event's geometric mean estimate by China's share and add the results up
mask_series = anthropogenic_disasters_df.percent_china.notnull()
rated_rows = anthropogenic_disasters_df[mask_series]
total_casualties = (rated_rows.percent_china * rated_rows['Geometric mean estimate'] / 100).sum()
print(f'{total_casualties:,} ({humanize.intword(total_casualties)})')

# Keep (and show) only the events with a non-zero China share
mask_series = anthropogenic_disasters_df.percent_china.gt(0.0)
china_casualties_df = anthropogenic_disasters_df[mask_series]
display(china_casualties_df)

140,788,168.59147674 (140.8 million)


Unnamed: 0,Event,Lowest estimate,Highest estimate,Geometric mean estimate,Location,Start,End,Duration,Notes,percent_china,percent_india
0,World War II,35000000,118357000,70000000,Worldwide,1939,1945,6 years and 1 day,See also: World War II casualties.,21.428571,3.142857
1,Mongol invasions and conquests,30000000,57000000,41352146,Eurasia,1206,1405,199 years,"See also: Mongol Empire, Destruction under the...",30.0,24.0
2,Taiping Rebellion,20000000,30000000,28284271,China,1850,1864,14 years,"A civil war in China. See also: Qing dynasty, ...",100.0,0.0
4,Transition from Ming to Qing,25000000,25000000,25000000,China,1618,1683,65 years,See also: Qing dynasty,100.0,0.0
5,Second Sino-Japanese War,18000000,22000000,19899748,China,1937,1945,"8 years, 1 month, 3 weeks and 5 days",,100.0,0.0
7,An Lushan Rebellion,13000000,13000000,13000000,China,755,763,8 years,A civil war in Tang China. Also known as the A...,100.0,0.0
8,Dungan Revolt,10000000,10000000,10000000,China,1862,1877,15 years,Civil war in China. See also: Qing dynasty,100.0,0.0
9,Chinese Civil War,8000000,11692000,9671401,China,1927,1949,14 years,Major civil war in China that led to the found...,100.0,0.0
14,Yellow Turban Rebellion,3000000,7000000,4582576,China (Han dynasty),184,205,22 years,Part of the Three Kingdoms War,100.0,0.0
19,Qin's wars of unification,2000000,2000000,2000000,China,-230,-221,9 years,See also: History of China,100.0,0.0


In [21]:

# Weight each event's geometric mean estimate by India's share and add the results up
mask_series = anthropogenic_disasters_df.percent_india.notnull()
rated_rows = anthropogenic_disasters_df[mask_series]
total_casualties = (rated_rows.percent_india * rated_rows['Geometric mean estimate'] / 100).sum()
print(f'{total_casualties:,} ({humanize.intword(total_casualties)})')

# Show only the events with a non-zero India share
mask_series = anthropogenic_disasters_df.percent_india.gt(0.0)
anthropogenic_disasters_df[mask_series]

17,886,276.04 (17.9 million)


Unnamed: 0,Event,Lowest estimate,Highest estimate,Geometric mean estimate,Location,Start,End,Duration,Notes,percent_china,percent_india
0,World War II,35000000,118357000,70000000,Worldwide,1939,1945,6 years and 1 day,See also: World War II casualties.,21.428571,3.142857
1,Mongol invasions and conquests,30000000,57000000,41352146,Eurasia,1206,1405,199 years,"See also: Mongol Empire, Destruction under the...",30.0,24.0
6,World War I,15000000,32500000,20000000,Worldwide,1914,1918,"4 years, 3 months, 1 week",Military conflict lasting from 1914 to 1918 be...,0.0,0.42966
12,Mughal–Maratha Wars,600000,5600000,3200000,India,1680,1707,27 years,,0.0,100.0
37,Indian Rebellion of 1857,806000,806000,806000,India,1857,1858,1 year,,0.0,100.0
38,Bangladesh Liberation War,200000,3000000,774597,Bangladesh,1971,1971,1 year,See also: 1971 Bangladesh genocide,0.0,100.0
55,Maratha invasions of Bengal,400000,400000,400000,India,1741,1751,10 years,,0.0,100.0
76,Kalinga War,220000,250000,234521,India,-321,-261,60 years,,0.0,100.0
114,Kashmir conflict,80000,110000,93808,"North India, Pakistan",1947,2023,74 years,,0.0,100.0
117,Sri Lankan Civil War,80000,100000,89443,Sri Lanka,1983,2009,26 years,,0.0,100.0


In [22]:

# Preview the first 20 rows of the finished dataset
display(anthropogenic_disasters_df.head(20))

Unnamed: 0,Event,Lowest estimate,Highest estimate,Geometric mean estimate,Location,Start,End,Duration,Notes,percent_china,percent_india
0,World War II,35000000,118357000,70000000,Worldwide,1939,1945,6 years and 1 day,See also: World War II casualties.,21.428571,3.142857
1,Mongol invasions and conquests,30000000,57000000,41352146,Eurasia,1206,1405,199 years,"See also: Mongol Empire, Destruction under the...",30.0,24.0
2,Taiping Rebellion,20000000,30000000,28284271,China,1850,1864,14 years,"A civil war in China. See also: Qing dynasty, ...",100.0,0.0
3,European colonization of the Americas,8400000,80000000,25922963,Americas,1492,1691,199 years,Death toll estimates vary due to lack of conse...,0.0,0.0
4,Transition from Ming to Qing,25000000,25000000,25000000,China,1618,1683,65 years,See also: Qing dynasty,100.0,0.0
5,Second Sino-Japanese War,18000000,22000000,19899748,China,1937,1945,"8 years, 1 month, 3 weeks and 5 days",,100.0,0.0
6,World War I,15000000,32500000,20000000,Worldwide,1914,1918,"4 years, 3 months, 1 week",Military conflict lasting from 1914 to 1918 be...,0.0,0.42966
7,An Lushan Rebellion,13000000,13000000,13000000,China,755,763,8 years,A civil war in Tang China. Also known as the A...,100.0,0.0
8,Dungan Revolt,10000000,10000000,10000000,China,1862,1877,15 years,Civil war in China. See also: Qing dynasty,100.0,0.0
9,Chinese Civil War,8000000,11692000,9671401,China,1927,1949,14 years,Major civil war in China that led to the found...,100.0,0.0


In [23]:

# Persist every notebook-level object whose name ends in "_df" through the project helper.
# Names are looked up in globals() instead of being passed to eval(): same objects,
# but no string is ever evaluated as code.
nu.store_objects(**{pickle_name: globals()[pickle_name] for pickle_name in dir() if pickle_name.endswith('_df')})

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\anthropogenic_disasters_df.pkl
Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\china_casualties_df.pkl
Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\wwi_casualties_df.pkl
Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\wwii_casualties_df.pkl
