# Exploring World Historical Battle Database
Access to this database was granted by its creator and curator, [Dr. Shuhei Kitamura of Osaka University](https://osf.io/j357k). It's important we acknowledge his generosity. 

Canada's involvement in world conflicts is an important part of Canadian history, which makes it a natural topic for applying data science to social studies. This database allows us to explore battles from throughout human history and across the world.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import warnings
import math
import pycountry_convert as pc
from geopy .geocoders import Nominatim

In [None]:
# Load the battle database, keep only the rows that have a year,
# store the year as an integer and order the battles chronologically.
data = pd.read_excel('data/whbd_v11-2.xlsx')
data = data.dropna(subset=['year'])
data['year'] = data['year'].map(int)
data = data.sort_values('year')
data

In [None]:
# Show the column labels of the dataset (the names only, not the values they hold)
data.columns

In [None]:
# Keep only the rows whose 'bell' column is 'Canada' (i.e. battles that list
# Canada as a combatant), then discard the columns this notebook does not use.
candata = data.loc[data['bell'] == 'Canada'].drop(
    columns=[
        'uk', 'fr', 'de', 'sp', 'sw', 'tr', 'at',
        'ru', 'nl', 'it', 'pt', 'dk', 'habsburg', 'hre',
        'naval', 'river', 'lake', 'air', 'multiple', 'entire',
        'ongoing', 'plan', 'nopage', 'unknown',
    ]
)
display(candata)

We can take the data and list only the battles that name Canada as a participant:

In [None]:
# Discard battles without a name, then list the distinct names in sorted order
candata = candata.dropna(subset=['bname'])

list(pd.unique(candata['bname'].sort_values()))

We can plot the battles on a map with their latitude and longitude coordinates.

You can drag to move around the map, zoom in and out to get more clarity. Hovering over each data point lists the name of the battle, as well as the war in which the battle was fought.

In [None]:
# One marker per battle, coloured by war; the legend is hidden because
# every war would otherwise get its own entry.
fig = px.scatter_geo(
    data_frame=candata,
    lat='lat',
    lon='lng',
    color='war',
    hover_name='bname',
    hover_data=['year'],
    title='Battles participated in by post-Confederation Canada',
).update_layout(showlegend=False)
fig.show()

Let's animate through the years to see the wars that Canada has participated in.

In [None]:
# NOTE: this silences every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Distinct battle years in order of first appearance in candata.
years = list(candata['year'].unique())

# Start from the real battles; animation_years() appends the repeated rows to this frame.
animate_candata = pd.DataFrame(candata)


def animation_years(row):
    """Append copies of one battle to ``animate_candata`` for every later year.

    An animated plot only draws the rows belonging to the current frame, so a
    battle has to be repeated in each subsequent year for its marker to stay
    on the map once it has appeared.

    Parameters
    ----------
    row : pandas.Series
        One row of ``candata``. It is not modified.

    Returns
    -------
    None
        The new rows are appended to the notebook-level ``animate_candata``.
    """
    global animate_candata

    later_years = years[years.index(row['year']) + 1:]

    # Build all repeated rows at once and set the year by column label. The
    # previous positional write (row[4] = i) relied on 'year' being the fifth
    # column and on integer keys being treated as positions on a
    # label-indexed Series, which pandas has deprecated.
    repeats = pd.DataFrame([row.to_dict()] * len(later_years),
                           columns=animate_candata.columns)
    repeats['year'] = later_years

    animate_candata = pd.concat([animate_candata, repeats], ignore_index=True)


for _, battle in candata.iterrows():
    animation_years(battle)



In [None]:
def rank_rows(df, battle_order=None):
    """Number the rows of ``df`` so that rows of the same battle are contiguous.

    Battles are visited in the order given by ``battle_order``. Within one
    battle the rows keep their current order in ``df``. Each row receives the
    next free rank, starting at 1. Rows whose ``'bname'`` is not in
    ``battle_order`` keep rank 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'bname'`` column. A ``'sort_rank'`` column is added
        (or overwritten) in place.
    battle_order : iterable of str, optional
        Battle names in the order they should be ranked. Defaults to the
        notebook-level ``battles`` list, which is what this function always
        used before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``'sort_rank'`` column.
    """
    if battle_order is None:
        battle_order = battles

    df['sort_rank'] = 0
    next_rank = 1
    for name in battle_order:
        # Rows of this battle that have not been ranked yet. The rank check
        # keeps a name that appears twice in battle_order from being re-ranked.
        unranked = (df['bname'] == name) & (df['sort_rank'] == 0)
        count = int(unranked.sum())
        if not count:
            continue
        # Single .loc write instead of chained df['sort_rank'][i] = ...,
        # which may assign to a temporary copy and never reach df.
        df.loc[unranked, 'sort_rank'] = list(range(next_rank, next_rank + count))
        next_rank += count
    return df
            

    
# Put the rows in chronological order, note the battle names in order of
# first appearance, rank the rows battle by battle and sort by that rank.
animate_candata = animate_candata.sort_values(by='year')
battles = list(pd.unique(animate_candata.sort_values(by='year')['bname']))
animate_candata = rank_rows(animate_candata)

animate_candata = animate_candata.sort_values(by='sort_rank')
animate_candata

By clicking on the "play" button at the bottom, we'll be able to look at the wars that Canada has fought throughout the years.

In [None]:
# Animated map: one frame per year, markers matched across frames by war
px.scatter_geo(
    data_frame=animate_candata,
    lat='lat',
    lon='lng',
    hover_name='bname',
    animation_frame='year',
    animation_group='war',
    height=800,
    title='Battles participated in by post-Confederation Canada',
)

Now let's add in the number of casualties of each battle that Canada has fought in. We will change the size of each bubble to match the number of casualties each battle had. Larger bubbles will indicate battles with more casualties.

In [None]:
# Only battles with a recorded casualty figure can be sized and coloured by it
casualties_can = candata.dropna(subset=['casualties'])

px.scatter_geo(
    data_frame=casualties_can,
    lat='lat',
    lon='lng',
    size='casualties',
    color='casualties',
    hover_name='bname',
    hover_data=['war', 'year'],
    height=800,
    title='Casualties of battles participated in by post-Confederation Canada',
)

We can also create a bar graph that shows the bloodiest wars that Canada has been a part of.

In [None]:
# Total casualties per war, keeping only wars with a positive total,
# ordered from the smallest total to the largest.
can_bloodiest_war = (
    candata.groupby('war', as_index=False)['casualties']
    .sum()
    .loc[lambda totals: totals['casualties'] > 0]
    .sort_values('casualties')
)

In [None]:
# Ten wars with the highest casualty totals, bloodiest first.
# The previous slice [:10:-1] walked backwards from the last row and stopped
# before position 10, so it returned every war except the first eleven
# (and nothing at all when there were eleven or fewer wars).
# Sorting here also removes the dependence on how can_bloodiest_war was ordered.
top_10_bloddiest_can_wars = can_bloodiest_war.sort_values('casualties', ascending=False).head(10)

fig = px.bar(top_10_bloddiest_can_wars, x='war', y='casualties', title="Canada's Bloodiest Wars")

fig.show()

We can also look at which continents Canada has fought the most in.

In [None]:
from geopy.extra.rate_limiter import RateLimiter

locator = Nominatim(user_agent='battles')
# The public Nominatim service asks clients to stay at or below one request
# per second, so space the calls out (and retry transient failures) instead of
# sending them back to back. swallow_exceptions=False keeps errors visible,
# as they were with the direct locator.reverse call.
reverse_geocode = RateLimiter(locator.reverse, min_delay_seconds=1,
                              swallow_exceptions=False)

# .copy() gives an independent frame, so adding a column below does not write
# into a slice of candata.
can_battles_continent = candata[candata['locn'].notna()].copy()

# Reverse-geocode each battle's "lat,lng" pair into an English-language address
true_locations = [
    reverse_geocode(str(lat) + "," + str(lng), language='en')
    for lat, lng in zip(can_battles_continent['lat'], can_battles_continent['lng'])
]

can_battles_continent['True Location'] = true_locations
# Drop the battles for which the geocoder returned no result
can_battles_continent = can_battles_continent[can_battles_continent['True Location'].notna()]
can_battles_continent

In [None]:
def FindContinent(location):
    """Return the continent name for a reverse-geocoded location.

    The country is read from ``location.raw['address']`` (empty string when
    the address has no ``'country'`` entry), renamed where the geocoder's
    spelling differs from the one ``pycountry_convert`` expects, and then
    converted country name -> alpha-2 code -> continent code -> continent name.
    """
    # Geocoder country names mapped to the names pycountry_convert recognises
    change = {'Palestinian Territory': 'Palestine'}

    raw_country = location.raw['address'].get('country', '')
    country = change.get(raw_country, raw_country)

    alpha2 = pc.country_name_to_country_alpha2(country)
    code = pc.country_alpha2_to_continent_code(alpha2)
    return pc.convert_continent_code_to_continent_name(code)

# Look up the continent of every geocoded battle location
can_battles_continent['continent'] = can_battles_continent['True Location'].map(FindContinent)
can_battles_continent



In [None]:
# Count the battles per continent and show the continents from most to fewest battles
continent_grouped = (
    can_battles_continent.groupby('continent')['locn']
    .count()
    .reset_index(name='Number of Battles Fought')
    .sort_values('Number of Battles Fought', ascending=False)
)
fig = px.bar(
    data_frame=continent_grouped,
    x='continent',
    y='Number of Battles Fought',
    title='Number of Battles fought by Canada in different Continents',
)

fig.show()

But these only consider battles that happened after Confederation, as the sovereign state of Canada didn't exist before then. We can also look at battles that happened in geographic Canada by only considering latitude and longitude values in that region:

In [None]:
# Bounding box for present-day Canada: north of 41.6 (the southernmost point is
# Lake Erie, ON, at 41°40' N) and west of -52.6 (the easternmost point is
# Cape Spear, NL, at 52°37' W). The -160 limit excludes a single WWII Pacific
# battle off the coast of Alaska that didn't involve Canada.
NA_data = data[data['lat'].gt(41.6) & data['lng'].lt(-52.6) & data['lng'].gt(-160)]

# Wars inside that box that don't feature Canada and therefore have to go:
remove = [
    'American Revolutionary War', 'Sioux Wars', "Red Cloud's War",
    'Dakota War of 1862', 'Russo-Tlingit War', 'Great Sioux War of 1876',
    'Powder River Expedition', 'American Civil War',
    'Yellowstone Expedition of 1873', 'Nez Perce War', 'Comanche Campaign',
    'Boston campaign', 'Modoc War', 'American Revolution',
    "King Philip's War", 'Black Hawk War', 'Colorado War',
    'American Indian Wars', 'Forage War', "Coeur d'Alene War",
    'Yakima War', 'Philadelphia campaign', 'Ghost Dance War',
]

# Drop the wars listed above together with battles that have no war at all
# (those all happened in the USA).
NA_data = NA_data[~(NA_data['war'].isin(remove) | NA_data['war'].isnull())]
          
          
          
# Map of the remaining battles, coloured by war and zoomed to fit the markers;
# the legend is hidden because every war would get its own entry.
fig = px.scatter_geo(
    data_frame=NA_data,
    lat='lat',
    lon='lng',
    color='war',
    hover_name='bname',
    hover_data=['year'],
    fitbounds='locations',
    title='Historical battles fought in present-day Canada',
).update_layout(showlegend=False)
fig.show()

In [None]:
# Distinct battle names in the geographic subset, in sorted order
list(pd.unique(NA_data['bname'].sort_values()))

Similar to before, we can also include casualties into this and have the bubble sizes match the number of casualties

In [None]:
# Only battles with a recorded casualty figure can be sized and coloured by it
NA_casualties = NA_data.dropna(subset=['casualties'])

px.scatter_geo(
    data_frame=NA_casualties,
    lat='lat',
    lon='lng',
    size='casualties',
    size_max=30,
    color='casualties',
    hover_name='bname',
    hover_data=['war', 'year'],
    fitbounds='locations',
    height=800,
    title='Historical battles fought in present-day Canada',
)

In [23]:
# Plot the same battles on top of USGS satellite imagery tiles
usgs_imagery_layer = {
    "below": 'traces',
    "sourcetype": "raster",
    "sourceattribution": "United States Geological Survey",
    "source": [
        "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
    ],
}

fig = px.scatter_mapbox(
    data_frame=NA_data,
    lat="lat",
    lon="lng",
    hover_name="bname",
    hover_data=['year'],
    color_discrete_sequence=["fuchsia"],
    zoom=2.5,
)
# Blank base map with the raster layer drawn beneath the markers, and no outer margins
fig.update_layout(
    mapbox_style="white-bg",
    mapbox_layers=[usgs_imagery_layer],
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

## Next Steps

Though the data can be kind of spotty for some battles, it could be interesting to bring in the number (or rate) of casualties for each battle, and size the markers proportional to that. It's only an estimate of the importance of the battle, but it's a good first step.

We could also use the plotting function to animate the conflicts throughout Canadian history.

Lastly, we could focus on WWI or WWII data and look at the advance of the Allied gains in Europe, highlighting battles that Canada was a major part of.