In [1]:
from warnings import filterwarnings

filterwarnings('ignore')

import pandas as pd
import numpy as np
from copy import deepcopy
from traceback import format_exc
import the_networks_of_war_python_functions

In [2]:
## notebook-wide display settings: show every column and render floats with five decimals
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.5f}'.format)

In [3]:
## loading the COW country code file and normalising its column names
c_code_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/COW country codes.csv', encoding = 'utf8')
c_code_df = c_code_df.rename(columns = {'CCode': 'c_code', 'StateNme': 'state_name'})
c_code_df = c_code_df.drop(columns = ['StateAbb'])

## one row per (code, name) pair, keeping the first occurrence
duplicate_list = ['c_code', 'state_name']
c_code_df = c_code_df.drop_duplicates(subset = duplicate_list, keep = 'first').reset_index(drop = True)

## lookup from country code to state name.
## if a code is listed under several names, the last row wins.
c_code_dic = dict(zip(c_code_df['c_code'], c_code_df['state_name']))

print(str(len(c_code_dic)) + " total countries")

217 total countries


# Participant DataFrames

## Inter-State Wars

In [4]:
## loading the inter-state war file and listing its raw column names
participant_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Inter-StateWarData_v4.0.csv', encoding = 'latin-1')

print("Inter-StateWarData_v4.0 columns: \n")
print(sorted(participant_df.columns))

## raw file headers -> the snake_case names used in the rest of the notebook.
## only the first start/end period columns are renamed here.
inter_state_column_names = {
    'WarNum': 'war_num',
    'WarName': 'war_name',
    'WarType': 'war_type',
    'ccode': 'c_code',
    'StateName': 'state_name',
    'Side': 'side',
    'BatDeath': 'battle_deaths',
    'StartYear1': 'start_year',
    'StartMonth1': 'start_month',
    'StartDay1': 'start_day',
    'EndYear1': 'end_year',
    'EndMonth1': 'end_month',
    'EndDay1': 'end_day',
    }
participant_df = participant_df.rename(columns = inter_state_column_names)

## NOTE(review): this helper lives outside the notebook. it appears to supply the start_date, end_date and
## ongoing_participation columns that the aggregation cell below relies on - confirm against the module.
participant_df = deepcopy(the_networks_of_war_python_functions.participant_start_and_end_dates(participant_df))

Inter-StateWarData_v4.0 columns: 

['BatDeath', 'EndDay1', 'EndDay2', 'EndMonth1', 'EndMonth2', 'EndYear1', 'EndYear2', 'Initiator', 'Outcome', 'Side', 'StartDay1', 'StartDay2', 'StartMonth1', 'StartMonth2', 'StartYear1', 'StartYear2', 'StateName', 'TransFrom', 'TransTo', 'Version', 'WarName', 'WarNum', 'WarType', 'WhereFought', 'ccode']

total participants with both dates found 337
total participants with at least one date not found 0


In [5]:
# ## recording the row count before the missing values are added below
# missing_values_length = deepcopy(len(participant_df))

# # ## some values in dyad_df for a given war aren't in the participant_df
# # ## these need to be added manually

# # participant_df[participant_df['war_num']==184]
# # dyad_df_test = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/directed_dyadic_war.csv', encoding = 'latin-1')
# # dyad_df_test = dyad_df_test[(dyad_df_test['warnum']==184) & ((dyad_df_test['statea']==350) | (dyad_df_test['stateb']==350))]

# # mid_test = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/dyadic MIDs 3.1.csv', encoding = 'latin-1')
# # mid_test = deepcopy(mid_test[(mid_test['disno']==1293) & ((mid_test['statea']==350)|(mid_test['stateb']==350))])


# ## manually filling in values that are found in the dyadic COW datasets but seem to be missing from the country-level sources.
# ## values have been obtained from the dyadic data (directed_dyadic_war.csv and dyadic MIDs 3.1.csv)

# # df_length = deepcopy(len(dyad_df))

# # dyad_df.loc[df_length, 'war_num'] = 163
# # dyad_df.loc[df_length, 'c_code_a'] = 811
# # dyad_df.loc[df_length, 'state_name_a'] = c_code_dic[811]
# # dyad_df.loc[df_length, 'c_code_b'] = 816
# # dyad_df.loc[df_length, 'state_name_b'] = c_code_dic[816]

# # df_length = deepcopy(len(dyad_df))

# # dyad_df.loc[df_length, 'war_num'] = 163
# # dyad_df.loc[df_length, 'c_code_a'] = 811
# # dyad_df.loc[df_length, 'state_name_a'] = c_code_dic[811]
# # dyad_df.loc[df_length, 'state_name_b'] = c_code_dic[710]
# # dyad_df.loc[df_length, 'c_code_b'] = 710

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 108
# participant_df.loc[df_length, 'war_name'] = 'Latvian Liberation'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 200
# participant_df.loc[df_length, 'state_name'] = c_code_dic[200]
# participant_df.loc[df_length, 'side'] = 1
# participant_df.loc[df_length, 'battle_deaths'] = 128
# participant_df.loc[df_length, 'start_day'] = 12
# participant_df.loc[df_length, 'start_month'] = 12
# participant_df.loc[df_length, 'start_year'] = 1918
# participant_df.loc[df_length, 'end_day'] = 1
# participant_df.loc[df_length, 'end_month'] = 2
# participant_df.loc[df_length, 'end_year'] = 1920

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 108
# participant_df.loc[df_length, 'war_name'] = 'Latvian Liberation'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 290
# participant_df.loc[df_length, 'state_name'] = c_code_dic[290]
# participant_df.loc[df_length, 'side'] = 1
# participant_df.loc[df_length, 'battle_deaths'] = 100
# participant_df.loc[df_length, 'start_day'] = 3
# participant_df.loc[df_length, 'start_month'] = 1
# participant_df.loc[df_length, 'start_year'] = 1920
# participant_df.loc[df_length, 'end_day'] = 1
# participant_df.loc[df_length, 'end_month'] = 2
# participant_df.loc[df_length, 'end_year'] = 1920

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 108
# participant_df.loc[df_length, 'war_name'] = 'Latvian Liberation'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 220
# participant_df.loc[df_length, 'state_name'] = c_code_dic[220]
# ## need to figure out how to determine this one
# participant_df.loc[df_length, 'side'] = 1
# participant_df.loc[df_length, 'battle_deaths'] = 0
# participant_df.loc[df_length, 'start_day'] = 27
# participant_df.loc[df_length, 'start_month'] = 3
# participant_df.loc[df_length, 'start_year'] = 1919
# participant_df.loc[df_length, 'end_day'] = 1
# participant_df.loc[df_length, 'end_month'] = 2
# participant_df.loc[df_length, 'end_year'] = 1920

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 139
# participant_df.loc[df_length, 'war_name'] = 'World War II'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 230
# participant_df.loc[df_length, 'state_name'] = c_code_dic[230]
# participant_df.loc[df_length, 'side'] = 2
# participant_df.loc[df_length, 'battle_deaths'] = 950
# participant_df.loc[df_length, 'start_day'] = 26
# participant_df.loc[df_length, 'start_month'] = 6
# participant_df.loc[df_length, 'start_year'] = 1941
# participant_df.loc[df_length, 'end_day'] = 20
# participant_df.loc[df_length, 'end_month'] = 2
# participant_df.loc[df_length, 'end_year'] = 1944

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 151
# participant_df.loc[df_length, 'war_name'] = 'Korean'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 920
# participant_df.loc[df_length, 'state_name'] = c_code_dic[920]
# participant_df.loc[df_length, 'side'] = 1
# participant_df.loc[df_length, 'battle_deaths'] = 23
# participant_df.loc[df_length, 'start_day'] = 29
# participant_df.loc[df_length, 'start_month'] = 6
# participant_df.loc[df_length, 'start_year'] = 1950
# participant_df.loc[df_length, 'end_day'] = 27
# participant_df.loc[df_length, 'end_month'] = 7
# participant_df.loc[df_length, 'end_year'] = 1953

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 169
# participant_df.loc[df_length, 'war_name'] = 'Six Day War'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 645
# participant_df.loc[df_length, 'state_name'] = c_code_dic[645]
# participant_df.loc[df_length, 'side'] = 2
# participant_df.loc[df_length, 'battle_deaths'] = 30
# participant_df.loc[df_length, 'start_day'] = 17
# participant_df.loc[df_length, 'start_month'] = 5
# participant_df.loc[df_length, 'start_year'] = 1967
# participant_df.loc[df_length, 'end_day'] = 10
# participant_df.loc[df_length, 'end_month'] = 6
# participant_df.loc[df_length, 'end_year'] = 1967

# participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# df_length = deepcopy(len(participant_df))

# participant_df.loc[df_length, 'war_num'] = 184
# participant_df.loc[df_length, 'war_name'] = 'Turco-Cypriot'
# participant_df.loc[df_length, 'war_type'] = 1
# participant_df.loc[df_length, 'c_code'] = 350
# participant_df.loc[df_length, 'state_name'] = c_code_dic[350]
# participant_df.loc[df_length, 'side'] = 2
# ## value is 2, documented as 26-100 deaths
# participant_df.loc[df_length, 'battle_deaths'] = 100
# participant_df.loc[df_length, 'start_day'] = 2
# participant_df.loc[df_length, 'start_month'] = 7
# participant_df.loc[df_length, 'start_year'] = 1974
# participant_df.loc[df_length, 'end_day'] = 16
# participant_df.loc[df_length, 'end_month'] = 8
# participant_df.loc[df_length, 'end_year'] = 1974

# # participant_df.loc[df_length, 'end_date'] = pd.to_datetime(participant_df.loc[df_length, 'end_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'end_day'].astype(int).astype(str))
# # participant_df.loc[df_length, 'start_date'] = pd.to_datetime(participant_df.loc[df_length, 'start_year'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_month'].astype(int).astype(str) + "-" + participant_df.loc[df_length, 'start_day'].astype(int).astype(str))

# print(str(len(participant_df)) + " total values")
# print("{} values before".format(missing_values_length))

In [6]:
## collapsing countries that appear more than once in the same war (e.g. on both sides) into one row
aggregations = {
    ## a country listed once on side 1 and once on side 2 sums to side 3
    'side': 'sum',
    'battle_deaths': 'sum',
    ## earliest entry and latest exit.
    ## start_year uses 'min' (it was 'max') so that it agrees with start_date.
    'start_date': 'min',
    'start_year': 'min',
    'end_date': 'max',
    'end_year': 'max',
    'ongoing_participation': 'max'
    }

participant_df = participant_df.groupby(['war_num', 'war_name', 'war_type', 'c_code', 'state_name']).agg(aggregations).reset_index()

## whole days between first entry and last exit.
## .dt.days replaces parsing str(Timedelta) row by row and writing ints back into a timedelta column;
## a missing date now yields NaN instead of raising.
participant_df['days_at_war'] = pd.to_timedelta(participant_df['end_date'] - participant_df['start_date']).dt.days

## fixing the column order used by the later cells
participant_df = participant_df[['war_num',
                                 'war_name',
                                 'war_type',
                                 'c_code',
                                 'state_name',
                                 'side',
                                 'battle_deaths',
                                 'start_date',
                                 'start_year',
                                 'end_date',
                                 'end_year',
                                 'days_at_war',
                                 'ongoing_participation']].copy()

In [7]:
# participant_df.tail()

## Intra-State Wars

In [8]:
## intra-state wars: this frame is later unioned onto the inter-state participant_df.
## note: the pipeline below is very inefficient because something is off with integer formatting in this file.
## this did not occur in the inter-state war file.
participant_df_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/INTRA-STATE_State_participants v5.1.csv', encoding = 'latin-1')

print("INTRA-STATE_State_participants v5.1 columns: \n")
print(sorted(participant_df_2.columns))

## either of a or b may not actually be a state.
## this will be fixed later on.
intra_state_column_names = {
    'WarNum': 'war_num',
    'WarName': 'war_name',
    'WarType': 'war_type',
    'StartDy1': 'start_day',
    'StartMo1': 'start_month',
    'StartYr1': 'start_year',
    'EndDy1': 'end_day',
    'EndMo1': 'end_month',
    'EndYr1': 'end_year',
    'CcodeA': 'c_code_a',
    'SideA': 'state_name_a',
    'CcodeB': 'c_code_b',
    'SideB': 'state_name_b',
    ## unsure if these are the same as battle deaths, or include civilians
    'Deaths A': 'battle_deaths_a',
    'Deaths B': 'battle_deaths_b',
    ## according to documentation, this includes both sides
    'TotalBDeaths': 'total_deaths_both_sides',
#     'WDuratDays': 'total_days_in_war',
    'SideAPeakTotForces': 'peak_forces_available_a',
    'SideBPeakTotForces': 'peak_forces_available_b',
    'SideAPeak TheatForces': 'peak_battle_forces_a',
    'SideBPeakTheatForces': 'peak_battle_forces_b',
    'TransFrom': 'lagging_war',
    'TransTo': 'leading_war',
    }
participant_df_2 = participant_df_2.rename(columns = intra_state_column_names)

## the original side a is labelled 1 and the original side b is labelled 2
participant_df_2['side_a'], participant_df_2['side_b'] = 1, 2

## 1894 was not a leap year, so a 29 February 1894 start is moved to the 28th
feb_29_1894 = (
    (participant_df_2['start_day'] == 29)
    & (participant_df_2['start_month'] == 2)
    & (participant_df_2['start_year'] == 1894)
)
participant_df_2.loc[feb_29_1894, 'start_day'] = 28

## correcting a wrong start year for war 976.
## this needs to be automated (check for "of ___" in war_name where start_year <> ___).
## NOTE(review): the year is written as the string '2011', not an int - confirm the date helper expects that.
participant_df_2.loc[participant_df_2['war_num'] == 976, 'start_year'] = '2011'

participant_df_2 = deepcopy(the_networks_of_war_python_functions.participant_start_and_end_dates(participant_df_2))

INTRA-STATE_State_participants v5.1 columns: 

['CcodeA', 'CcodeB', 'Deaths A', 'Deaths B', 'EndDy1', 'EndDy2', 'EndDy3', 'EndDy4', 'EndMo1', 'EndMo2', 'EndMo3', 'EndMo4', 'EndYr1', 'EndYr2', 'EndYr3', 'EndYr4', 'Initiator', 'Intnl', 'Outcome', 'SideA', 'SideAPeak TheatForces', 'SideAPeakTotForces', 'SideB', 'SideBPeakTheatForces', 'SideBPeakTotForces', 'StartDy1', 'StartDy2', 'StartDy3', 'StartDy4', 'StartMo1', 'StartMo2', 'StartMo3', 'StartMo4', 'StartYr1', 'StartYr2', 'StartYr3', 'StartYr4', 'TotalBDeaths', 'TransFrom', 'TransTo', 'V5Region', 'Version', 'WDuratDays', 'WDuratMo', 'WarName', 'WarNum', 'WarType']

total participants with both dates found 452
total participants with at least one date not found 141


In [9]:
## a/b column pairs handed to the union helper so that each participant gets a row of its own
switched_columns_list = [
    'c_code_a', 'c_code_b',
    'state_name_a', 'state_name_b',
    'side_a', 'side_b',
    'battle_deaths_a', 'battle_deaths_b',
    'peak_forces_available_a', 'peak_forces_available_b',
    'peak_battle_forces_a', 'peak_battle_forces_b',
]
participant_df_2 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(participant_df_2, switched_columns_list))

## snapshot taken before duplicates are removed.
## it is used further down as the dyadic data (no dyadic files are available for intra-state wars).
## the 'year' column will be adjusted again later.
intra_state_dyad_columns = ['war_num', 'c_code_a', 'state_name_a', 'c_code_b', 'state_name_b', 'start_year']
dyad_df_2 = participant_df_2[intra_state_dyad_columns].rename(columns = {'start_year': 'year'})

## keeping one state (or non-state) per war, then dropping the b-side columns
duplicate_list = ['war_num', 'c_code_a', 'state_name_a']
participant_df_2 = participant_df_2.drop_duplicates(subset = duplicate_list, keep = 'first').reset_index(drop = True)
participant_df_2 = deepcopy(the_networks_of_war_python_functions.drop_participant_b_columns(participant_df_2, switched_columns_list))

## Extra-State Wars

In [10]:
## extra-state wars: this frame is later unioned onto the participant_df.
## the inefficient pipeline from the intra-state file is reused to accommodate integer formatting;
## it is unclear whether that problem occurs for this file too.
participant_df_3 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Extra-StateWarData_v4.0.csv', encoding = 'latin-1')

print("Extra-StateWarData_v4.0.csv columns: \n")
print(sorted(participant_df_3.columns))

## either of a or b may not actually be a state.
## this will be fixed later on.
## NOTE(review): the raw header 'EndDay2 ' carries a trailing space (see the printed column list) and is not
## renamed here - confirm the date helper copes with it.
extra_state_column_names = {
    'WarNum': 'war_num',
    'WarName': 'war_name',
    'WarType': 'war_type',
    'StartDay1': 'start_day',
    'StartMonth1': 'start_month',
    'StartYear1': 'start_year',
    'EndDay1': 'end_day',
    'EndMonth1': 'end_month',
    'EndYear1': 'end_year',
    'ccode1': 'c_code_a',
    'SideA': 'state_name_a',
    'ccode2': 'c_code_b',
    'SideB': 'state_name_b',
    ## unsure if these are the same as battle deaths, or include civilians
    'BatDeath': 'battle_deaths_a',
    'NonStateDeaths': 'battle_deaths_b',
    }
participant_df_3 = participant_df_3.rename(columns = extra_state_column_names)

## the original side a is labelled 1 and the original side b is labelled 2
participant_df_3['side_a'], participant_df_3['side_b'] = 1, 2

participant_df_3 = deepcopy(the_networks_of_war_python_functions.participant_start_and_end_dates(participant_df_3))

Extra-StateWarData_v4.0.csv columns: 

['BatDeath', 'EndDay1', 'EndDay2 ', 'EndMonth1', 'EndMonth2', 'EndYear1', 'EndYear2', 'Initiator', 'Interven', 'NonStateDeaths', 'Outcome', 'SideA', 'SideB', 'StartDay1', 'StartDay2', 'StartMonth1', 'StartMonth2', 'StartYear1', 'StartYear2', 'TransFrom', 'TransTo', 'Version', 'WarName', 'WarNum', 'WarType', 'WhereFought', 'ccode1', 'ccode2']

total participants with both dates found 124
total participants with at least one date not found 74


In [11]:
## a/b column pairs handed to the union helper so that each participant gets a row of its own
switched_columns_list = [
    'c_code_a', 'c_code_b',
    'state_name_a', 'state_name_b',
    'side_a', 'side_b',
    'battle_deaths_a', 'battle_deaths_b',
]
participant_df_3 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(participant_df_3, switched_columns_list))

## snapshot taken before duplicates are removed.
## it is used further down as the dyadic data (no dyadic files are available for extra-state wars).
## the 'year' column will be adjusted again later.
extra_state_dyad_columns = ['war_num', 'c_code_a', 'state_name_a', 'c_code_b', 'state_name_b', 'start_year']
dyad_df_3 = participant_df_3[extra_state_dyad_columns].rename(columns = {'start_year': 'year'})

## keeping one state (or non-state) per war, then dropping the b-side columns
duplicate_list = ['war_num', 'war_type', 'war_name', 'c_code_a', 'state_name_a']
participant_df_3 = participant_df_3.drop_duplicates(subset = duplicate_list, keep = 'first').reset_index(drop = True)
participant_df_3 = deepcopy(the_networks_of_war_python_functions.drop_participant_b_columns(participant_df_3, switched_columns_list))

## Combining Participant Sources

In [12]:
## dropping rows whose state_name is the "-8" (not applicable) placeholder from each source, then renumbering
participant_df = participant_df.loc[participant_df['state_name'] != "-8"].reset_index(drop = True)
participant_df_2 = participant_df_2.loc[participant_df_2['state_name'] != "-8"].reset_index(drop = True)
participant_df_3 = participant_df_3.loc[participant_df_3['state_name'] != "-8"].reset_index(drop = True)

In [13]:
## stacking the intra-state and then the extra-state participants under the inter-state ones
for extra_participants in (participant_df_2, participant_df_3):
    participant_df = pd.concat([participant_df, extra_participants], ignore_index = True)

## keeping only essential columns
essential_columns = [
    'war_num',
    'war_name',
    'war_type',
    'c_code',
    'state_name',
    'side',
    'battle_deaths',
    'start_date',
    'start_year',
    'end_date',
    'end_year',
    'days_at_war',
    'lagging_war',
    'leading_war',
    'ongoing_participation',
    'total_deaths_both_sides',
    'peak_forces_available',
    'peak_battle_forces',
]
participant_df = participant_df[essential_columns]

## removing non applicable participants
participant_df = participant_df[participant_df['state_name'] != "-8"].reset_index(drop = True)

print("{} Total War Participants After Merge".format(len(participant_df)))

1705 Total War Participants After Merge


In [14]:
## the numeric code moves to war_type_code; war_type and war_sub_type become text labels looked up from it
participant_df = participant_df.rename(columns = {'war_type': 'war_type_code'})

## war type code -> broad war category (codes not listed here stay null)
war_type_labels = {
    1: 'Inter-State War',
    2: 'Extra-State War',
    3: 'Extra-State War',
    4: 'Intra-State War',
    5: 'Intra-State War',
    6: 'Intra-State War',
    7: 'Intra-State War',
    8: 'Non-State War',
    9: 'Non-State War',
    }

## war type code -> sub type (inter-state wars get an empty string)
war_sub_type_labels = {
    1: '',
    2: 'Colonial (conflict with colony)',
    3: 'Imperial (state vs non-state)',
    4: 'Civil War (for central control)',
    5: 'Civil War (over local issues)',
    6: 'Regional/Internal',
    7: 'Intercommunal',
    8: 'In Non-State Territory',
    9: 'Across State Borders',
    }

participant_df['war_type'] = participant_df['war_type_code'].map(war_type_labels)
participant_df['war_sub_type'] = participant_df['war_type_code'].map(war_sub_type_labels)

In [15]:
## filling in non-applicable values with None
## NOTE(review): the fill rules live in remaining_participant_null_values, outside this notebook -
## confirm there which columns are affected
participant_df = deepcopy(the_networks_of_war_python_functions.remaining_participant_null_values(participant_df))

In [16]:
## sanity check: number of participant rows per war type label
participant_df['war_type'].value_counts()

Intra-State War    1011
Extra-State War     361
Inter-State War     332
Name: war_type, dtype: int64

## Dyadic DataFrames

In [17]:
## battle deaths and start/end dates are in this file too, but they are more confusing than in participant_df.
## this file is only used to get the combinations of countries directly at war with each other.

dyad_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/directed_dyadic_war.csv', encoding = 'latin-1')

dyad_df = dyad_df.rename(columns = {'warnum': 'war_num',
                                    'statea': 'c_code_a',
                                    'stateb': 'c_code_b'})

print("directed_dyadic_war columns: \n")
print(sorted(list(dyad_df.columns)))

dyad_df = dyad_df[['war_num', 'c_code_a', 'c_code_b', 'year']].copy()

## every code has to exist in the country lookup.
## failing loudly here keeps the KeyError that the row-by-row dictionary lookup used to raise.
unknown_codes = set(dyad_df['c_code_a']).union(dyad_df['c_code_b']) - set(c_code_dic)
if unknown_codes:
    raise KeyError("country codes missing from c_code_dic: {}".format(sorted(unknown_codes)))

## vectorised name lookup for both sides.
## this replaces one .loc write per cell, which was slow and only worked with a 0..n-1 index.
dyad_df['state_name_a'] = dyad_df['c_code_a'].map(c_code_dic)
dyad_df['state_name_b'] = dyad_df['c_code_b'].map(c_code_dic)

directed_dyadic_war columns: 

['batdtha', 'batdthb', 'batdths', 'c_code_a', 'c_code_b', 'changes_1', 'changes_2', 'disno', 'durindx', 'dyindex', 'outcomea', 'war_num', 'wardyadrolea', 'wardyadroleb', 'warenday', 'warendmnth', 'warendyr', 'warolea', 'waroleb', 'warstrtday', 'warstrtmnth', 'warstrtyr', 'year']


In [18]:
## a/b column pairs handed to the union helper so that each participant gets a row of its own
switched_columns_list = ['c_code_a', 'c_code_b', 'state_name_a', 'state_name_b']
dyad_df = deepcopy(
    the_networks_of_war_python_functions.union_opposite_columns(dyad_df, switched_columns_list)
)

## Combining Dyadic Sources

In [19]:
## dropping dyads where either member is the "-8" (not applicable) placeholder.
## the inter-state dyads are skipped because all of them are applicable.
intra_state_applicable = (dyad_df_2['state_name_a'] != "-8") & (dyad_df_2['state_name_b'] != "-8")
dyad_df_2 = dyad_df_2[intra_state_applicable].reset_index(drop = True)

extra_state_applicable = (dyad_df_3['state_name_a'] != "-8") & (dyad_df_3['state_name_b'] != "-8")
dyad_df_3 = dyad_df_3[extra_state_applicable].reset_index(drop = True)

In [20]:
## row counts are halved for reporting, then the three dyad sources are stacked in order
dyad_sources = (
    ("{} Total Inter-State War Dyads", dyad_df),
    ("{} Total Intra-State War Dyads", dyad_df_2),
    ("{} Total Extra-State War Dyads", dyad_df_3),
)
for message, source_dyads in dyad_sources:
    print(message.format(len(source_dyads)/2))

for extra_dyads in (dyad_df_2, dyad_df_3):
    dyad_df = pd.concat([dyad_df, extra_dyads], ignore_index = True)

print("{} Total Dyads After Merge".format(len(dyad_df)/2))

1364.0 Total Inter-State War Dyads
420.0 Total Intra-State War Dyads
164.0 Total Extra-State War Dyads
1948.0 Total Dyads After Merge


In [21]:
participant_df_copy = deepcopy(participant_df)
participant_df_copy.rename({'state_name': 'total_participants'}, axis = 1, inplace = True)

participant_df_copy['war_num'] = participant_df_copy['war_num'].astype(float)
## filling these dates in arbitrarily before taking aggregates
## high date for start_date because this will be min
## low date for end_date because this will be max
participant_df_copy['start_date'].fillna(pd.to_datetime('2100-01-01'), inplace = True)
participant_df_copy['end_date'].fillna(pd.to_datetime('1700-01-01'), inplace = True)
participant_df_copy.rename({'ongoing_participation': 'ongoing_war'}, axis = 1, inplace = True)

for i, war in enumerate(participant_df_copy['war_name']):
    if participant_df_copy.loc[i, 'ongoing_war']==1:
        pass
    elif 'present' in participant_df_copy.loc[i, 'war_name'].lower() or 'ongoing' in participant_df_copy.loc[i, 'war_name'].lower():
        participant_df_copy.loc[i, 'ongoing_war'] = 1
        print("{} changed to ongoing_war.".format(participant_df_copy.loc[i, 'war_name']))
    if ' of 1' in participant_df_copy.loc[i, 'war_name']:
        participant_df_copy.loc[i, 'war_name'] = participant_df_copy.loc[i, 'war_name'].split(' of 1')[0].replace("  ", " ")
    elif ' of 2' in participant_df_copy.loc[i, 'war_name']:
        participant_df_copy.loc[i, 'war_name'] = participant_df_copy.loc[i, 'war_name'].split(' of 2')[0].replace("  ", " ")
        
        
aggregations = {
    'total_participants': 'count',
#     'potential_start_year': 'min',
    'start_year': 'min',
    'end_year': 'max',
    ## this will not be accurate if there are more than one lagging/leading wars per war.
    'lagging_war': 'min',
    'leading_war': 'max',
    'ongoing_war': 'max',
    'start_date': 'min',
    'end_date': 'max'
    ## not sure how to add this one just yet
#     'total_deaths_both_sides': 'max'
    }

war_df = deepcopy(participant_df_copy.groupby(['war_num', 'war_name', 'war_type_code', 'war_type', 'war_sub_type']).agg(aggregations).reset_index())

## putting these back to none in case they made it through the aggregation
war_df.loc[war_df['start_date'] == pd.to_datetime('2100-01-01'), 'start_date'] = None
war_df.loc[war_df['end_date'] == pd.to_datetime('1700-01-01'), 'end_date'] = None

## whole days between the earliest start and the latest end of each war.
## the day count is read straight off each duration instead of parsing its string form
## inside a bare except, so a missing date gives None and nothing else is silently swallowed.
## stored as an object column of plain ints / None, which is what the old loop wrote.
war_durations = war_df['end_date'] - war_df['start_date']
war_df['total_days_in_war'] = pd.Series(
    [None if pd.isnull(duration) else int(duration.days) for duration in war_durations],
    index = war_df.index,
    dtype = object)

war_df = deepcopy(war_df.sort_values(by = ['start_year', 'end_year', 'war_name'], ascending = (False, True, True)))

Second Waziristan War of 2007-present changed to ongoing_war.
Second Waziristan War of 2007-present changed to ongoing_war.
Boko Haram in Nigeria of 2013 - ongoing  changed to ongoing_war.
South Sudan War of 2013 to present changed to ongoing_war.
South Sudan War of 2013 to present changed to ongoing_war.
Afghan-Taliban War of 2014-ongoing changed to ongoing_war.
Afghan-Taliban War of 2014-ongoing changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Somali-Al-Shabaab war of 2014-present changed to ongoing_war.
Second Waziristan War of 2007-present changed to ongoing_war.
Boko Haram in Nigeria of 2013 - ongoing  changed to ongoing_war.
South Sudan Wa

In [22]:
## need to figure out a way to add dyadic data when it's missing.
## these are clear cases where it should be added because one side of the war is only one country.
## it'll be trickier when each side isn't just one country.

def _add_missing_dyads(war_num, war_participants, dyads, lone_side, crowded_side):
    """Pair each state on `crowded_side` that has no dyad row yet with the single state on `lone_side`.

    war_num: the war being processed.
    war_participants: the participant_df rows of that war.
    dyads: the dyad DataFrame. New rows are appended to it in place through
        .loc enlargement; columns that are not set here are left null.
    lone_side / crowded_side: the 'side' values (1 or 2) of the one-state side
        and of the opposing side.

    Returns the number of rows appended.
    """
    lone_rows = war_participants[war_participants['side']==lone_side]
    lone_name = lone_rows['state_name'].values[0]
    lone_code = lone_rows['c_code'].values[0]

    crowded_parties = sorted(set(war_participants[war_participants['side']==crowded_side]['state_name']))

    ## every state that already shows up in a dyad of this war, on either end
    war_dyads = dyads[dyads['war_num']==war_num]
    dyadic_parties = set(war_dyads['state_name_a']) | set(war_dyads['state_name_b'])

    added = 0
    for party in crowded_parties:
        if party in dyadic_parties:
            continue
        party_rows = war_participants[war_participants['state_name']==party]
        ## the new row's label is the current length, so this relies on labels being 0..n-1
        new_label = len(dyads)
        dyads.loc[new_label, 'war_num'] = war_num
        dyads.loc[new_label, 'c_code_a'] = party_rows['c_code'].values[0]
        dyads.loc[new_label, 'state_name_a'] = party
        dyads.loc[new_label, 'year'] = party_rows['start_year'].values[0]
        dyads.loc[new_label, 'c_code_b'] = lone_code
        dyads.loc[new_label, 'state_name_b'] = lone_name
        added += 1
    return added


dyads_added = 0
for war_num in war_df['war_num']:

    ## filter this war's participants once instead of re-masking participant_df for every lookup
    war_participants = participant_df[participant_df['war_num']==war_num]
    total_side_1 = len(war_participants[war_participants['side']==1])
    total_side_2 = len(war_participants[war_participants['side']==2])

    ## only wars where exactly one of the two sides is a single state are handled
    if total_side_1==1 and total_side_2!=1:
        dyads_added += _add_missing_dyads(war_num, war_participants, dyad_df, 1, 2)
    elif total_side_2==1 and total_side_1!=1:
        dyads_added += _add_missing_dyads(war_num, war_participants, dyad_df, 2, 1)

print("Total Dyads Added: {}".format(dyads_added))

Total Dyads Added: 182


In [23]:
## contiguity data per pair of states and year, with the columns renamed to the notebook's names
borders_column_names = {'statelno': 'c_code_a',
                        'statehno': 'c_code_b',
                        'land': 'land_contiguity',
                        'sea': 'sea_contiguity',
                        'total': 'total_contiguity'}
dyadic_borders_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/contcold.csv', encoding = 'utf8')
dyadic_borders_df = dyadic_borders_df.rename(columns = borders_column_names)

print("contcold columns: \n")
print(sorted(list(dyadic_borders_df.columns)))

## only the merge keys and the three contiguity measures are kept
borders_columns = ['c_code_a',
                   'c_code_b',
                   'year',
                   'land_contiguity',
                   'sea_contiguity',
                   'total_contiguity']
dyadic_borders_df = dyadic_borders_df[borders_columns].copy()

## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b',
                         'state_name_a',
                         'state_name_b']
dyadic_borders_df = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dyadic_borders_df, switched_columns_list))

## left merge: every dyad row is kept whether or not a contiguity record matches
dyad_df = deepcopy(dyad_df.merge(dyadic_borders_df, how = 'left', on = ['c_code_a', 'c_code_b', 'year']))

contcold columns: 

['c_code_a', 'c_code_b', 'dyad', 'land_contiguity', 'sea_contiguity', 'statehab', 'statelab', 'total_contiguity', 'version', 'year']


In [24]:
## alliance data per pair of states and year, with the columns renamed to the notebook's names
alliance_column_names = {'ccode1': 'c_code_a',
                         'ccode2': 'c_code_b',
                         'defense': 'defense_alliance',
                         'neutrality': 'neutrality_alliance', 
                         'entente': 'entente_alliance'}
dyadic_alliance_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/alliance_v4.1_by_dyad_yearly.csv', encoding = 'utf8')
dyadic_alliance_df = dyadic_alliance_df.rename(columns = alliance_column_names)

print("alliance_v4.1_by_dyad_yearly columns: \n")
print(sorted(list(dyadic_alliance_df.columns)))

## only the merge keys and the three alliance flags are kept
alliance_columns = ['c_code_a',
                    'c_code_b',
                    'year',
                    'defense_alliance',
                    'neutrality_alliance',
                    'entente_alliance']
dyadic_alliance_df = dyadic_alliance_df[alliance_columns].copy()

## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b',
                         'state_name_a',
                         'state_name_b']
dyadic_alliance_df = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dyadic_alliance_df, switched_columns_list))

## left merge: every dyad row is kept whether or not an alliance record matches
dyad_df = deepcopy(dyad_df.merge(dyadic_alliance_df, how = 'left', on = ['c_code_a', 'c_code_b', 'year']))

alliance_v4.1_by_dyad_yearly columns: 

['c_code_a', 'c_code_b', 'defense_alliance', 'dyad_end_day', 'dyad_end_month', 'dyad_end_year', 'dyad_st_day', 'dyad_st_month', 'dyad_st_year', 'entente_alliance', 'left_censor', 'neutrality_alliance', 'nonaggression', 'right_censor', 'state_name1', 'state_name2', 'version', 'version4id', 'year']


In [25]:
## order-independent key for each pairing: both state names, sorted, as one string.
## "a vs b" and "b vs a" therefore get the same conflict_pair value.
dyad_df['conflict_pair'] = [str(sorted([name_a, name_b]))
                            for name_a, name_b in zip(dyad_df['state_name_a'], dyad_df['state_name_b'])]

## earliest year first, so the row kept for each pair below is its first year
dyad_df['year'] = dyad_df['year'].astype(int)
dyad_df = dyad_df.sort_values(by = 'year', ascending = True)

dyad_columns = ['war_num',
                'c_code_a',
                'state_name_a',
                'c_code_b',
                'state_name_b',
                'year',
                'land_contiguity',
                'sea_contiguity',
                'total_contiguity',
                'defense_alliance',
                'neutrality_alliance',
                'entente_alliance',
                'conflict_pair']

## rows missing either state name are dropped
both_names_known = dyad_df['state_name_a'].notnull() & dyad_df['state_name_b'].notnull()
dyad_df = dyad_df.loc[both_names_known, dyad_columns]

## need to dedupe across conflict pair so a vs b are never repeated interchangeably
duplicate_list = ['war_num', 'conflict_pair']
dyad_df = (dyad_df
           .drop_duplicates(subset = duplicate_list, keep = 'first')
           .rename(columns = {'year': 'first_year'})
           .reset_index(drop = True)
           .drop(columns = 'conflict_pair'))

In [26]:
## filling in nulls with zeros
## a dyad with no matching alliance / contiguity record is given 0 for that measure
for dyadic_measure in ['defense_alliance',
                       'neutrality_alliance',
                       'entente_alliance',
                       'land_contiguity',
                       'sea_contiguity',
                       'total_contiguity']:
    dyad_df.loc[dyad_df[dyadic_measure].isnull(), dyadic_measure] = 0

## Adding Descriptive Data for Dyads

# Check Between Dyads and Participants

In [27]:
def _state_identifiers(frame, name_column, code_column):
    """Return one identifier per row of `frame`: the value in `code_column`,
    or the value in `name_column` when the code is -8."""
    return [state_name if state_code == -8 else state_code
            for state_name, state_code in zip(frame[name_column], frame[code_column])]


## for every war in the dyad data, list the states that appear in a dyad but not among the participants
print("Dyadic States Missing From Participant Data:\n")
war_list = list(set(list(dyad_df['war_num'])))
for war in war_list:
    participant_df_copy = deepcopy(participant_df[participant_df['war_num']==war].reset_index(drop=True))
    dyad_df_copy = deepcopy(dyad_df[dyad_df['war_num']==war].reset_index(drop=True))

    participant_list = _state_identifiers(participant_df_copy, 'state_name', 'c_code')
    ## both ends of every dyad, side a first and then side b
    dyad_list = (_state_identifiers(dyad_df_copy, 'state_name_a', 'c_code_a')
                 + _state_identifiers(dyad_df_copy, 'state_name_b', 'c_code_b'))

    for country in dyad_list:
        if country in participant_list:
            continue
        ## a war with dyads but no participant rows has no name to look up
        if len(participant_df_copy) > 0:
            war_name = participant_df_copy['war_name'].values[0]
        else:
            war_name = 'unknown war'
        ## states identified by name (c_code -8) or missing from the country-code table are
        ## printed as they are instead of raising a KeyError
        country_label = c_code_dic.get(country, str(country))
        ## str(war)[:-2] strips the trailing '.0' -- presumably war_num is a float here; verify if that changes
        print(str(war)[:-2] + ", " + war_name + ": " + str(country) + ', ' + country_label)

Dyadic States Missing From Participant Data:

108, Latvian Liberation: 220.0, France
108, Latvian Liberation: 290.0, Poland
108, Latvian Liberation: 200.0, United Kingdom
139, World War II: 230.0, Spain
139, World War II: 800.0, Thailand
139, World War II: 800.0, Thailand
151, Korean: 920.0, New Zealand
151, Korean: 920.0, New Zealand
169, Six Day War: 645.0, Iraq
184, Turco-Cypriot: 350.0, Greece


# Descriptive Statistics for Each Country by Year

In [28]:
# df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Diplomatic_Exchange_2006v1.csv')

In [29]:
# # https://sites.google.com/site/joseantoniocheibub/datasets/democracy-and-dictatorship-revisited
# # non cow data-set
    
# gov_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/ddrevisited_data_v1.csv', encoding = 'latin-1')
# print(gov_df.columns)

# gov_df.rename({'chgterr': 'territory_change',
#                'ychgterr': 'territory_change_year',
#                'entryy': 'first_recorded_year',
#                'exity': 'last_recorded_year',
#                'bornyear': 'born_year',
#                'endyear': 'died_year',
#                'exselec': 'election_type',
#                'legselec': 'legislation_type',
#                'closed': 'legislature_status',
#                'dejure': 'party_legal_status',
#                'defacto': 'party_existance',
#                'defacto2': 'party_existance_outside_regime', 
#                'lparty': 'legislature_parties',
#                'incumb': 'incumbent_type',
#                'collect': 'collective_leadership',
#                'eheads': 'num_leadership_changes',
#                'ehead': 'leader_name',
#                'epost': 'post_name',
#                'edate': 'entrance_date',
#                'ageeh': 'leader_tenure',
#                'emil': 'military_leader',
#                'royal': 'royal_leader',
#                'comm': 'communist_leader',
#                'edeath': 'leader_died',
#                'democracy': 'democratic_regime',
#                'assconfid': 'cabinet_assembly',
#                'poppreselec': 'popular_election',
#                'regime': 'regime_type',
#                'ttd': 'transition_to_democracy',
#                'tta': 'transition_to_dictatorship',
#                'agedem': 'age_govt',
#                'stra': 'num_transitions_ever',
#                'cowcode': 'c_code_a',
#                'ctryname': 'state_name_a',
#                'headdiff': 'nominal_vs_eff_diff',
#                'un_region_name': 'un_region',
#                'un_continent_name': 'un_continent',
#                 'cowcode2': 'c_code_b'}, axis = 1, inplace = True)

In [30]:
# co-emissions-per-capita.csv

## borders
# pd.read_csv('/Users/charlieyaris/github/international_armed_conflict/Data Sources/contcold.csv', encoding = 'latin-1')

In [31]:
## alliance memberships, one row per member per year in the raw file
alliance_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/alliance_v4.1_by_member_yearly.csv', encoding = 'latin-1')

## columns are printed as they come from the file, before any renaming
print("alliance_v4.1_by_member_yearly columns: \n")
print(sorted(list(alliance_df.columns)))

alliance_df = alliance_df.rename(columns = {'ccode': 'c_code',
                                            'nonaggression': 'non_aggression_alliances',
                                            'entente': 'entente_alliances',
                                            'ss_type': 'alliances'})

## floats so the two columns can be summed per country and year
alliance_df = alliance_df.astype({'non_aggression_alliances': float,
                                  'entente_alliances': float})

## 'alliances' counts the non-null ss_type values; the other two are summed
aggregations = {
    'alliances': 'count',
    'non_aggression_alliances': 'sum',
    'entente_alliances': 'sum'
    }

alliances_by_country_year = alliance_df.groupby(['c_code', 'year'])
alliance_df = deepcopy(alliances_by_country_year.agg(aggregations).reset_index())

alliance_v4.1_by_member_yearly columns: 

['all_end_day', 'all_end_month', 'all_end_year', 'all_st_day', 'all_st_month', 'all_st_year', 'ccode', 'defense', 'entente', 'left_censor', 'mem_end_day', 'mem_end_month', 'mem_end_year', 'mem_st_day', 'mem_st_month', 'mem_st_year', 'neutrality', 'nonaggression', 'right_censor', 'ss_type', 'state_name', 'version', 'version4id', 'year']


In [32]:
def _sum_known_trade(flows):
    """Sum one country-year's trade flows without counting the -9 'unknown' code.

    flows: the flow values of one (c_code_a, year) group.

    Returns the sum of the reported values. When nothing is reported the result is
    -9 if the group held at least one -9 (so the later "-9000000 is unknown value"
    clean-up still recognises it), and otherwise 0, which is what a plain sum of
    nulls gave before.
    """
    reported = flows[flows != -9].dropna()
    if len(reported) > 0:
        return reported.sum()
    return -9 if (flows == -9).any() else 0


dyadic_trade_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Dyadic_COW_4.0.csv', encoding = 'utf8')

dyadic_trade_df.rename({'ccode1': 'c_code_a',
                        'ccode2': 'c_code_b',
                        'flow2': 'money_flow_in_a',
                        ## money flow out
                        'flow1': 'money_flow_in_b'}, axis = 1, inplace = True)

print("Dyadic_COW_4.0 columns: \n")
print(sorted(list(dyadic_trade_df.columns)))

## need to union to take summations but won't need to dedupe because there are no duplicates between a and b.
# this means a can be summed on its own when it's combined with b.
switched_columns_list = ['c_code_a',
                         'c_code_b',
                         'money_flow_in_a',
                         'money_flow_in_b']
dyadic_trade_df = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dyadic_trade_df, switched_columns_list))
dyadic_trade_df.rename({'money_flow_in_a': 'money_flow_in',
                        'money_flow_in_b': 'money_flow_out'}, axis = 1, inplace = True)

## a plain 'sum' would add every -9 "unknown" code into the country's yearly total
## (understating it by 9 per unknown partner); _sum_known_trade leaves those codes out.
aggregations = {'money_flow_in': _sum_known_trade,
                'money_flow_out': _sum_known_trade}
trade_df_1 = dyadic_trade_df.groupby(['c_code_a', 'year']).agg(aggregations).reset_index()
trade_df_1.rename({'c_code_a':'c_code'}, axis = 1, inplace = True)

## national-level import / export totals, joined to the summed dyadic flows by country and year
trade_df_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/National_COW_4.0.csv', encoding = 'latin-1')
trade_df_2.rename({'ccode': 'c_code'}, axis = 1, inplace = True)
print("National_COW_4.0 columns: \n")
print(sorted(list(trade_df_2.columns)))
trade_df_2 = deepcopy(trade_df_2[['c_code', 'year', 'imports', 'exports']])
## outer merge keeps country-years that appear in only one of the two sources
trade_df = deepcopy(pd.merge(trade_df_1, trade_df_2, how = 'outer', on = ['c_code', 'year']))

Dyadic_COW_4.0 columns: 

['bel_lux_alt_flow1', 'bel_lux_alt_flow2', 'c_code_a', 'c_code_b', 'china_alt_flow1', 'china_alt_flow2', 'dip1', 'dip2', 'importer1', 'importer2', 'money_flow_in_a', 'money_flow_in_b', 'smoothflow1', 'smoothflow2', 'smoothtotrade', 'source1', 'source2', 'spike1', 'spike2', 'tradedip', 'trdspike', 'version', 'year']
National_COW_4.0 columns: 

['alt_exports', 'alt_imports', 'c_code', 'exports', 'imports', 'source1', 'source2', 'stateabb', 'statename', 'version', 'year']


In [33]:
# trade_df.head(3)

In [34]:
## material-capability columns of the NMC file, renamed to the notebook's names
mil_cap_column_names = {'milex': 'military_expenditure',
                        'milper': 'military_personnel',
                        'irst': 'iron_steel_production',
                        'pec': 'prim_energy_consumption',
                        'tpop': 'total_population',
                        'upop': 'urban_population',
                        'upopgrowth': 'urban_pop_growth_rate',
                        'ccode': 'c_code',
#                         'statenme': 'state_name',
                        'cinc': 'cinc_score'}
mil_cap_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/NMC_5_0-wsupplementary.csv', encoding = 'latin-1')
mil_cap_df = mil_cap_df.rename(columns = mil_cap_column_names)

print("NMC_5_0-wsupplementary columns: \n")
print(sorted(list(mil_cap_df.columns)))

## oldest years first, then only the country-year keys and the measures used later
mil_cap_columns = ['c_code',
                   'year',
                   'military_expenditure',
                   'military_personnel',
                   'prim_energy_consumption',
                   'iron_steel_production',
                   'total_population',
                   'urban_population',
                   'cinc_score']
mil_cap_by_year = mil_cap_df.sort_values(by = 'year', ascending = True).reset_index(drop = True)
mil_cap_df = deepcopy(mil_cap_by_year[mil_cap_columns])

NMC_5_0-wsupplementary columns: 

['c_code', 'cinc_score', 'iron_steel_production', 'irstanomalycode', 'irstnote', 'irstqualitycode', 'irstsource', 'milexnote', 'milexsource', 'military_expenditure', 'military_personnel', 'milpernote', 'milpersource', 'pecanomalycode', 'pecnote', 'pecqualitycode', 'pecsource', 'prim_energy_consumption', 'stateabb', 'statenme', 'total_population', 'tpopanomalycode', 'tpopnote', 'tpopqualitycode', 'tpopsource', 'upopanomalycode', 'upopgrowthsource', 'upopnote', 'upopqualitycode', 'upopsource', 'urban_pop_growth_rate', 'urban_population', 'version', 'year']


## Merging Descriptive DataFrames

In [35]:
## one descriptive row per country and year: trade, military capability and alliance counts.
## outer merges keep country-years that are present in only some of the sources.
country_year_keys = ['c_code', 'year']
descriptive_df = deepcopy(trade_df.merge(mil_cap_df, how = 'outer', on = country_year_keys))
descriptive_df = deepcopy(descriptive_df.merge(alliance_df, how = 'outer', on = country_year_keys))
descriptive_df['year'] = descriptive_df['year'].astype(float)

## the participant year columns are cast to float as well so they match the merge key's type
for participant_year in ['start_year', 'end_year']:
    participant_df[participant_year] = participant_df[participant_year].astype(float)

## the descriptive data is attached twice: once for the year a participant entered the war
## and once for the year it left. the second merge makes the overlapping columns
## come out as <name>_x (start year) and <name>_y (end year).
descriptive_df = descriptive_df.rename(columns = {'year': 'start_year'})
participant_df = deepcopy(participant_df.merge(descriptive_df, how = 'left', on = ['c_code', 'start_year']))

## note that descriptive_df keeps 'end_year' as the name of its year column from here on
descriptive_df = descriptive_df.rename(columns = {'start_year': 'end_year'})
participant_df = deepcopy(participant_df.merge(descriptive_df, how = 'left', on = ['c_code', 'end_year']))

In [36]:
## every descriptive column exists twice on participant_df:
## <name>_x holds the value for the start year and <name>_y the value for the end year.
year_suffixes = ['_x', '_y']

## filling in nulls with zeros
## these are ones that most likely mean zero if null (not due to missing data)
## money_flow_out is not in this list: its nulls stay null, as before.
zero_filled_measures = ['alliances',
                        'entente_alliances',
                        'non_aggression_alliances',
                        'money_flow_in',
                        'military_expenditure',
                        'military_personnel',
                        'total_population',
                        'urban_population',
                        'iron_steel_production',
                        'prim_energy_consumption']
for measure in zero_filled_measures:
    for year_suffix in year_suffixes:
        measure_column = measure + year_suffix
        participant_df.loc[participant_df[measure_column].isnull(), measure_column] = 0

# converting these to their proper units according to documentation
## each entry: (measure, multiplier, value written where the source held its -9 code).
## after the multiplication that code shows up as -9 * multiplier, i.e. the
## -9000000 / -9000 / -18000000 values this cell has always looked for.
unit_conversions = [
    # -9000000 is unknown value
    ('money_flow_in', 1000000, None),
    ('money_flow_out', 1000000, None),
    # unsure what -9000 refers to
    ('military_expenditure', 1000, 0),
    ('military_personnel', 1000, 0),
    ('total_population', 1000, 0),
    ('urban_population', 1000, 0),
    # unsure what -18000000 refers to
    ('iron_steel_production', 2000000, 0),
    ('prim_energy_consumption', 2000000, 0),
    ]
for measure, multiplier, code_replacement in unit_conversions:
    ## both year columns are converted for every measure. prim_energy_consumption_y used to be
    ## skipped, which left it in the source units and meant its -18000000 check could never match.
    for year_suffix in year_suffixes:
        measure_column = measure + year_suffix
        participant_df[measure_column] = participant_df[measure_column] * multiplier
        participant_df.loc[participant_df[measure_column] == -9 * multiplier, measure_column] = code_replacement

## NOTE: this cell multiplies in place, so running it twice on the same participant_df scales the values twice.

In [37]:
## row counts of the three frames that are about to be saved
size_reports = [("{} total participants.", participant_df),
                ("{} total conflicts.", dyad_df),
                ("{} total wars.", war_df)]
for report_template, reported_frame in size_reports:
    print(report_template.format(len(reported_frame)))

1705 total participants.
1058 total conflicts.
679 total wars.


In [38]:
## saving the four frames so later cells can reload them without rebuilding
pickle_targets = [
    (descriptive_df, '/Users/the_networks_of_war/data_sources/pickles/descriptive_df.pkl'),
    (participant_df, '/Users/the_networks_of_war/data_sources/pickles/participant_df.pkl'),
    (dyad_df, '/Users/the_networks_of_war/data_sources/pickles/dyad_df.pkl'),
    (war_df, '/Users/the_networks_of_war/data_sources/pickles/war_df.pkl'),
    ]
for frame_to_save, pickle_path in pickle_targets:
    frame_to_save.to_pickle(pickle_path)

In [39]:
## reloading the four saved frames, in the same order as the names on the left
descriptive_df, participant_df, dyad_df, war_df = [
    pd.read_pickle(pickle_path)
    for pickle_path in ['/Users/the_networks_of_war/data_sources/pickles/descriptive_df.pkl',
                        '/Users/the_networks_of_war/data_sources/pickles/participant_df.pkl',
                        '/Users/the_networks_of_war/data_sources/pickles/dyad_df.pkl',
                        '/Users/the_networks_of_war/data_sources/pickles/war_df.pkl']
    ]

In [40]:
# why would vietnam and republic of vietnam not appear in contiguity data?

In [45]:
file_df = pd.DataFrame()

for i, war in enumerate(war_df['war_num']):
    
    file_directory = './json_files_by_war/'
    file_name = 'war_num_' + str(war).replace('.', '_') + '.json'
    file_df.loc[i, 'file_name'] = file_name
    file_df.loc[i, 'war_name'] = war_df[war_df['war_num']==war]['war_name'].values[0]
    file_df.loc[i, 'start_year'] = war_df[war_df['war_num']==war]['start_year'].values[0]
    file_df.loc[i, 'end_year'] = war_df[war_df['war_num']==war]['end_year'].values[0]
    file_df.loc[i, 'war_type'] = war_df[war_df['war_num']==war]['war_type'].values[0]
    file_df.loc[i, 'war_sub_type'] = war_df[war_df['war_num']==war]['war_sub_type'].values[0]
    file_df.loc[i, 'ongoing_war'] = war_df[war_df['war_num']==war]['ongoing_war'].values[0]
    file_df.loc[i, 'total_days_in_war'] = war_df[war_df['war_num']==war]['total_days_in_war'].values[0]
    file_df.loc[i, 'total_participants'] = war_df[war_df['war_num']==war]['total_participants'].values[0]
    
    participant_df_copy = deepcopy(pd.read_pickle('/Users/the_networks_of_war/data_sources/pickles/participant_df.pkl'))
    dyad_df_copy = deepcopy(pd.read_pickle('/Users/the_networks_of_war/data_sources/pickles/dyad_df.pkl'))
    war_df_copy = deepcopy(pd.read_pickle('/Users/the_networks_of_war/data_sources/pickles/war_df.pkl'))

    war_df_copy = deepcopy(war_df_copy[war_df_copy['war_num']==war].reset_index(drop = True))
    participant_df_copy = deepcopy(participant_df_copy[participant_df_copy['war_num']==war].reset_index(drop = True))
    dyad_df_copy = deepcopy(dyad_df_copy[dyad_df_copy['war_num']==war].reset_index(drop = True))
    
    node_names = []
    for i, participant in enumerate(participant_df_copy['state_name']):
        c_code_input = participant_df_copy.loc[i, 'c_code']
        if c_code_input == -8:
            node_names.append(participant)
        else:
            node_names.append(participant_df_copy.loc[i, 'c_code'])


    graph_file = open(file_directory + file_name, 'w').close()
    graph_file = open(file_directory + file_name, 'w')

    json_dic = {}
    json_dic['nodes'] = {}
    json_dic['links'] = {}
    start_line = '{\n  "war": [\n'
    middle_line_1 = '  ],\n  "nodes": [\n'
    middle_line_2 = '  ],\n  "links": [\n'
    end_line = '\n  ]\n}'

    graph_file.write(start_line)

    add_line = ('    {"war_name": "' + str(war_df_copy.loc[0, 'war_name'])
                + '", "war_num": "' + str(war_df_copy.loc[0, 'war_num'])
                + '", "war_type_code": "' + str(war_df_copy.loc[0, 'war_type_code'])
                + '", "war_type": "' + str(war_df_copy.loc[0, 'war_type'])
                + '", "war_sub_type": "' + str(war_df_copy.loc[0, 'war_sub_type'])
                + '", "war_start_date": "' + str(war_df_copy.loc[0, 'start_date'])
                + '", "war_end": "' + str(war_df_copy.loc[0, 'end_date'])
                + '", "total_days_in_war": "' + str(war_df_copy.loc[0, 'total_days_in_war'])
                + '", "lagging_war": "' + str(war_df_copy.loc[0, 'lagging_war'])
                + '", "leading_war": "' + str(war_df_copy.loc[0, 'leading_war'])
                + '", "ongoing_war": "' + str(war_df_copy.loc[0, 'ongoing_war'])
                + '"}\n')

    graph_file.write(add_line)

    # Write the node section: one JSON object per row of participant_df_copy,
    # placed after middle_line_1. Every value is passed through str() and
    # emitted as a quoted string.
    graph_file.write(middle_line_1)

    # (JSON key, participant_df_copy column) pairs in output order. Only the
    # first two keys differ from their column names.
    node_fields = [('country', 'state_name'), ('country_code', 'c_code')]
    node_fields += [(column, column) for column in (
        'side', 'battle_deaths', 'start_year', 'start_date', 'end_year',
        'end_date', 'days_at_war', 'peak_forces_available', 'peak_battle_forces')]
    # Each remaining measure is written twice: its `_x` column, then its `_y` column.
    node_fields += [(measure + suffix, measure + suffix)
                    for measure in (
                        'alliances', 'non_aggression_alliances', 'entente_alliances',
                        'money_flow_out', 'money_flow_in', 'imports', 'exports',
                        'military_expenditure', 'military_personnel',
                        'prim_energy_consumption', 'iron_steel_production',
                        'total_population', 'urban_population', 'cinc_score')
                    for suffix in ('_x', '_y')]

    node_count = len(participant_df_copy['state_name'])
    # .loc[i, ...] is a label lookup, so this assumes participant_df_copy has a
    # 0..n-1 index (i.e. it was reset before this point) -- TODO confirm.
    for i in range(node_count):
        # The id is the row position. It must go through str(): concatenating the
        # bare integer raised TypeError, and the separator before "country" was
        # missing, which made the whole cell a syntax error.
        # NOTE(review): the link section now writes c_code / state_name values as
        # source and target rather than positions in node_names; confirm that the
        # consumer of this file really matches links against this positional id.
        node_pairs = ['"id": "' + str(i) + '"']
        node_pairs += ['"' + json_key + '": "' + str(participant_df_copy.loc[i, column]) + '"'
                       for json_key, column in node_fields]
        # Objects are comma-separated; the last one must not end with a comma.
        line_end = '\n' if i + 1 == node_count else ',\n'
        graph_file.write('    {' + ', '.join(node_pairs) + '}' + line_end)

    # Write the link section: one JSON object per row of dyad_df_copy whose two
    # endpoints are both present in node_names, placed between middle_line_2
    # and end_line.
    graph_file.write(middle_line_2)

    ## sometimes a country could be in the dyad and not in the participant df_copy.
    ## this is rare but has happened (see spain in WWII)
    ## for this reason, a full outer join or something like it must incorporate/address all that is in both dataframes

    # Dyad columns written as bare JSON numbers, grouped by the cast applied.
    link_float_fields = ('first_year', 'defense_alliance', 'neutrality_alliance', 'entente_alliance')
    link_int_fields = ('land_contiguity', 'sea_contiguity', 'total_contiguity')

    # Lines are collected first and joined afterwards. Stripping the trailing
    # comma only when the *last dyad row* was written left a dangling comma
    # (invalid JSON) whenever that last row was skipped as missing.
    link_lines = []
    for i, node_1 in enumerate(dyad_df_copy['c_code_a']):
        # A c_code of -8 is replaced by the state name as the node identifier
        # (presumably -8 marks entities without a country code -- TODO confirm).
        if dyad_df_copy.loc[i, 'c_code_a'] == -8:
            node_1_input = dyad_df_copy.loc[i, 'state_name_a']
        else:
            node_1_input = node_1
        if dyad_df_copy.loc[i, 'c_code_b'] == -8:
            node_2_input = dyad_df_copy.loc[i, 'state_name_b']
        else:
            node_2_input = dyad_df_copy.loc[i, 'c_code_b']

        # Skip (and report) dyads whose endpoint is not a known node. As before,
        # only the first missing endpoint of a row is reported.
        if node_1_input not in node_names:
            print("{} is missing from participant_df for {}.".format(dyad_df_copy.loc[i, 'state_name_a'], war_df_copy.loc[0, 'war_name']))
            continue
        if node_2_input not in node_names:
            print("{} is missing from participant_df for {}.".format(dyad_df_copy.loc[i, 'state_name_b'], war_df_copy.loc[0, 'war_name']))
            continue

        # source/target carry the node identifier itself (country code or state
        # name), not its position in node_names. They go through str() because
        # concatenating a numeric code raised TypeError, and they are quoted
        # because a bare state name is not valid JSON. Quoting also matches how
        # "country_code" is written in the node section.
        link_pairs = ['"source": "' + str(node_1_input) + '"',
                      '"target": "' + str(node_2_input) + '"']
        link_pairs += ['"' + field + '": ' + str(dyad_df_copy.loc[i, field].astype(float))
                       for field in link_float_fields]
        link_pairs += ['"' + field + '": ' + str(dyad_df_copy.loc[i, field].astype(int))
                       for field in link_int_fields]
        link_pairs.append('"bond": ' + str(1))
        link_lines.append('    {' + ', '.join(link_pairs) + '}')

    # Comma-separate the objects; nothing is written when every dyad was skipped.
    if link_lines:
        graph_file.write(',\n'.join(link_lines) + '\n')

    graph_file.write(end_line)

Yemen is missing from participant_df for Third Al-Houthi Rebellion.
Ukraine is missing from participant_df for Donbas War.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Al-Shabaab  is missing from participant_df for Somali-Al-Shabaab war.
Mali is missing from participant_df for North Mali War.
Libya is missing from participant_df for Libyan Civil War.
Libya is missing from participant_df for Libyan Civil War.
Libya is missing from participant_df for Libyan Civil War.
Libya is missing from participant_df for Libyan Civil War.
Libya is missing from participant_df for Libyan Civil War.
Libya is missing from participant_df

Ethiopia is missing from participant_df for Second Ogaden War phase 1.
Indonesia is missing from participant_df for East Timorese War phase 3.
South Africa is missing from participant_df for War over Angola.
Cuba is missing from participant_df for War over Angola.
South Africa is missing from participant_df for War over Angola.
Angola is missing from participant_df for War over Angola.
Morocco is missing from participant_df for Western Saharan.
Cyprus is missing from participant_df for Turco-Cypriot.
Greece is missing from participant_df for Turco-Cypriot.
Iraq is missing from participant_df for Fourth Iraqi Kurds War.
Chile is missing from participant_df for Chilean Coup.
Saudi Arabia is missing from participant_df for Yom Kippur War.
Jordan is missing from participant_df for Yom Kippur War.
Iraq is missing from participant_df for Yom Kippur War.
Israel is missing from participant_df for Yom Kippur War.
Syria is missing from participant_df for Yom Kippur War.
Pakistan is missing from 

Poland is missing from participant_df for World War II.
Poland is missing from participant_df for World War II.
Italy is missing from participant_df for World War II.
France is missing from participant_df for World War II.
Belgium is missing from participant_df for World War II.
France is missing from participant_df for World War II.
United Kingdom is missing from participant_df for World War II.
United Kingdom is missing from participant_df for World War II.
United Kingdom is missing from participant_df for World War II.
Germany is missing from participant_df for World War II.
France is missing from participant_df for World War II.
Netherlands is missing from participant_df for World War II.
South Africa is missing from participant_df for World War II.
Italy is missing from participant_df for World War II.
New Zealand is missing from participant_df for World War II.
South Africa is missing from participant_df for World War II.
Greece is missing from participant_df for World War II.
Ru

United States is missing from participant_df for Moro Rebellion.
Turkey is missing from participant_df for Second Balkan.
Romania is missing from participant_df for Second Balkan.
Yugoslavia is missing from participant_df for Second Balkan.
Greece is missing from participant_df for Second Balkan.
China is missing from participant_df for Second Nationalist Revolution.
France is missing from participant_df for Moroccan Berber.
Spain is missing from participant_df for Moroccan Berber.
Ecuador is missing from participant_df for Ecuadorian Civil War.
Bulgaria is missing from participant_df for First Balkan.
Greece is missing from participant_df for First Balkan.
Turkey is missing from participant_df for First Balkan.
China is missing from participant_df for First Nationalist Revolution.
Paraguay is missing from participant_df for Paraguayan Radical Liberal Rebellion.
Turkey is missing from participant_df for Italian-Turkish.
Mexico is missing from participant_df for Third Mexican War.
Mexic

China is missing from participant_df for Xinjiang Muslim Revolt.
Brazil is missing from participant_df for Lopez.
Argentina is missing from participant_df for Lopez.
Italy is missing from participant_df for Bandit War.
United Kingdom is missing from participant_df for British Umbeyla Campaign.
Ecuador is missing from participant_df for Ecuadorian-Colombian.
Argentina is missing from participant_df for Second Argentina War.
Russia is missing from participant_df for Second Polish War.
United States is missing from participant_df for Shimonoseki War.
United Kingdom is missing from participant_df for British-Maori.
France is missing from participant_df for Franco-Mexican.
Ottoman Empire is missing from participant_df for Third Turco-Montenegrin War.
United States of America is missing from participant_df for U.S. Civil War.
Italy is missing from participant_df for Italian-Roman.
Two Sicilies is missing from participant_df for Neapolitan.
Colombia is missing from participant_df for Third Co

In [46]:
# Save file_df to war_file_list.csv in the working directory, without the
# DataFrame index column. `index` is documented as a bool, so pass False
# explicitly instead of relying on None being falsy.
file_df.to_csv('war_file_list.csv', index=False)