In [1]:
from warnings import filterwarnings

filterwarnings('ignore')

import pandas as pd
import numpy as np
from copy import deepcopy

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [3]:
pwd

'/Users/charlieyaris/github/cyaris.github.io/assets/the_networks_of_war'

In [4]:
df = pd.read_csv('/Users/the_networks_of_war/data_sources/Inter-StateWarData_v4.0.csv', encoding = 'latin-1')

aggregations = {
    'ccode': 'count'
    }

grouped_df = df.groupby(['WarName', 'WarNum']).agg(aggregations).reset_index()
grouped_df.rename({'ccode': 'total_instances'}, axis = 1, inplace = True)
grouped_df.sort_values(by = 'total_instances', ascending = False).head(10)

Unnamed: 0,WarName,WarNum,total_instances
93,World War II,139,29
44,Korean,151,16
92,World War I,106,15
34,Gulf War,211,14
70,Seven Weeks,55,11
86,War for Kosovo,221,8
84,"Vietnam War, Phase 2",163,8
94,Yom Kippur War,181,6
1,Arab-Israeli,148,6
8,Boxer Rebellion,82,6


In [5]:
dyad_df = pd.read_csv('/Users/the_networks_of_war/data_sources/directed_dyadic_war.csv', encoding = 'latin-1')
dyad_df.rename({'warnum': 'war_num', 'statea': 'c_code_a', 'stateb': 'c_code_b'}, axis = 1, inplace = True)
dyad_df = deepcopy(dyad_df[(dyad_df['war_num']==139) & (dyad_df['durindx']==1)][['war_num', 'c_code_a', 'c_code_b', 'year']])

In [6]:
c_code_df = pd.read_csv('/Users/the_networks_of_war/data_sources/COW country codes.csv', encoding = 'utf8')
c_code_df.rename({'CCode': 'c_code',
                  'StateNme': 'state_name'}, axis = 1, inplace = True)
c_code_df.drop(['StateAbb'], axis = 1, inplace = True)

In [7]:
c_code_df.rename({'c_code': 'c_code_a',
                  'state_name': 'state_name_a'}, axis = 1, inplace = True)
dyad_df = deepcopy(pd.merge(c_code_df, dyad_df, on = 'c_code_a'))
c_code_df.rename({'c_code_a': 'c_code_b',
                  'state_name_a': 'state_name_b'}, axis = 1, inplace = True)
dyad_df = deepcopy(pd.merge(c_code_df, dyad_df, on = 'c_code_b'))

In [8]:
dyadic_trade_df = pd.read_csv('/Users/the_networks_of_war/data_sources/Dyadic_COW_4.0.csv', encoding = 'utf8')

dyadic_trade_df.rename({'ccode1': 'c_code_a',
                        'ccode2': 'c_code_b',
                        'flow1': 'money_flow_out_a',
                        'flow2': 'money_flow_in_a'}, axis = 1, inplace = True)

dyadic_trade_df['money_flow_out_a'] = ([s * 1000000 for s in dyadic_trade_df['money_flow_out_a']])
dyadic_trade_df['money_flow_in_a'] = ([s * 1000000 for s in dyadic_trade_df['money_flow_in_a']])
dyadic_trade_df.loc[dyadic_trade_df['money_flow_out_a'] == -9000000, 'money_flow_out_a'] = None
dyadic_trade_df.loc[dyadic_trade_df['money_flow_in_a'] == -9000000, 'money_flow_in_a'] = None

dyadic_trade_df = deepcopy(dyadic_trade_df[['year',
                                            'c_code_a',
                                            'c_code_b',
                                            'money_flow_out_a',
                                            'money_flow_in_a']])

In [9]:
## need to union to take summations but won't need to dedupe because there are no duplicates between a and b.
# this means a can be summed on its own when it's combined with b.

dyadic_trade_union_df = deepcopy(dyadic_trade_df)
## doing these inefficient column name changes to fill in for a much needed sql union of mismatching column names
dyadic_trade_union_df.rename({'c_code_a': 'c_code_a_new',
                      'c_code_b': 'c_code_b_new',
                      'money_flow_out_a': 'money_flow_out_a_new',
                      'money_flow_in_a': 'money_flow_in_a_new'}, axis = 1, inplace = True)

dyadic_trade_union_df.rename({'c_code_a_new': 'c_code_b',
                      'c_code_b_new': 'c_code_a',
                      'money_flow_out_a_new': 'money_flow_in_a',
                      'money_flow_in_a_new': 'money_flow_out_a'}, axis = 1, inplace = True)

dyadic_trade_df = deepcopy(pd.concat([dyadic_trade_df, dyadic_trade_union_df], ignore_index = True).reset_index())

In [10]:
aggregations = {
    'money_flow_in_a': 'sum',
    'money_flow_out_a': 'sum',
    }

trade_df_1 = dyadic_trade_df.groupby(['c_code_a', 'year']).agg(aggregations).reset_index()

In [11]:
trade_df_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/National_COW_4.0.csv', encoding = 'latin-1')
trade_df_2.rename({'ccode': 'c_code_a', 'imports': 'imports_a', 'exports': 'exports_a'}, axis = 1, inplace = True)
trade_df_2 = deepcopy(trade_df_2[['c_code_a', 'year', 'imports_a', 'exports_a']])

trade_start_df = deepcopy(pd.merge(trade_df_1, trade_df_2, on = ['c_code_a', 'year']))
trade_start_df.rename({'year': 'start_year_a',
                       'money_flow_in_a': 'money_flow_in_start_a',
                       'money_flow_out_a': 'money_flow_out_start_a',
                       'imports_a': 'imports_start_a',
                       'exports_a': 'exports_start_a'}, axis = 1, inplace = True)

## will need to join on start year of conflict and end year of conflict separately

trade_end_df = deepcopy(trade_start_df)
trade_end_df.rename({'start_year_a': 'end_year_a',
                       'money_flow_in_start_a': 'money_flow_in_end_a',
                       'money_flow_out_start_a': 'money_flow_out_end_a',
                       'imports_start_a': 'imports_end_a',
                       'exports_start_a': 'exports_end_a'}, axis = 1, inplace = True)

In [12]:
mil_cap_start_df = pd.read_csv('/Users/the_networks_of_war/data_sources/NMC_5_0-wsupplementary.csv', encoding = 'latin-1')

mil_cap_start_df.rename({'year': 'start_year_a',
                   'milex': 'military_expenditure_start_a',
                   'milper': 'military_personnel_start_a',
                   'irst': 'iron_steel_production_start_a',
                   'pec': 'prim_energy_consumption_start_a',
                   'tpop': 'total_population_start_a',
                   'upop': 'urban_population_start_a',
                   'upopgrowth': 'urban_pop_growth_rate_start_a',
                   'ccode': 'c_code_a',
#                    'statenme': 'state_name_a',
                   'cinc': 'cinc_score_start_a'}, axis = 1, inplace = True)

mil_cap_start_df['military_expenditure_start_a'] = ([s * 1000 for s in mil_cap_start_df['military_expenditure_start_a']])
mil_cap_start_df['military_personnel_start_a'] = ([s * 1000 for s in mil_cap_start_df['military_personnel_start_a']])
mil_cap_start_df['total_population_start_a'] = ([s * 1000 for s in mil_cap_start_df['total_population_start_a']])
mil_cap_start_df['urban_population_start_a'] = ([s * 1000 for s in mil_cap_start_df['urban_population_start_a']])
mil_cap_start_df['iron_steel_production_start_a'] = ([s * 2000000 for s in mil_cap_start_df['iron_steel_production_start_a']])
mil_cap_start_df['prim_energy_consumption_start_a'] = ([s * 2000000 for s in mil_cap_start_df['prim_energy_consumption_start_a']])

mil_cap_start_df.loc[mil_cap_start_df['military_expenditure_start_a'] == -9000, 'military_expenditure_start_a'] = 0
mil_cap_start_df.loc[mil_cap_start_df['military_personnel_start_a'] == -9000, 'military_personnel_start_a'] = 0
mil_cap_start_df.loc[mil_cap_start_df['total_population_start_a'] == -9000, 'total_population_start_a'] = 0
mil_cap_start_df.loc[mil_cap_start_df['urban_population_start_a'] == -9000, 'urban_population_start_a'] = 0
mil_cap_start_df.loc[mil_cap_start_df['iron_steel_production_start_a'] == -18000000 , 'iron_steel_production_start_a'] = 0
mil_cap_start_df.loc[mil_cap_start_df['prim_energy_consumption_start_a'] == -18000000 , 'prim_energy_consumption_start_a'] = 0

mil_cap_start_df = mil_cap_start_df.sort_values(by = 'start_year_a', ascending = True).reset_index()

mil_cap_start_df = deepcopy(mil_cap_start_df[['c_code_a',
                                  'start_year_a',
                                  'military_expenditure_start_a',
                                  'military_personnel_start_a',
                                  'prim_energy_consumption_start_a',
                                  'iron_steel_production_start_a',
                                  'total_population_start_a',
                                  'urban_population_start_a',
                                  'cinc_score_start_a']])

## will need to join on start year of conflict and end year of conflict separately

mil_cap_end_df = deepcopy(mil_cap_start_df)
mil_cap_end_df.rename({'start_year_a': 'end_year_a',
                       'military_expenditure_start_a': 'military_expenditure_end_a',
                       'military_personnel_start_a': 'military_personnel_end_a',
                       'prim_energy_consumption_start_a': 'prim_energy_consumption_end_a',
                       'iron_steel_production_start_a': 'iron_steel_production_end_a',
                       'total_population_start_a': 'total_population_end_a',
                       'urban_population_start_a': 'urban_population_end_a',
                       'cinc_score_start_a': 'cinc_score_end_a'}, axis = 1, inplace = True)

In [13]:
participant_df = pd.read_csv('/Users/the_networks_of_war/data_sources/Inter-StateWarData_v4.0.csv', encoding = 'latin-1')
participant_df['start_date'] = pd.to_datetime(participant_df['StartYear1'].astype(str) + "-" + participant_df['StartMonth1'].astype(str) + "-" + participant_df['StartDay1'].astype(str))
participant_df['end_date'] = pd.to_datetime(participant_df['EndYear1'].astype(str) + "-" + participant_df['EndMonth1'].astype(str) + "-" + participant_df['EndDay1'].astype(str))
participant_df['days_at_war'] = participant_df['end_date'] - participant_df['start_date']
participant_df = deepcopy(participant_df[participant_df['WarNum']==139][['WarNum',
                                                                         'WarName',
                                                                         'WarType',
                                                                         'ccode',
                                                                         'Side',
                                                                         'BatDeath',
                                                                         'start_date',
                                                                         'StartYear1',
                                                                         'end_date',
                                                                         'EndYear1',
                                                                         'days_at_war']])
participant_df.rename({'WarNum': 'war_num',
                       'WarName': 'war_name',
                       'WarType': 'war_type',
                       'ccode': 'c_code_a',
                       'Side': 'side_a',
                       'BatDeath': 'battle_deaths_a',
                       'start_date': 'start_date_a',
                       'StartYear1': 'start_year_a',
                       'end_date': 'end_date_a',
                       'EndYear1': 'end_year_a',
                       'days_at_war': 'days_at_war_a'}, axis = 1, inplace = True)

In [14]:
dyad_df = deepcopy(pd.merge(dyad_df, participant_df, on = ['war_num', 'c_code_a']))
participant_df.drop(['war_name', 'war_type'], axis = 1, inplace = True)
participant_df.rename({'c_code_a': 'c_code_b',
                       'side_a': 'side_b',
                       'battle_deaths_a': 'battle_deaths_b',
                       'start_date_a': 'start_date_b',
                       'start_year_a': 'start_year_b',
                       'end_date_a': 'end_date_b',
                       'end_year_a': 'end_year_b',
                       'days_at_war_a': 'days_at_war_b'}, axis = 1, inplace = True)
dyad_df = deepcopy(pd.merge(dyad_df, participant_df, on = ['war_num', 'c_code_b']))

In [15]:
dyad_df = deepcopy(pd.merge(dyad_df, trade_start_df, on = ['start_year_a', 'c_code_a']))

trade_start_df.rename({'c_code_a': 'c_code_b',
                   'start_year_a': 'start_year_b',
                   'money_flow_in_start_a': 'money_flow_in_start_b',
                   'money_flow_out_start_a': 'money_flow_out_start_b',
                   'imports_start_a': 'imports_start_b',
                   'exports_start_a': 'exports_start_b'}, axis = 1, inplace = True)
dyad_df = deepcopy(pd.merge(dyad_df, trade_start_df, on = ['start_year_b', 'c_code_b']))

dyad_df = deepcopy(pd.merge(dyad_df, trade_end_df, on = ['end_year_a', 'c_code_a']))

trade_end_df.rename({'c_code_a': 'c_code_b',
                     'end_year_a': 'end_year_b',
                     'money_flow_in_end_a': 'money_flow_in_end_b',
                     'money_flow_out_end_a': 'money_flow_out_end_b',
                     'imports_end_a': 'imports_end_b',
                     'exports_end_a': 'exports_end_b'}, axis = 1, inplace = True)

dyad_df = deepcopy(pd.merge(dyad_df, trade_end_df, on = ['end_year_b', 'c_code_b']))

In [16]:
dyad_df = deepcopy(pd.merge(dyad_df, mil_cap_start_df, on = ['start_year_a', 'c_code_a']))

mil_cap_start_df.rename({'c_code_a': 'c_code_b',
                         'start_year_a': 'start_year_b',
                         'military_expenditure_start_a': 'military_expenditure_start_b',
                         'military_personnel_start_a': 'military_personnel_start_b',
                         'prim_energy_consumption_start_a': 'prim_energy_consumption_start_b',
                         'iron_steel_production_start_a': 'iron_steel_production_start_b',
                         'total_population_start_a': 'total_population_start_b',
                         'urban_population_start_a': 'urban_population_start_b',
                         'cinc_score_start_a': 'cinc_score_start_b'}, axis = 1, inplace = True)

dyad_df = deepcopy(pd.merge(dyad_df, mil_cap_start_df, on = ['start_year_b', 'c_code_b']))

dyad_df = deepcopy(pd.merge(dyad_df, mil_cap_end_df, on = ['end_year_a', 'c_code_a']))

mil_cap_end_df.rename({'c_code_a': 'c_code_b',
                       'end_year_a': 'end_year_b',
                       'military_expenditure_end_a': 'military_expenditure_end_b',
                       'military_personnel_end_a': 'military_personnel_end_b',
                       'prim_energy_consumption_end_a': 'prim_energy_consumption_end_b',
                       'iron_steel_production_end_a': 'iron_steel_production_end_b',
                       'total_population_end_a': 'total_population_end_b',
                       'urban_population_end_a': 'urban_population_end_b',
                       'cinc_score_end_a': 'cinc_score_end_b'}, axis = 1, inplace = True)

dyad_df = deepcopy(pd.merge(dyad_df, mil_cap_end_df, on = ['end_year_b', 'c_code_b']))

In [17]:
dyad_union_df = deepcopy(dyad_df)

## doing these inefficient column name changes to fill in for a much needed sql union of mismatching column names
dyad_union_df.rename({'c_code_a': 'c_code_a_new',
                      'c_code_b': 'c_code_b_new',
                      'state_name_a': 'state_name_a_new',
                      'state_name_b': 'state_name_b_new',
                      'side_a': 'side_a_new',
                      'side_b': 'side_b_new',
                      'battle_deaths_a': 'battle_deaths_a_new',
                      'battle_deaths_b': 'battle_deaths_b_new',
                      'start_date_a': 'start_date_a_new',
                      'start_date_b': 'start_date_b_new',
                      'end_date_a': 'end_date_a_new',
                      'end_date_b': 'end_date_b_new',
                      'days_at_war_a': 'days_at_war_a_new',
                      'days_at_war_b': 'days_at_war_b_new',
                      'money_flow_in_start_a': 'money_flow_in_start_a_new',
                      'money_flow_in_start_b': 'money_flow_in_start_b_new',
                      'money_flow_out_start_a': 'money_flow_out_start_a_new',
                      'money_flow_out_start_b': 'money_flow_out_start_b_new',
                      'imports_start_a': 'imports_start_a_new',
                      'imports_start_b': 'imports_start_b_new',
                      'exports_start_a': 'exports_start_a_new',
                      'exports_start_b': 'exports_start_b_new',
                      'money_flow_in_end_a': 'money_flow_in_end_a_new',
                      'money_flow_in_end_b': 'money_flow_in_end_b_new',
                      'money_flow_out_end_a': 'money_flow_out_end_a_new',
                      'money_flow_out_end_b': 'money_flow_out_end_b_new',
                      'imports_end_a': 'imports_end_a_new',
                      'imports_end_b': 'imports_end_b_new',
                      'exports_end_a': 'exports_end_a_new',
                      'exports_end_b': 'exports_end_b_new',
                      'military_expenditure_start_a': 'military_expenditure_start_a_new',
                      'military_expenditure_start_b': 'military_expenditure_start_b_new',
                      'military_personnel_start_a': 'military_personnel_start_a_new',
                      'military_personnel_start_b': 'military_personnel_start_b_new',
                      'prim_energy_consumption_start_a': 'prim_energy_consumption_start_a_new',
                      'prim_energy_consumption_start_b': 'prim_energy_consumption_start_b_new',
                      'iron_steel_production_start_a': 'iron_steel_production_start_a_new',
                      'iron_steel_production_start_b': 'iron_steel_production_start_b_new',
                      'total_population_start_a': 'total_population_start_a_new',
                      'total_population_start_b': 'total_population_start_b_new',
                      'urban_population_start_a': 'urban_population_start_a_new',
                      'urban_population_start_b': 'urban_population_start_b_new',
                      'cinc_score_start_a': 'cinc_score_start_a_new',
                      'cinc_score_start_b': 'cinc_score_start_b_new',
                      'military_expenditure_end_a': 'military_expenditure_end_a_new',
                      'military_expenditure_end_b': 'military_expenditure_end_b_new',
                      'military_personnel_end_a': 'military_personnel_end_a_new',
                      'military_personnel_end_b': 'military_personnel_end_b_new',
                      'prim_energy_consumption_end_a': 'prim_energy_consumption_end_a_new',
                      'prim_energy_consumption_end_b': 'prim_energy_consumption_end_b_new',
                      'iron_steel_production_end_a': 'iron_steel_production_end_a_new',
                      'iron_steel_production_end_b': 'iron_steel_production_end_b_new',
                      'total_population_end_a': 'total_population_end_a_new',
                      'total_population_end_b': 'total_population_end_b_new',
                      'urban_population_end_a': 'urban_population_end_a_new',
                      'urban_population_end_b': 'urban_population_end_b_new',
                      'cinc_score_end_a': 'cinc_score_end_a_new',
                      'cinc_score_end_b': 'cinc_score_end_b_new'}, axis = 1, inplace = True)

dyad_union_df.rename({'c_code_a_new': 'c_code_b',
                      'c_code_b_new': 'c_code_a',
                      'state_name_a_new': 'state_name_b',
                      'state_name_b_new': 'state_name_a',
                      'side_a_new': 'side_b',
                      'side_b_new': 'side_a',
                      'battle_deaths_a_new': 'battle_deaths_b',
                      'battle_deaths_b_new': 'battle_deaths_a',
                      'start_date_a_new': 'start_date_b',
                      'start_date_b_new': 'start_date_a',
                      'end_date_a_new': 'end_date_b',
                      'end_date_b_new': 'end_date_a',
                      'days_at_war_a_new': 'days_at_war_b',
                      'days_at_war_b_new': 'days_at_war_a',
                      'money_flow_in_start_a_new': 'money_flow_in_start_b',
                      'money_flow_in_start_b_new': 'money_flow_in_start_a',
                      'money_flow_out_start_a_new': 'money_flow_out_start_b',
                      'money_flow_out_start_b_new': 'money_flow_out_start_a',
                      'imports_start_a_new': 'imports_start_b',
                      'imports_start_b_new': 'imports_start_a',
                      'exports_start_a_new': 'exports_start_b',
                      'exports_start_b_new': 'exports_start_a',
                      'money_flow_in_end_a_new': 'money_flow_in_end_b',
                      'money_flow_in_end_b_new': 'money_flow_in_end_a',
                      'money_flow_out_end_a_new': 'money_flow_out_end_b',
                      'money_flow_out_end_b_new': 'money_flow_out_end_a',
                      'imports_end_a_new': 'imports_end_b',
                      'imports_end_b_new': 'imports_end_a',
                      'exports_end_a_new': 'exports_end_b',
                      'exports_end_b_new': 'exports_end_a',
                      'military_expenditure_start_a_new': 'military_expenditure_start_b',
                      'military_expenditure_start_b_new': 'military_expenditure_start_a',
                      'military_personnel_start_a_new': 'military_personnel_start_b',
                      'military_personnel_start_b_new': 'military_personnel_start_a',
                      'prim_energy_consumption_start_a_new': 'prim_energy_consumption_start_b',
                      'prim_energy_consumption_start_b_new': 'prim_energy_consumption_start_a',
                      'iron_steel_production_start_a_new': 'iron_steel_production_start_b',
                      'iron_steel_production_start_b_new': 'iron_steel_production_start_a',
                      'total_population_start_a_new': 'total_population_start_b',
                      'total_population_start_b_new': 'total_population_start_a',
                      'urban_population_start_a_new': 'urban_population_start_b',
                      'urban_population_start_b_new': 'urban_population_start_a',
                      'cinc_score_start_a_new': 'cinc_score_start_b',
                      'cinc_score_start_b_new': 'cinc_score_start_a',
                      'military_expenditure_end_a_new': 'military_expenditure_end_b',
                      'military_expenditure_end_b_new': 'military_expenditure_end_a',
                      'military_personnel_end_a_new': 'military_personnel_end_b',
                      'military_personnel_end_b_new': 'military_personnel_end_a',
                      'prim_energy_consumption_end_a_new': 'prim_energy_consumption_end_b',
                      'prim_energy_consumption_end_b_new': 'prim_energy_consumption_end_a',
                      'iron_steel_production_end_a_new': 'iron_steel_production_end_b',
                      'iron_steel_production_end_b_new': 'iron_steel_production_end_a',
                      'total_population_end_a_new': 'total_population_end_b',
                      'total_population_end_b_new': 'total_population_end_a',
                      'urban_population_end_a_new': 'urban_population_end_b',
                      'urban_population_end_b_new': 'urban_population_end_a',
                      'cinc_score_end_a_new': 'cinc_score_end_b',
                      'cinc_score_end_b_new': 'cinc_score_end_a'}, axis = 1, inplace = True)

dyad_df = deepcopy(pd.concat([dyad_df, dyad_union_df], ignore_index = True).reset_index())

In [18]:
for i, country_code_a in enumerate(dyad_df['c_code_a']):
    dyad_df.loc[i, 'days_at_war_a'] = int(str(dyad_df.loc[i, 'days_at_war_a']).split(' ')[0])
    dyad_df.loc[i, 'days_at_war_b'] = int(str(dyad_df.loc[i, 'days_at_war_b']).split(' ')[0])
    if int(country_code_a) < int(dyad_df.loc[i, 'c_code_b']):
        dyad_df.loc[i, 'conflict_pair'] = str(country_code_a) + " " + str(dyad_df.loc[i, 'c_code_b'])
    else:
        dyad_df.loc[i, 'conflict_pair'] = str(dyad_df.loc[i, 'c_code_b']) + " " + str(country_code_a)

dyad_df.sort_values(by = 'year', ascending = True, inplace = True)
connections_df = deepcopy(dyad_df[['war_num',
                                   'war_name',
                                   'war_type',
                                   'c_code_a',
                                   'state_name_a',
                                   'c_code_b',
                                   'state_name_b',
                                   'conflict_pair']])
duplicate_list = ['conflict_pair']
connections_df.drop_duplicates(subset = duplicate_list, keep = 'first', inplace = True)
connections_df = deepcopy(connections_df.reset_index(drop = True))

## keeping final non-null value recorded for total lost in battle
dyad_df.sort_values(by = ['c_code_a','battle_deaths_a'], ascending = (True, False), inplace = True)
countries_df = deepcopy(dyad_df[['war_num',
                                 'war_name',
                                 'war_type',
                                 'c_code_a',
                                 'state_name_a',
                                 'battle_deaths_a',
                                 'side_a',
                                 'start_year_a',
                                 'start_date_a',
                                 'end_year_a',
                                 'end_date_a',
                                 'days_at_war_a',
                                 'money_flow_out_start_a',
                                 'money_flow_out_end_a',
                                 'money_flow_in_start_a',
                                 'money_flow_in_end_a',
                                 'imports_start_a',
                                 'imports_end_a',
                                 'exports_start_a',
                                 'exports_end_a',
                                 'military_expenditure_start_a',
                                 'military_expenditure_end_a',
                                 'military_personnel_start_a',
                                 'military_personnel_end_a',
                                 'prim_energy_consumption_start_a',
                                 'prim_energy_consumption_end_a',
                                 'iron_steel_production_start_a',
                                 'iron_steel_production_end_a',
                                 'total_population_start_a',
                                 'total_population_end_a',
                                 'urban_population_end_a',
                                 'urban_population_start_a',
                                 'cinc_score_start_a',
                                 'cinc_score_end_a'
                                ]])
duplicate_list = ['c_code_a']
countries_df.drop_duplicates(subset = duplicate_list, keep = 'first', inplace = True)
countries_df = deepcopy(countries_df.reset_index(drop=True))

In [19]:
countries_df.head()

Unnamed: 0,war_num,war_name,war_type,c_code_a,state_name_a,battle_deaths_a,side_a,start_year_a,start_date_a,end_year_a,end_date_a,days_at_war_a,money_flow_out_start_a,money_flow_out_end_a,money_flow_in_start_a,money_flow_in_end_a,imports_start_a,imports_end_a,exports_start_a,exports_end_a,military_expenditure_start_a,military_expenditure_end_a,military_personnel_start_a,military_personnel_end_a,prim_energy_consumption_start_a,prim_energy_consumption_end_a,iron_steel_production_start_a,iron_steel_production_end_a,total_population_start_a,total_population_end_a,urban_population_end_a,urban_population_start_a,cinc_score_start_a,cinc_score_end_a
0,139,World War II,1,2,United States of America,405400,1,1941,1941-12-07,1945,1945-08-14,1346,325039996.97,466910992.01,753730010.1098,1486953455.2,3392.0,4186.0,5153.0,9897.0,6301000000,90000000000,1801000,12123000,2062428000000,2438136000000,150300000000,144608000000,133402000.0,139928000.0,39100000.0,38183000.0,0.24449,0.38386
1,139,World War II,1,20,Canada,41992,1,1939,1939-09-10,1945,1945-08-14,2165,26064467.6512,14441043.0902,64344856.72,47516796.0,751.04999,1414.54541,924.91998,2970.0,125700000,2664365000,6000,801000,81928000000,112186000000,2814000000,5222000000,11267000.0,12072000.0,2845000.0,2589000.0,0.00909,0.01704
2,139,World War II,1,140,Brazil,1000,1,1944,1944-07-06,1945,1945-05-07,305,109999.999,279999.998,21820000.0,2900000.1,415.18405,448.58902,548.41516,625.85944,144000000,171180000,0,0,7392000000,8800000000,442000000,412000000,45141000.0,46215000.0,5243000.0,4990000.0,0.01104,0.01114
3,139,World War II,1,200,United Kingdom,418765,1,1940,1939-09-03,1941,1945-08-14,2172,977959507.8,358150755.60957,462190716.6,142074069.0,4054.54541,4608.3335,2240.90918,1875.0,7895671000,17002048000,394000,5090000,439372000000,420850000000,26866000000,24028000000,47762000.0,49182000.0,18705000.0,18429000.0,0.09968,0.08799
4,139,World War II,1,210,Netherlands,7900,1,1939,1940-05-10,1945,1940-05-14,4,34099999.9998,34099999.9998,269530000.048,269530000.048,542.98163,542.98163,344.80927,344.80927,168422000,168422000,64000,64000,25336000000,25336000000,306000000,306000000,8880000.0,8880000.0,2418000.0,2418000.0,0.00505,0.00505


In [20]:
## manually filling in missing battle numbers from google search.
## https://en.wikipedia.org/wiki/World_War_II_casualties
## to be investigated across all cow datasets later for whether they are located elsewhere.

# countries_df.loc[(countries_df['state_name_a']=='Japan') & (countries_df['warnum']==139), 'batdtha'] = 2300000
# countries_df.loc[(countries_df['state_name_a']=='France') & (countries_df['warnum']==139), 'batdtha'] = 600000

In [22]:
network_nodes = list(countries_df['state_name_a'])

graph_file = open("the_networks_of_war.json", 'w+')

json_dic = {}
json_dic['nodes'] = {}
json_dic['links'] = {}
start_line = '{\n  "nodes": [\n'
middle_line = '  ],\n  "links": [\n'
end_line = '\n  ]\n}'

graph_file.write(start_line)
for i, node in enumerate(countries_df['state_name_a']):
    if node == network_nodes[-1]:
        add_line = (
            '    {"country": "' + node
            + '", "side": "' + str(countries_df.loc[i, 'side_a'])
            + '", "battle_deaths": "' + str(countries_df.loc[i, 'battle_deaths_a'])
            + '", "start_year": "' + str(countries_df.loc[i, 'start_year_a'])
            + '", "start_date": "' + str(countries_df.loc[i, 'start_date_a'])
            + '", "end_year": "' + str(countries_df.loc[i, 'end_year_a'])
            + '", "end_date": "' + str(countries_df.loc[i, 'end_date_a'])
            + '", "days_at_war": "' + str(countries_df.loc[i, 'days_at_war_a'])
            + '", "money_flow_out_start": "' + str(countries_df.loc[i, 'money_flow_out_start_a'])
            + '", "money_flow_out_end": "' + str(countries_df.loc[i, 'money_flow_out_end_a'])
            + '", "money_flow_in_start": "' + str(countries_df.loc[i, 'money_flow_in_start_a'])
            + '", "money_flow_in_end": "' + str(countries_df.loc[i, 'money_flow_in_end_a'])
            + '", "imports_start": "' + str(countries_df.loc[i, 'imports_start_a'])
            + '", "imports_end": "' + str(countries_df.loc[i, 'imports_end_a'])
            + '", "exports_start": "' + str(countries_df.loc[i, 'exports_start_a'])
            + '", "exports_end": "' + str(countries_df.loc[i, 'exports_end_a'])
            + '", "military_expenditure_start": "' + str(countries_df.loc[i, 'military_expenditure_start_a'])
            + '", "military_expenditure_end": "' + str(countries_df.loc[i, 'military_expenditure_end_a'])
            + '", "military_personnel_start": "' + str(countries_df.loc[i, 'military_personnel_start_a'])
            + '", "military_personnel_end": "' + str(countries_df.loc[i, 'military_personnel_end_a'])
            + '", "prim_energy_consumption_start": "' + str(countries_df.loc[i, 'prim_energy_consumption_start_a'])
            + '", "prim_energy_consumption_end": "' + str(countries_df.loc[i, 'prim_energy_consumption_end_a'])
            + '", "iron_steel_production_start": "' + str(countries_df.loc[i, 'iron_steel_production_start_a'])
            + '", "iron_steel_production_end": "' + str(countries_df.loc[i, 'iron_steel_production_end_a'])
            + '", "total_population_start": "' + str(countries_df.loc[i, 'total_population_start_a'])
            + '", "total_population_end": "' + str(countries_df.loc[i, 'total_population_end_a'])
            + '", "urban_population_start": "' + str(countries_df.loc[i, 'urban_population_start_a'])
            + '", "urban_population_end": "' + str(countries_df.loc[i, 'urban_population_end_a'])
            + '", "cinc_score_start": "' + str(countries_df.loc[i, 'cinc_score_start_a'])
            + '", "cinc_score_end": "' + str(countries_df.loc[i, 'cinc_score_end_a']) + '"'
            + '}\n'
        )
        graph_file.write(add_line)
    else:
        add_line = (
            '    {"country": "' + node
            + '", "side": "' + str(countries_df.loc[i, 'side_a'])
            + '", "battle_deaths": "' + str(countries_df.loc[i, 'battle_deaths_a'])
            + '", "start_year": "' + str(countries_df.loc[i, 'start_year_a'])
            + '", "start_date": "' + str(countries_df.loc[i, 'start_date_a'])
            + '", "end_year": "' + str(countries_df.loc[i, 'end_year_a'])
            + '", "end_date": "' + str(countries_df.loc[i, 'end_date_a'])
            + '", "days_at_war": "' + str(countries_df.loc[i, 'days_at_war_a'])
            + '", "money_flow_out_start": "' + str(countries_df.loc[i, 'money_flow_out_start_a'])
            + '", "money_flow_out_end": "' + str(countries_df.loc[i, 'money_flow_out_end_a'])
            + '", "money_flow_in_start": "' + str(countries_df.loc[i, 'money_flow_in_start_a'])
            + '", "money_flow_in_end": "' + str(countries_df.loc[i, 'money_flow_in_end_a'])
            + '", "imports_start": "' + str(countries_df.loc[i, 'imports_start_a'])
            + '", "imports_end": "' + str(countries_df.loc[i, 'imports_end_a'])
            + '", "exports_start": "' + str(countries_df.loc[i, 'exports_start_a'])
            + '", "exports_end": "' + str(countries_df.loc[i, 'exports_end_a'])
            + '", "military_expenditure_start": "' + str(countries_df.loc[i, 'military_expenditure_start_a'])
            + '", "military_expenditure_end": "' + str(countries_df.loc[i, 'military_expenditure_end_a'])
            + '", "military_personnel_start": "' + str(countries_df.loc[i, 'military_personnel_start_a'])
            + '", "military_personnel_end": "' + str(countries_df.loc[i, 'military_personnel_end_a'])
            + '", "prim_energy_consumption_start": "' + str(countries_df.loc[i, 'prim_energy_consumption_start_a'])
            + '", "prim_energy_consumption_end": "' + str(countries_df.loc[i, 'prim_energy_consumption_end_a'])
            + '", "iron_steel_production_start": "' + str(countries_df.loc[i, 'iron_steel_production_start_a'])
            + '", "iron_steel_production_end": "' + str(countries_df.loc[i, 'iron_steel_production_end_a'])
            + '", "total_population_start": "' + str(countries_df.loc[i, 'total_population_start_a'])
            + '", "total_population_end": "' + str(countries_df.loc[i, 'total_population_end_a'])
            + '", "urban_population_start": "' + str(countries_df.loc[i, 'urban_population_start_a'])
            + '", "urban_population_end": "' + str(countries_df.loc[i, 'urban_population_end_a'])
            + '", "cinc_score_start": "' + str(countries_df.loc[i, 'cinc_score_start_a'])
            + '", "cinc_score_end": "' + str(countries_df.loc[i, 'cinc_score_end_a']) + '"'
            + '},\n'
        )
        graph_file.write(add_line)        

graph_file.write(middle_line)

for i, node_1 in enumerate(connections_df['state_name_a']):
    if i+1 != len(connections_df['state_name_a']):
        add_line = (
            '    {"source": ' + str(network_nodes.index(node_1))
            + ', "target": ' + str(network_nodes.index(connections_df.loc[i, 'state_name_b']))
            + ', "bond": ' + str(1)
            + '},\n'
        )
        graph_file.write(add_line)
    else:
        add_line = (
            '    {"source": ' + str(network_nodes.index(node_1))
            + ', "target": ' + str(network_nodes.index(connections_df.loc[i, 'state_name_b']))
            + ', "bond": ' + str(1)
            + '}\n'
        )
        graph_file.write(add_line)

graph_file.write(end_line)

6