In [1]:
from warnings import filterwarnings

filterwarnings('ignore')

import pandas as pd
import numpy as np
from copy import deepcopy
from traceback import format_exc
from pprint import pprint
import the_networks_of_war_python_functions

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# Descriptive Statistics for Each Country by Year
### Note: Applies to states/countries only.
### This will be joined to the participants of each war

In [3]:
part_df_1 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/alliance_v4.1_by_member_yearly.csv', encoding = 'latin-1')

# print('alliance_v4.1_by_member_yearly columns: \n')
# pprint(sorted(list(part_df_1.columns)))
# print('\n')

part_df_1.rename({'ccode': 'c_code',
                  'defense': 'defense_alliances',
                  'entente': 'entente_alliances',
                  'neutrality': 'neutrality_alliances',
                  'ss_type': 'alliances'}, axis = 1, inplace = True)

part_df_1['defense_alliances'] = part_df_1['defense_alliances'].astype(float)
part_df_1['entente_alliances'] = part_df_1['entente_alliances'].astype(float)
part_df_1['neutrality_alliances'] = part_df_1['neutrality_alliances'].astype(float)

aggregations = {
    'alliances': 'count',
    'defense_alliances': 'sum',
    'entente_alliances': 'sum',
    'neutrality_alliances': 'sum',
    }

part_df_1 = deepcopy(part_df_1.groupby(['c_code', 'year']).agg(aggregations).reset_index())

In [4]:
## dyadic trade data that will need to be adjusted to be non-dyadic (by country, by year)
part_df_2_1 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Dyadic_COW_4.0.csv', encoding = 'utf8')
part_df_2_1.rename({'ccode1': 'c_code_a',
                    'ccode2': 'c_code_b',
                    'flow2': 'money_flow_in_a',
                    ## money flow out
                    'flow1': 'money_flow_in_b'}, axis = 1, inplace = True)

# print('Dyadic_COW_4.0 columns: \n')
# pprint(sorted(list(part_df_2_1.columns)))
# print('\n')

## need to union to take summations but won't need to dedupe because there are no duplicates between a and b.
# this means a can be summed on its own when it's combined with b.
switched_columns_list = ['c_code_a',
                         'c_code_b',
                         'money_flow_in_a',
                         'money_flow_in_b']
part_df_2_1 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(part_df_2_1, switched_columns_list))
part_df_2_1.rename({'money_flow_in_a': 'money_flow_in',
                    'money_flow_in_b': 'money_flow_out'}, axis = 1, inplace = True)

aggregations = {'money_flow_in': 'sum',
                'money_flow_out': 'sum'}
part_df_2_1 = part_df_2_1.groupby(['c_code_a', 'year']).agg(aggregations).reset_index()
part_df_2_1.rename({'c_code_a':'c_code'}, axis = 1, inplace = True)

part_df_2_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/National_COW_4.0.csv', encoding = 'latin-1')
part_df_2_2.rename({'ccode': 'c_code'}, axis = 1, inplace = True)

# print('National_COW_4.0 columns: \n')
# pprint(sorted(list(trade_df_2.columns)))
# print('\n')

part_df_2_2 = deepcopy(part_df_2_2[['c_code', 'year', 'imports', 'exports']])

part_df_2 = deepcopy(pd.merge(part_df_2_1, part_df_2_2, how = 'outer', on = ['c_code', 'year']))

In [5]:
# trade_df.head(3)

In [6]:
part_df_3 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/NMC_5_0-wsupplementary.csv', encoding = 'latin-1')

part_df_3.rename({'milex': 'military_expenditure',
                  'milper': 'military_personnel',
                  'irst': 'iron_steel_production',
                  'pec': 'prim_energy_consumption',
                  'tpop': 'total_population',
                  'upop': 'urban_population',
                  'upopgrowth': 'urban_pop_growth_rate',
                  'ccode': 'c_code',
                  'cinc': 'cinc_score'}, axis = 1, inplace = True)

# print('NMC_5_0-wsupplementary columns: \n')
# pprint(sorted(list(part_df_3.columns)))
# print('\n')

part_df_3 = part_df_3.sort_values(by = 'year', ascending = True).reset_index(drop = True)
part_df_3 = deepcopy(part_df_3[['c_code',
                                'year',
                                'military_expenditure',
                                'military_personnel',
                                'prim_energy_consumption',
                                'iron_steel_production',
                                'total_population',
                                'urban_population',
                                'cinc_score']])

In [7]:
descriptive_df_1 = deepcopy(pd.merge(part_df_1, part_df_2, how = 'outer', on = ['c_code', 'year']))
descriptive_df_1 = deepcopy(pd.merge(descriptive_df_1, part_df_3, how = 'outer', on = ['c_code', 'year']))
descriptive_df_1['year'] = descriptive_df_1['year'].astype(float)

In [8]:
print('total rows of descriptive participant data: {}'.format(format(len(descriptive_df_1), ',d')))
descriptive_df_1.to_pickle('/Users/the_networks_of_war/data_sources/pickles/participant_descriptive_df.pkl')

total rows of descriptive participant data: 16,388


# Dyadic Descriptive Statistics by Year
### Note: Applies to states/countries only.
### This will be joined to the dyadic pairs for each war

In [9]:
## lot's to use in this dataset so I'll start with the basics
dy_df_1 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/tc2018.csv', encoding = 'utf8')[['gainer', 'loser', 'year']]
## must be dyadic (two states per row)
dy_df_1 = deepcopy(dy_df_1[dy_df_1['loser']!=-9])
dy_df_1.rename({'gainer': 'c_code_a',
                'loser': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_1['territory_exchange'] = 1
dy_df_1 = deepcopy(dy_df_1[['c_code_a',
                            'c_code_b',
                            'year',
                            'territory_exchange']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_1 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_1, switched_columns_list))

In [10]:
## contiguity dataframe for states of colonial dependencies
dy_df_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/contcold.csv', encoding = 'utf8')[['statelno', 'statehno', 'year']]
dy_df_2.rename({'statelno': 'c_code_a',
                'statehno': 'c_code_b',}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_2['colonial_contiguity'] = 1

dy_df_2 = deepcopy(dy_df_2[['c_code_a',
                            'c_code_b',
                            'year',
                            'colonial_contiguity']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_2 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_2, switched_columns_list))

In [11]:
dy_df_3 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/contdird.csv', encoding = 'utf8')[['state1no', 'state2no', 'year']]
dy_df_3.rename({'state1no': 'c_code_a',
                'state2no': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_3['contiguity'] = 1
dy_df_3 = deepcopy(dy_df_3[['c_code_a',
                            'c_code_b',
                            'year',
                            'contiguity']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_3 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_3, switched_columns_list))

In [12]:
dy_df_4 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/alliance_v4.1_by_dyad_yearly.csv', encoding = 'utf8')[['ccode1', 'ccode2', 'year']]
dy_df_4.rename({'ccode1': 'c_code_a',
                'ccode2': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_4['alliance'] = 1
dy_df_4 = deepcopy(dy_df_4[['c_code_a',
                            'c_code_b',
                            'year',
                            'alliance']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_4 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_4, switched_columns_list))

In [13]:
dy_df_5 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/DCAD-v1.0-dyadic.csv', encoding = 'latin-1')[['ccode1', 'ccode2', 'year']]
dy_df_5.rename({'ccode1': 'c_code_a',
                  'ccode2': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_5['defense_cooperation_agreements'] = 1
dy_df_5 = deepcopy(dy_df_5[['c_code_a',
                            'c_code_b',
                            'year',
                            'defense_cooperation_agreements']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_5 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_5, switched_columns_list))

In [14]:
dy_df_6 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/dyadic_formatv3.csv', encoding = 'utf8')[['ccode1', 'ccode2', 'year']]
dy_df_6.rename({'ccode1': 'c_code_a',
                'ccode2': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_6['inter_governmental_organizations'] = 1
dy_df_6 = deepcopy(dy_df_6[['c_code_a',
                            'c_code_b',
                            'year',
                            'inter_governmental_organizations']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_6 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_6, switched_columns_list))

In [15]:
## this one needs to be filled since its only 5 years
dy_df_7 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Diplomatic_Exchange_2006v1.csv', encoding = 'utf8')[['ccode1', 'ccode2', 'year']]
dy_df_7.rename({'ccode1': 'c_code_a',
                'ccode2': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_7['diplomatic_exchange'] = 1
dy_df_7 = deepcopy(dy_df_7[['c_code_a',
                            'c_code_b',
                            'year',
                            'diplomatic_exchange']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_7 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_7, switched_columns_list))

In [16]:
# for year in np.arange(1800, 2020):
#     for row in dy_df_7['year']:
#         if len(dy_df_7[dy_df_7['year']==year])== 0:
#             temp_dyad_df = deepcopy(dy_df_7[dy_df_7['year']==year].reset_index())
#             for i, dyad in enumerate(temp_dyad_df['year']):
#                 dyad_df_length = deepcopy(len(dy_df_7))
#                 dy_df_7.loc[dyad_df_length, 'year'] = year
#                 dy_df_7.loc[dyad_df_length, 'c_code_a'] = temp_dyad_df.loc[i, 'c_code_a']
#                 dy_df_7.loc[dyad_df_length, 'c_code_b'] = temp_dyad_df.loc[i, 'c_code_b']
#         else:
#             current_year = year
            
# dy_df_7['diplomatic_exchange'] = 1
# print(len(dy_df_7))

In [17]:
## this one needs to be filled since its only 5 years
dy_df_8 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Dyadic_COW_4.0.csv', encoding = 'utf8')[['ccode1', 'ccode2', 'year']]
dy_df_8.rename({'ccode1': 'c_code_a',
                'ccode2': 'c_code_b'}, axis = 1, inplace = True)
## creating a binary field to represent this dataset
## more specific fields can be added later
dy_df_8['trade_relations'] = 1
dy_df_8 = deepcopy(dy_df_8[['c_code_a',
                            'c_code_b',
                            'year',
                            'trade_relations']])
## unioning mismatching columns so each participant will get their own row
switched_columns_list = ['c_code_a',
                         'c_code_b']
dy_df_8 = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dy_df_8, switched_columns_list))

In [18]:
# dy_df_test = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/ddrevisited_data_v1.csv', encoding = 'latin-1')

In [19]:
descriptive_df_2 = deepcopy(pd.merge(dy_df_1, dy_df_2, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_3, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_4, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_5, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_6, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_7, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2 = deepcopy(pd.merge(descriptive_df_2, dy_df_8, how = 'outer', on = ['c_code_a', 'c_code_b', 'year']))
descriptive_df_2['year'] = descriptive_df_2['year'].astype(float)

In [20]:
print('total rows of descriptive dyadic data: {}'.format(format(len(descriptive_df_2), ',d')))
descriptive_df_2.to_pickle('/Users/the_networks_of_war/data_sources/pickles/dyadic_descriptive_df.pkl')

total rows of descriptive dyadic data: 3,536,632
