In [1]:
from warnings import filterwarnings

filterwarnings('ignore')

import pandas as pd
import numpy as np
from copy import deepcopy

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [3]:
c_code_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/COW country codes.csv', encoding = 'utf8')
c_code_df.rename({'CCode': 'c_code',
                  'StateNme': 'state_name'}, axis = 1, inplace = True)
c_code_df.drop(['StateAbb'], axis = 1, inplace = True)

duplicate_list = ['c_code', 'state_name']
c_code_df.drop_duplicates(subset = duplicate_list, keep = 'first', inplace = True)
c_code_df = deepcopy(c_code_df.reset_index(drop = True))

c_code_dic = {}
for i, c_code in enumerate(c_code_df['c_code']):
    c_code_dic[c_code] = c_code_df.loc[i, 'state_name']
    
print(str(len(c_code_dic.keys())) + " total countries")

217 total countries


In [4]:
## battle deaths and start/end dates are in this file too, but it's more confusing than the participant_df.
## this will just be used to get the combinations of countries directly at war with each other.

dyad_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/directed_dyadic_war.csv', encoding = 'latin-1')
dyad_df.rename({'warnum': 'war_num', 'statea': 'c_code_a', 'stateb': 'c_code_b'}, axis = 1, inplace = True)

print("directed_dyadic_war columns: \n")
print(dyad_df.columns)

dyad_df = deepcopy(dyad_df[['war_num', 'c_code_a', 'c_code_b', 'year']])

for i, c_code_a in enumerate(dyad_df['c_code_a']):
    dyad_df.loc[i, 'state_name_a'] = c_code_dic[c_code_a]
    dyad_df.loc[i, 'state_name_b'] = c_code_dic[dyad_df.loc[i, 'c_code_b']] 

directed_dyadic_war columns: 

Index(['war_num', 'disno', 'dyindex', 'c_code_a', 'c_code_b', 'warstrtmnth',
       'warstrtday', 'warstrtyr', 'warendmnth', 'warenday', 'warendyr', 'year',
       'warolea', 'waroleb', 'wardyadrolea', 'wardyadroleb', 'outcomea',
       'batdtha', 'batdthb', 'changes_1', 'changes_2', 'batdths', 'durindx'],
      dtype='object')


In [5]:
dyad_df.head(3)

Unnamed: 0,war_num,c_code_a,c_code_b,year,state_name_a,state_name_b
0,1,220,230,1823,France,Spain
1,1,230,220,1823,Spain,France
2,4,365,640,1828,Russia,Turkey


In [6]:
dyad_union_df = deepcopy(dyad_df)

## doing these inefficient column name changes to fill in for a much needed sql union of mismatching column names
dyad_union_df.rename({'c_code_a': 'c_code_a_new',
                      'c_code_b': 'c_code_b_new',
                      'state_name_a': 'state_name_a_new',
                      'state_name_b': 'state_name_b_new'}, axis = 1, inplace = True)

dyad_union_df.rename({'c_code_a_new': 'c_code_b',
                      'c_code_b_new': 'c_code_a',
                      'state_name_a_new': 'state_name_b',
                      'state_name_b_new': 'state_name_a'}, axis = 1, inplace = True)

dyad_df = deepcopy(pd.concat([dyad_df, dyad_union_df], ignore_index = True).reset_index(drop = True))

In [7]:
dyad_df.head(3)

Unnamed: 0,c_code_a,c_code_b,state_name_a,state_name_b,war_num,year
0,220,230,France,Spain,1,1823
1,230,220,Spain,France,1,1823
2,365,640,Russia,Turkey,4,1828


In [8]:
for i, country_code_a in enumerate(dyad_df['c_code_a']):
    if int(country_code_a) < int(dyad_df.loc[i, 'c_code_b']):
        dyad_df.loc[i, 'conflict_pair'] = str(country_code_a) + " " + str(dyad_df.loc[i, 'c_code_b'])
    else:
        dyad_df.loc[i, 'conflict_pair'] = str(dyad_df.loc[i, 'c_code_b']) + " " + str(country_code_a)

dyad_df.sort_values(by = 'year', ascending = True, inplace = True)

dyad_df = deepcopy(dyad_df[['war_num',
                            'c_code_a',
                            'state_name_a',
                            'c_code_b',
                            'state_name_b',
                            'conflict_pair']])

duplicate_list = ['war_num', 'conflict_pair']
dyad_df.drop_duplicates(subset = duplicate_list, keep = 'first', inplace = True)
dyad_df = deepcopy(dyad_df.reset_index(drop = True))

In [9]:
participant_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Inter-StateWarData_v4.0.csv', encoding = 'latin-1')

print("Inter-StateWarData_v4.0 columns: \n")
print(participant_df.columns)

participant_df.rename({'WarNum': 'war_num',
                       'WarName': 'war_name',
                       'WarType': 'war_type',
                       'ccode': 'c_code',
                       'StateName': 'state_name',
                       'Side': 'side',
                       'BatDeath': 'battle_deaths',
                       ## using only first start date and first end date for now
                       ## this will need to be fine-tuned later on
                       'StartYear1': 'start_year',
                       'StartMonth1': 'start_month',
                       'StartDay1': 'start_day',
                       'EndYear1': 'end_year',
                       'EndMonth1': 'end_month',
                       'EndDay1': 'end_day',}, axis = 1, inplace = True)

participant_df['start_date'] = pd.to_datetime(participant_df['start_year'].astype(str) + "-" + participant_df['start_month'].astype(str) + "-" + participant_df['start_day'].astype(str))
participant_df['end_date'] = pd.to_datetime(participant_df['end_year'].astype(str) + "-" + participant_df['end_month'].astype(str) + "-" + participant_df['end_day'].astype(str))

## accounting for all cases where countries have more than one side
aggregations = {
    ## they will become side 3
    'side': 'sum',
    'battle_deaths': 'sum',
    'start_date': 'min',
    'start_year': 'max',
    'end_date': 'max',
    'end_year': 'max'
    }

participant_df = deepcopy(participant_df.groupby(['war_num', 'war_name', 'war_type', 'c_code', 'state_name']).agg(aggregations).reset_index())

Inter-StateWarData_v4.0 columns: 

Index(['WarNum', 'WarName', 'WarType', 'ccode', 'StateName', 'Side',
       'StartMonth1', 'StartDay1', 'StartYear1', 'EndMonth1', 'EndDay1',
       'EndYear1', 'StartMonth2', 'StartDay2', 'StartYear2', 'EndMonth2',
       'EndDay2', 'EndYear2', 'TransFrom', 'WhereFought', 'Initiator',
       'Outcome', 'TransTo', 'BatDeath', 'Version'],
      dtype='object')


In [10]:
participant_df['days_at_war'] = participant_df['end_date'] - participant_df['start_date']

for i, country_code in enumerate(participant_df['c_code']):
    participant_df.loc[i, 'days_at_war'] = int(str(participant_df.loc[i, 'days_at_war']).split(' ')[0])

participant_df = deepcopy(participant_df[['war_num',
                                          'war_name',
                                          'war_type',
                                          'c_code',
                                          'state_name',
                                          'side',
                                          'battle_deaths',
                                          'start_date',
                                          'start_year',
                                          'end_date',
                                          'end_year',
                                          'days_at_war']])

In [11]:
participant_df.head(3)

Unnamed: 0,war_num,war_name,war_type,c_code,state_name,side,battle_deaths,start_date,start_year,end_date,end_year,days_at_war
0,1,Franco-Spanish War,1,220,France,1,400,1823-04-07,1823,1823-11-13,1823,220
1,1,Franco-Spanish War,1,230,Spain,2,600,1823-04-07,1823,1823-11-13,1823,220
2,4,First Russo-Turkish,1,365,Russia,1,50000,1828-04-26,1828,1829-09-14,1829,506


In [12]:
dyadic_trade_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/Dyadic_COW_4.0.csv', encoding = 'utf8')

print("Dyadic_COW_4.0 columns: \n")
print(dyadic_trade_df.columns)

dyadic_trade_df.rename({'ccode1': 'c_code_a',
                        'ccode2': 'c_code_b',
                        'flow1': 'money_flow_out_a',
                        'flow2': 'money_flow_in_a'}, axis = 1, inplace = True)

dyadic_trade_df['money_flow_out_a'] = ([s * 1000000 for s in dyadic_trade_df['money_flow_out_a']])
dyadic_trade_df['money_flow_in_a'] = ([s * 1000000 for s in dyadic_trade_df['money_flow_in_a']])
dyadic_trade_df.loc[dyadic_trade_df['money_flow_out_a'] == -9000000, 'money_flow_out_a'] = None
dyadic_trade_df.loc[dyadic_trade_df['money_flow_in_a'] == -9000000, 'money_flow_in_a'] = None

dyadic_trade_df = deepcopy(dyadic_trade_df[['year',
                                            'c_code_a',
                                            'c_code_b',
                                            'money_flow_out_a',
                                            'money_flow_in_a']])

Dyadic_COW_4.0 columns: 

Index(['ccode1', 'ccode2', 'year', 'importer1', 'importer2', 'flow1', 'flow2',
       'smoothflow1', 'smoothflow2', 'smoothtotrade', 'spike1', 'spike2',
       'dip1', 'dip2', 'trdspike', 'tradedip', 'bel_lux_alt_flow1',
       'bel_lux_alt_flow2', 'china_alt_flow1', 'china_alt_flow2', 'source1',
       'source2', 'version'],
      dtype='object')


In [13]:
dyadic_trade_df.head(3)

Unnamed: 0,year,c_code_a,c_code_b,money_flow_out_a,money_flow_in_a
0,1920,2,20,611859990.0,735479980.0
1,1921,2,20,335440000.0,442989990.0
2,1922,2,20,364019990.0,502840000.0


In [14]:
## need to union to take summations but won't need to dedupe because there are no duplicates between a and b.
# this means a can be summed on its own when it's combined with b.

dyadic_trade_union_df = deepcopy(dyadic_trade_df)
## doing these inefficient column name changes to fill in for a much needed sql union of mismatching column names
dyadic_trade_union_df.rename({'c_code_a': 'c_code_a_new',
                      'c_code_b': 'c_code_b_new',
                      'money_flow_out_a': 'money_flow_out_a_new',
                      'money_flow_in_a': 'money_flow_in_a_new'}, axis = 1, inplace = True)

dyadic_trade_union_df.rename({'c_code_a_new': 'c_code_b',
                      'c_code_b_new': 'c_code_a',
                      'money_flow_out_a_new': 'money_flow_in_a',
                      'money_flow_in_a_new': 'money_flow_out_a'}, axis = 1, inplace = True)

dyadic_trade_df = deepcopy(pd.concat([dyadic_trade_df, dyadic_trade_union_df], ignore_index = True).reset_index())

In [15]:
dyadic_trade_df.head(3)

Unnamed: 0,index,c_code_a,c_code_b,money_flow_in_a,money_flow_out_a,year
0,0,2,20,735479980.0,611859990.0,1920
1,1,2,20,442989990.0,335440000.0,1921
2,2,2,20,502840000.0,364019990.0,1922


In [16]:
aggregations = {
    'money_flow_in_a': 'sum',
    'money_flow_out_a': 'sum',
    }

trade_df_1 = dyadic_trade_df.groupby(['c_code_a', 'year']).agg(aggregations).reset_index()
trade_df_1.rename({'c_code_a':'c_code', 'money_flow_in_a': 'money_flow_in', 'money_flow_out_a': 'money_flow_out'}, axis = 1, inplace = True)

In [17]:
trade_df_1.head(3)

Unnamed: 0,c_code,year,money_flow_in,money_flow_out
0,2,1870,359400000.15,256170000.17
1,2,1871,430509999.8,319960000.9
2,2,1872,353040010.44,313530009.19


In [18]:
trade_df_2 = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/National_COW_4.0.csv', encoding = 'latin-1')
trade_df_2.rename({'ccode': 'c_code'}, axis = 1, inplace = True)

print("National_COW_4.0 columns: \n")
print(trade_df_2.columns)

trade_df_2 = deepcopy(trade_df_2[['c_code', 'year', 'imports', 'exports']])

trade_df = deepcopy(pd.merge(trade_df_1, trade_df_2, on = ['c_code', 'year']))

National_COW_4.0 columns: 

Index(['c_code', 'statename', 'stateabb', 'year', 'imports', 'exports',
       'alt_imports', 'alt_exports', 'source1', 'source2', 'version'],
      dtype='object')


In [19]:
trade_df.head(3)

Unnamed: 0,c_code,year,money_flow_in,money_flow_out,imports,exports
0,2,1870,359400000.15,256170000.17,450.0,418.0
1,2,1871,430509999.8,319960000.9,534.0,475.0
2,2,1872,353040010.44,313530009.19,632.0,474.0


In [20]:
mil_cap_df = pd.read_csv('/Users/the_networks_of_war/data_sources/csvs/NMC_5_0-wsupplementary.csv', encoding = 'latin-1')

mil_cap_df.rename({'milex': 'military_expenditure',
                   'milper': 'military_personnel',
                   'irst': 'iron_steel_production',
                   'pec': 'prim_energy_consumption',
                   'tpop': 'total_population',
                   'upop': 'urban_population',
                   'upopgrowth': 'urban_pop_growth_rate',
                   'ccode': 'c_code',
#                    'statenme': 'state_name',
                   'cinc': 'cinc_score'}, axis = 1, inplace = True)

print("NMC_5_0-wsupplementary columns: \n")
print(mil_cap_df.columns)

mil_cap_df['military_expenditure'] = ([s * 1000 for s in mil_cap_df['military_expenditure']])
mil_cap_df['military_personnel'] = ([s * 1000 for s in mil_cap_df['military_personnel']])
mil_cap_df['total_population'] = ([s * 1000 for s in mil_cap_df['total_population']])
mil_cap_df['urban_population'] = ([s * 1000 for s in mil_cap_df['urban_population']])
mil_cap_df['iron_steel_production'] = ([s * 2000000 for s in mil_cap_df['iron_steel_production']])
mil_cap_df['prim_energy_consumption'] = ([s * 2000000 for s in mil_cap_df['prim_energy_consumption']])

mil_cap_df.loc[mil_cap_df['military_expenditure'] == -9000, 'military_expenditure'] = 0
mil_cap_df.loc[mil_cap_df['military_personnel'] == -9000, 'military_personnel'] = 0
mil_cap_df.loc[mil_cap_df['total_population'] == -9000, 'total_population'] = 0
mil_cap_df.loc[mil_cap_df['urban_population'] == -9000, 'urban_population'] = 0
mil_cap_df.loc[mil_cap_df['iron_steel_production'] == -18000000 , 'iron_steel_production'] = 0
mil_cap_df.loc[mil_cap_df['prim_energy_consumption'] == -18000000 , 'prim_energy_consumption'] = 0

mil_cap_df = mil_cap_df.sort_values(by = 'year', ascending = True).reset_index()

mil_cap_df = deepcopy(mil_cap_df[['c_code',
                                  'year',
                                  'military_expenditure',
                                  'military_personnel',
                                  'prim_energy_consumption',
                                  'iron_steel_production',
                                  'total_population',
                                  'urban_population',
                                  'cinc_score']])

NMC_5_0-wsupplementary columns: 

Index(['statenme', 'stateabb', 'c_code', 'year', 'military_expenditure',
       'milexsource', 'milexnote', 'military_personnel', 'milpersource',
       'milpernote', 'iron_steel_production', 'irstsource', 'irstnote',
       'irstqualitycode', 'irstanomalycode', 'prim_energy_consumption',
       'pecsource', 'pecnote', 'pecqualitycode', 'pecanomalycode',
       'total_population', 'tpopsource', 'tpopnote', 'tpopqualitycode',
       'tpopanomalycode', 'urban_population', 'upopsource', 'upopnote',
       'upopqualitycode', 'upopanomalycode', 'urban_pop_growth_rate',
       'upopgrowthsource', 'cinc_score', 'version'],
      dtype='object')


In [21]:
mil_cap_df.head(3)

Unnamed: 0,c_code,year,military_expenditure,military_personnel,prim_energy_consumption,iron_steel_production,total_population,urban_population,cinc_score
0,2,1816,3823000,17000,508000000,160000000,8659000.0,101000.0,0.0397
1,230,1816,6512000,125000,0,20000000,11073000.0,221000.0,0.04639
2,210,1816,2375000,26000,2284000000,100000000,5610000.0,337000.0,0.03991


In [22]:
descriptive_df = deepcopy(pd.merge(trade_df, mil_cap_df, on = ['c_code', 'year']))

In [23]:
descriptive_df.head(3)

Unnamed: 0,c_code,year,money_flow_in,money_flow_out,imports,exports,military_expenditure,military_personnel,prim_energy_consumption,iron_steel_production,total_population,urban_population,cinc_score
0,2,1870,359400000.15,256170000.17,450.0,418.0,13128000,50000,75558000000,3384000000,39905000.0,4130000.0,0.099
1,2,1871,430509999.8,319960000.9,534.0,475.0,11811000,42000,79092000000,3470000000,40938000.0,4302000.0,0.09838
2,2,1872,353040010.44,313530009.19,632.0,474.0,14246000,42000,96958000000,5180000000,41972000.0,4481000.0,0.11528


In [24]:
descriptive_df.rename({'year': 'start_year'}, axis = 1, inplace = True)
participant_df = deepcopy(pd.merge(participant_df, descriptive_df, on = ['c_code', 'start_year']))
descriptive_df.rename({'start_year': 'end_year'}, axis = 1, inplace = True)
participant_df = deepcopy(pd.merge(participant_df, descriptive_df, on = ['c_code', 'end_year']))

print("participant_df columns: \n")
print(participant_df.columns)

participant_df columns: 

Index(['war_num', 'war_name', 'war_type', 'c_code', 'state_name', 'side',
       'battle_deaths', 'start_date', 'start_year', 'end_date', 'end_year',
       'days_at_war', 'money_flow_in_x', 'money_flow_out_x', 'imports_x',
       'exports_x', 'military_expenditure_x', 'military_personnel_x',
       'prim_energy_consumption_x', 'iron_steel_production_x',
       'total_population_x', 'urban_population_x', 'cinc_score_x',
       'money_flow_in_y', 'money_flow_out_y', 'imports_y', 'exports_y',
       'military_expenditure_y', 'military_personnel_y',
       'prim_energy_consumption_y', 'iron_steel_production_y',
       'total_population_y', 'urban_population_y', 'cinc_score_y'],
      dtype='object')


In [25]:
## manually filling in missing battle numbers from google search.
## https://en.wikipedia.org/wiki/World_War_II_casualties
## to be investigated across all cow datasets later for whether they are located elsewhere.

# countries_df.loc[(countries_df['state_name_a']=='Japan') & (countries_df['warnum']==139), 'batdtha'] = 2300000
# countries_df.loc[(countries_df['state_name_a']=='France') & (countries_df['warnum']==139), 'batdtha'] = 600000

In [27]:
war_df = deepcopy(participant_df[['war_num', 'war_name', 'war_type']])
duplicate_list = ['war_num', 'war_name', 'war_type']
war_df.drop_duplicates(subset = duplicate_list, keep = 'first', inplace = True)

In [28]:
participant_df.to_pickle('/Users/the_networks_of_war/data_sources/pickles/participant_df.pkl')
dyad_df.to_pickle('/Users/the_networks_of_war/data_sources/pickles/dyad_df.pkl')
war_df.to_pickle('/Users/the_networks_of_war/data_sources/pickles/war_df.pkl')

In [30]:
war_df = deepcopy(war_df[war_df['war_num']==139].reset_index(drop = True))
participant_df = deepcopy(participant_df[participant_df['war_num']==139].reset_index(drop = True))
dyad_df = deepcopy(dyad_df[dyad_df['war_num']==139].reset_index(drop = True))

network_nodes = list(participant_df['c_code'])

graph_file = open("the_networks_of_war.json", 'w+')

json_dic = {}
json_dic['nodes'] = {}
json_dic['links'] = {}
start_line = '{\n  "war": [\n'
middle_line_1 = '  ],\n  "nodes": [\n'
middle_line_2 = '  ],\n  "links": [\n'
end_line = '\n  ]\n}'

graph_file.write(start_line)

add_line = ('    {"war_name": "' + str(war_df.loc[0, 'war_name'])
            + '", "war_num": "' + str(war_df.loc[0, 'war_num'])
            + '", "war_type": "' + str(war_df.loc[0, 'war_type'])
            + '"}\n')
            
graph_file.write(add_line)

graph_file.write(middle_line_1)
for i, node in enumerate(participant_df['c_code']):
    if node == network_nodes[-1]:
        add_line = (
            '    {"country": "' + str(participant_df.loc[i, 'state_name'])
            + '", "country_code": "' + str(participant_df.loc[i, 'c_code'])
            + '", "side": "' + str(participant_df.loc[i, 'side'])
            + '", "battle_deaths": "' + str(participant_df.loc[i, 'battle_deaths'])
            + '", "start_year": "' + str(participant_df.loc[i, 'start_year'])
            + '", "start_date": "' + str(participant_df.loc[i, 'start_date'])
            + '", "end_year": "' + str(participant_df.loc[i, 'end_year'])
            + '", "end_date": "' + str(participant_df.loc[i, 'end_date'])
            + '", "days_at_war": "' + str(participant_df.loc[i, 'days_at_war'])
            + '", "money_flow_out_start": "' + str(participant_df.loc[i, 'money_flow_out_x'])
            + '", "money_flow_out_end": "' + str(participant_df.loc[i, 'money_flow_out_y'])
            + '", "money_flow_in_start": "' + str(participant_df.loc[i, 'money_flow_in_x'])
            + '", "money_flow_in_end": "' + str(participant_df.loc[i, 'money_flow_in_y'])
            + '", "imports_start": "' + str(participant_df.loc[i, 'imports_x'])
            + '", "imports_end": "' + str(participant_df.loc[i, 'imports_y'])
            + '", "exports_start": "' + str(participant_df.loc[i, 'exports_x'])
            + '", "exports_end": "' + str(participant_df.loc[i, 'exports_y'])
            + '", "military_expenditure_start": "' + str(participant_df.loc[i, 'military_expenditure_x'])
            + '", "military_expenditure_end": "' + str(participant_df.loc[i, 'military_expenditure_y'])
            + '", "military_personnel_start": "' + str(participant_df.loc[i, 'military_personnel_x'])
            + '", "military_personnel_end": "' + str(participant_df.loc[i, 'military_personnel_y'])
            + '", "prim_energy_consumption_start": "' + str(participant_df.loc[i, 'prim_energy_consumption_x'])
            + '", "prim_energy_consumption_end": "' + str(participant_df.loc[i, 'prim_energy_consumption_y'])
            + '", "iron_steel_production_start": "' + str(participant_df.loc[i, 'iron_steel_production_x'])
            + '", "iron_steel_production_end": "' + str(participant_df.loc[i, 'iron_steel_production_y'])
            + '", "total_population_start": "' + str(participant_df.loc[i, 'total_population_x'])
            + '", "total_population_end": "' + str(participant_df.loc[i, 'total_population_y'])
            + '", "urban_population_start": "' + str(participant_df.loc[i, 'urban_population_x'])
            + '", "urban_population_end": "' + str(participant_df.loc[i, 'urban_population_y'])
            + '", "cinc_score_start": "' + str(participant_df.loc[i, 'cinc_score_x'])
            + '", "cinc_score_end": "' + str(participant_df.loc[i, 'cinc_score_y'])
            + '"' + '}\n'
        )
        graph_file.write(add_line)
    else:
        add_line = (
            '    {"country": "' + str(participant_df.loc[i, 'state_name'])
            + '", "country_code": "' + str(participant_df.loc[i, 'c_code'])
            + '", "side": "' + str(participant_df.loc[i, 'side'])
            + '", "battle_deaths": "' + str(participant_df.loc[i, 'battle_deaths'])
            + '", "start_year": "' + str(participant_df.loc[i, 'start_year'])
            + '", "start_date": "' + str(participant_df.loc[i, 'start_date'])
            + '", "end_year": "' + str(participant_df.loc[i, 'end_year'])
            + '", "end_date": "' + str(participant_df.loc[i, 'end_date'])
            + '", "days_at_war": "' + str(participant_df.loc[i, 'days_at_war'])
            + '", "money_flow_out_start": "' + str(participant_df.loc[i, 'money_flow_out_x'])
            + '", "money_flow_out_end": "' + str(participant_df.loc[i, 'money_flow_out_y'])
            + '", "money_flow_in_start": "' + str(participant_df.loc[i, 'money_flow_in_x'])
            + '", "money_flow_in_end": "' + str(participant_df.loc[i, 'money_flow_in_y'])
            + '", "imports_start": "' + str(participant_df.loc[i, 'imports_x'])
            + '", "imports_end": "' + str(participant_df.loc[i, 'imports_y'])
            + '", "exports_start": "' + str(participant_df.loc[i, 'exports_x'])
            + '", "exports_end": "' + str(participant_df.loc[i, 'exports_y'])
            + '", "military_expenditure_start": "' + str(participant_df.loc[i, 'military_expenditure_x'])
            + '", "military_expenditure_end": "' + str(participant_df.loc[i, 'military_expenditure_y'])
            + '", "military_personnel_start": "' + str(participant_df.loc[i, 'military_personnel_x'])
            + '", "military_personnel_end": "' + str(participant_df.loc[i, 'military_personnel_y'])
            + '", "prim_energy_consumption_start": "' + str(participant_df.loc[i, 'prim_energy_consumption_x'])
            + '", "prim_energy_consumption_end": "' + str(participant_df.loc[i, 'prim_energy_consumption_y'])
            + '", "iron_steel_production_start": "' + str(participant_df.loc[i, 'iron_steel_production_x'])
            + '", "iron_steel_production_end": "' + str(participant_df.loc[i, 'iron_steel_production_y'])
            + '", "total_population_start": "' + str(participant_df.loc[i, 'total_population_x'])
            + '", "total_population_end": "' + str(participant_df.loc[i, 'total_population_y'])
            + '", "urban_population_start": "' + str(participant_df.loc[i, 'urban_population_x'])
            + '", "urban_population_end": "' + str(participant_df.loc[i, 'urban_population_y'])
            + '", "cinc_score_start": "' + str(participant_df.loc[i, 'cinc_score_x'])
            + '", "cinc_score_end": "' + str(participant_df.loc[i, 'cinc_score_y'])
            + '"' + '},\n'
        )
        graph_file.write(add_line)        

graph_file.write(middle_line_2)


## sometimes a country could be in the dyad and not in the participant df.
## this is rare but has happened (see spain in WWII)
## for this reason, a full outer join or something like it must incorporate/address all that is in both dataframes

for i, node_1 in enumerate(dyad_df['c_code_a']):
    if node_1 not in network_nodes:
        print("{} is missing from participant_df for {}.".format(c_code_dic[node_1], war_df.loc[0, 'war_name']))
    elif dyad_df.loc[i, 'c_code_b'] not in network_nodes:
        print("{} is missing from participant_df for {}.".format(c_code_dic[dyad_df.loc[i, 'c_code_b']], war_df.loc[0, 'war_name']))
    elif i+1 != len(dyad_df['c_code_a']):
        add_line = (
            '    {"source": ' + str(network_nodes.index(node_1))
            + ', "target": ' + str(network_nodes.index(dyad_df.loc[i, 'c_code_b']))
            + ', "bond": ' + str(1)
            + '},\n'
        )
        graph_file.write(add_line)
    else:
        add_line = (
            '    {"source": ' + str(network_nodes.index(node_1))
            + ', "target": ' + str(network_nodes.index(dyad_df.loc[i, 'c_code_b']))
            + ', "bond": ' + str(1)
            + '}\n'
        )
        graph_file.write(add_line)

graph_file.write(end_line)

Spain is missing from participant_df for World War II.
Thailand is missing from participant_df for World War II.
Thailand is missing from participant_df for World War II.


6