In [1]:
from warnings import filterwarnings

filterwarnings('ignore')

import json
from copy import deepcopy

import numpy as np
import pandas as pd

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [3]:
pwd

'/Users/charlieyaris/github/cyaris.github.io/assets/the_networks_of_wars'

In [4]:
# Load the inter-state war participant table (one row per participant entry).
# To summarise intra-state wars instead, swap in the alternative file:
# df = pd.read_csv('../../../international_armed_conflict/Data Sources/Intra-StateWarData_v4.1.csv', encoding = 'latin-1')
df = pd.read_csv(
    '../../../international_armed_conflict/Data Sources/Inter-StateWarData_v4.0.csv',
    encoding='latin-1',
)

# Count the non-null country-code entries recorded for each war.
aggregations = {'ccode': 'count'}

grouped_df = (
    df.groupby(['WarName', 'WarNum'])
    .agg(aggregations)
    .reset_index()
    .rename(columns={'ccode': 'total_instances'})
)

# Display the ten wars with the most participant entries.
grouped_df.sort_values(by='total_instances', ascending=False).head(10)

Unnamed: 0,WarName,WarNum,total_instances
93,World War II,139,29
44,Korean,151,16
92,World War I,106,15
34,Gulf War,211,14
70,Seven Weeks,55,11
86,War for Kosovo,221,8
84,"Vietnam War, Phase 2",163,8
94,Yom Kippur War,181,6
1,Arab-Israeli,148,6
8,Boxer Rebellion,82,6


In [5]:
# Build the dyad table for one war: each row pairs country "a" with country "b"
# and carries the names, sides and battle deaths of both. The table is finally
# doubled so that every pairing appears in both directions (a->b and b->a).

# COW war number to analyse (211 is the Gulf War in the Inter-State War data).
# Defined once so the dyad and participant filters below cannot drift apart.
WAR_NUM = 211

dyad_df = pd.read_csv('../../../international_armed_conflict/Data Sources/directed_dyadic_war.csv', encoding = 'latin-1')
dyad_df = dyad_df.rename({'warnum': 'war_num', 'statea': 'c_code_a', 'stateb': 'c_code_b'}, axis = 1)
is_selected_dyad = (dyad_df['war_num'] == WAR_NUM) & (dyad_df['durindx'] == 1)
dyad_df = dyad_df.loc[is_selected_dyad, ['war_num', 'c_code_a', 'c_code_b', 'year']]

# Attach state names for both sides of the dyad by joining the country-code
# lookup twice, first keyed on country "a" and then on country "b".
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
c_code_df = pd.read_pickle('../../../international_armed_conflict/Pickles/c_code_df.pkl')
c_code_df = c_code_df.rename({'c_code': 'c_code_a', 'state_name': 'state_name_a'}, axis = 1)
dyad_df = pd.merge(c_code_df, dyad_df, on = 'c_code_a')
c_code_df = c_code_df.rename({'c_code_a': 'c_code_b', 'state_name_a': 'state_name_b'}, axis = 1)
dyad_df = pd.merge(c_code_df, dyad_df, on = 'c_code_b')

# Attach war name/type plus side and battle deaths, again once per side.
participant_df = pd.read_csv('../../../international_armed_conflict/Data Sources/Inter-StateWarData_v4.0.csv', encoding = 'latin-1')
participant_df = participant_df.loc[
    participant_df['WarNum'] == WAR_NUM,
    ['WarNum', 'WarName', 'WarType', 'ccode', 'Side', 'BatDeath'],
]
participant_df = participant_df.rename(
    {'WarNum': 'war_num', 'WarName': 'war_name', 'WarType': 'war_type', 'ccode': 'c_code_a', 'Side': 'side_a', 'BatDeath': 'battle_deaths_a'},
    axis = 1,
)
dyad_df = pd.merge(dyad_df, participant_df, on = ['war_num', 'c_code_a'])
# War name/type are already on dyad_df; drop them so the second join does not duplicate them.
participant_df = participant_df.drop(['war_name', 'war_type'], axis = 1)
participant_df = participant_df.rename({'c_code_a': 'c_code_b', 'side_a': 'side_b', 'battle_deaths_a': 'battle_deaths_b'}, axis = 1)
dyad_df = pd.merge(dyad_df, participant_df, on = ['war_num', 'c_code_b'])

# Mirror every dyad by swapping the a/b column labels. A single rename applies the
# whole mapping at once, so no temporary "_new" names are needed for the swap.
swap_a_and_b = {
    'c_code_a': 'c_code_b', 'c_code_b': 'c_code_a',
    'state_name_a': 'state_name_b', 'state_name_b': 'state_name_a',
    'side_a': 'side_b', 'side_b': 'side_a',
    'battle_deaths_a': 'battle_deaths_b', 'battle_deaths_b': 'battle_deaths_a',
}
dyad_union_df = dyad_df.rename(swap_a_and_b, axis = 1)

# concat aligns on column names (the SQL-UNION equivalent). reset_index() gives the
# stacked rows a fresh 0..n-1 index and keeps the old one in an 'index' column.
dyad_df = pd.concat([dyad_df, dyad_union_df]).reset_index()

In [6]:
# Give each dyad an order-independent key "<lower code> <higher code>" so that
# a->b and b->a rows share the same conflict_pair value.
# Computed column-wise (index-aligned) rather than with a positional loop over
# .loc[i]: that loop only worked while the index was exactly 0..n-1 in order, which
# stops being true once this cell has sorted dyad_df, so re-running it mixed rows.
codes_a = dyad_df['c_code_a']
codes_b = dyad_df['c_code_b']
a_is_lower = codes_a.astype(int) < codes_b.astype(int)
text_a = codes_a.astype(str)
text_b = codes_b.astype(str)
dyad_df['conflict_pair'] = np.where(
    a_is_lower,
    text_a + " " + text_b,
    text_b + " " + text_a,
)

# One row per conflict pair, keeping the first row in year order.
dyad_df = dyad_df.sort_values(by = 'year', ascending = True)
connections_df = (
    dyad_df[['war_num', 'war_name', 'war_type', 'c_code_a', 'state_name_a', 'c_code_b', 'state_name_b', 'conflict_pair']]
    .drop_duplicates(subset = ['conflict_pair'], keep = 'first')
    .reset_index(drop = True)
)

# One row per country, keeping the row with the largest battle-death value
# (descending sort puts missing values last, so a recorded value wins over NaN).
dyad_df = dyad_df.sort_values(by = ['c_code_a', 'battle_deaths_a'], ascending = [True, False])
countries_df = (
    dyad_df[['war_num', 'war_name', 'war_type', 'c_code_a', 'state_name_a', 'battle_deaths_a', 'side_a']]
    .drop_duplicates(subset = ['c_code_a'], keep = 'first')
    .reset_index(drop = True)
)

In [7]:
## Manually fill in battle-death counts that are missing from the data (figures found via Google search).
## Source: https://en.wikipedia.org/wiki/World_War_II_casualties
## TODO: check the other COW datasets to see whether these figures are recorded elsewhere.
## NOTE: these overrides apply to World War II (war number 139); column names match countries_df.

# countries_df.loc[(countries_df['state_name_a']=='Japan') & (countries_df['war_num']==139), 'battle_deaths_a'] = 2300000
# countries_df.loc[(countries_df['state_name_a']=='France') & (countries_df['war_num']==139), 'battle_deaths_a'] = 600000

In [9]:
# Export the war's participant network as JSON:
#   "nodes": one object per country  -> {"country", "side", "size"}
#   "links": one object per conflict -> {"source", "target", "bond"}
# where source/target are positions in the "nodes" array.
#
# Each record is serialised with json.dumps (instead of string concatenation) so
# names are escaped correctly and missing numbers become null rather than the
# invalid token `nan`. The file is written inside a `with` block so it is always
# flushed and closed. The one-record-per-line layout of the file is unchanged.


def _json_safe(value):
    """Map missing values (NaN/None) to None so they serialise as JSON null."""
    return None if pd.isna(value) else value


def _record_lines(records):
    """Render records one per line, comma-separated, each indented four spaces."""
    if not records:
        return ''
    return ',\n'.join('    ' + json.dumps(record, ensure_ascii=False) for record in records) + '\n'


network_nodes = countries_df['state_name_a'].tolist()

# Name -> position lookup; the first occurrence wins, matching list.index().
node_position = {}
for position, state_name in enumerate(network_nodes):
    node_position.setdefault(state_name, position)

# .tolist() yields plain Python scalars, which json can serialise directly.
node_records = [
    {'country': _json_safe(country), 'side': _json_safe(side), 'size': _json_safe(size)}
    for country, side, size in zip(
        network_nodes,
        countries_df['side_a'].tolist(),
        countries_df['battle_deaths_a'].tolist(),
    )
]

# Every link gets a constant bond weight of 1.
link_records = [
    {'source': node_position[source_name], 'target': node_position[target_name], 'bond': 1}
    for source_name, target_name in zip(
        connections_df['state_name_a'].tolist(),
        connections_df['state_name_b'].tolist(),
    )
]

start_line = '{\n  "nodes": [\n'
middle_line = '  ],\n  "links": [\n'
end_line = '\n  ]\n}'

with open("the_networks_of_wars.json", 'w', encoding='utf-8') as graph_file:
    graph_file.write(start_line)
    graph_file.write(_record_lines(node_records))
    graph_file.write(middle_line)
    graph_file.write(_record_lines(link_records))
    graph_file.write(end_line)

6