# Merge CoW data and prepare it for time-series integration

In [1]:
import pandas as pd

In [2]:
# Load the wrangled CoW tables. For the wars table only the columns that are
# used further down are read.
war_columns = ['WarID', 'WarTypeName', 'IsIntervention', 'IsInternational']
cow_war = pd.read_csv("../Data/CoW/Wrangled/wars.csv", usecols=war_columns)
cow_pol = pd.read_csv("../Data/CoW/Wrangled/polities.csv")
cow_par = pd.read_csv("../Data/CoW/Wrangled/war_participants.csv")

In [3]:
# Restrict the participants table to polities whose PolityType is 'State'.
cow_pol_states = cow_pol[cow_pol['PolityType'] == 'State']
cow_pol_states_list = list(cow_pol_states['PolityID'].unique())

# .copy() makes the filtered result an independent frame. Later cells assign
# new values to its columns; without the copy those assignments would act on a
# slice of the loaded table and trigger SettingWithCopyWarning.
cow_par = cow_par[cow_par['PolityID'].isin(cow_pol_states_list)].copy()

In [4]:
# Placeholder end date given to participants whose EndDate is missing.
OPEN_END_DATE = '2008-01-01'
# Only years strictly greater than this value are kept in the yearly table.
YEAR_CUTOFF = 1945

cow_par['EndDate'] = cow_par['EndDate'].fillna(OPEN_END_DATE)
cow_par['StartDate'] = pd.to_datetime(cow_par['StartDate'])
cow_par['EndDate'] = pd.to_datetime(cow_par['EndDate'])

# Snap both dates to 1 January of their year so that every calendar year
# touched by a participation counts as one full year.
cow_par['StartDate'] = cow_par['StartDate'].dt.to_period('Y').dt.to_timestamp()
cow_par['EndDate'] = cow_par['EndDate'].dt.to_period('Y').dt.to_timestamp()

# A participation without a start date cannot be placed on the timeline; fail
# with a clear message instead of an opaque date-range error.
if cow_par['StartDate'].isna().any():
    raise ValueError("war_participants has rows with a missing StartDate; "
                     "they cannot be expanded into yearly rows")

# One list of calendar years per participation (inclusive on both ends).
# A participation whose end precedes its start yields an empty list.
year_lists = pd.Series(
    [list(range(first_year, last_year + 1))
     for first_year, last_year in zip(cow_par['StartDate'].dt.year,
                                      cow_par['EndDate'].dt.year)],
    index=cow_par.index,
    dtype=object,
)

# Expand to one row per (participation, year) in a single vectorised step
# rather than building and concatenating one small DataFrame per row.
cow_par_ts = (
    cow_par
    .rename(columns={'PolityID': 'cow_id', 'WarID': 'war_id', 'Deaths': 'total_deaths'})
    .assign(year=year_lists)
    .loc[:, ['year', 'cow_id', 'war_id', 'IsInitiator', 'Outcome', 'total_deaths']]
    .explode('year')
    # explode() turns an empty year list into a single NaN row; drop those so
    # such participations contribute no rows at all.
    .dropna(subset=['year'])
    .astype({'year': 'int64'})
)
cow_par_ts = cow_par_ts[cow_par_ts['year'] > YEAR_CUTOFF].reset_index(drop=True)
cow_par_ts

Unnamed: 0,year,cow_id,war_id,IsInitiator,Outcome,total_deaths
0,1947,750,147,1,6,2500.0
1,1948,750,147,1,6,2500.0
2,1949,750,147,1,6,2500.0
3,1947,770,147,0,6,1000.0
4,1948,770,147,0,6,1000.0
...,...,...,...,...,...,...
1528,1949,732,1573,0,1,
1529,1968,698,1577,0,6,
1530,1969,698,1577,0,6,
1531,1970,698,1577,0,6,


In [5]:
# Align the wars table with the naming used in the yearly participant table,
# then attach the war attributes to every participant-year row. A left join
# keeps participant-years even when no matching war row exists.
war_column_names = {'WarID': 'war_id', 'WarTypeName': 'war_type'}
cow_war = cow_war.rename(columns=war_column_names)
cow_merged = pd.merge(cow_par_ts, cow_war, how='left', on='war_id')
cow_merged

Unnamed: 0,year,cow_id,war_id,IsInitiator,Outcome,total_deaths,war_type,IsIntervention,IsInternational
0,1947,750,147,1,6,2500.0,Inter-State War,,
1,1948,750,147,1,6,2500.0,Inter-State War,,
2,1949,750,147,1,6,2500.0,Inter-State War,,
3,1947,770,147,0,6,1000.0,Inter-State War,,
4,1948,770,147,0,6,1000.0,Inter-State War,,
...,...,...,...,...,...,...,...,...,...
1528,1949,732,1573,0,1,,Non-State War,,
1529,1968,698,1577,0,6,,Non-State War,,
1530,1969,698,1577,0,6,,Non-State War,,
1531,1970,698,1577,0,6,,Non-State War,,


In [6]:
# Count how many yearly rows each (war, state) pair has.
# NOTE(review): cow_merged only holds years that survived the earlier
# `year > 1945` filter, so num_years counts retained years rather than the full
# length of a participation that began earlier - confirm this is intended,
# since num_years is later used to split total deaths across years.
cow_merged_wars = (
    cow_merged
    .groupby(['war_id', 'cow_id'])['year']
    .count()
    .rename('num_years')
    .reset_index()
)
cow_merged_wars

Unnamed: 0,war_id,cow_id,num_years
0,147,750,3
1,147,770,3
2,148,645,2
3,148,651,3
4,148,652,2
...,...,...,...
427,938,531,3
428,940,780,3
429,941,679,1
430,1573,732,2


In [7]:
# Attach the per-(war, state) year count to every yearly row, then spread the
# participant's total deaths evenly over those years.
cow_merged2 = cow_merged.merge(cow_merged_wars, on=['war_id', 'cow_id'])
deaths_per_year = cow_merged2['total_deaths'].div(cow_merged2['num_years'])
# Nullable Int64 keeps whole-number values while still allowing missing deaths.
cow_merged2['avg_deaths'] = deaths_per_year.round(0).astype('Int64')
cow_merged2

Unnamed: 0,year,cow_id,war_id,IsInitiator,Outcome,total_deaths,war_type,IsIntervention,IsInternational,num_years,avg_deaths
0,1947,750,147,1,6,2500.0,Inter-State War,,,3,833
1,1948,750,147,1,6,2500.0,Inter-State War,,,3,833
2,1949,750,147,1,6,2500.0,Inter-State War,,,3,833
3,1947,770,147,0,6,1000.0,Inter-State War,,,3,333
4,1948,770,147,0,6,1000.0,Inter-State War,,,3,333
...,...,...,...,...,...,...,...,...,...,...,...
1528,1949,732,1573,0,1,,Non-State War,,,2,
1529,1968,698,1577,0,6,,Non-State War,,,4,
1530,1969,698,1577,0,6,,Non-State War,,,4,
1531,1970,698,1577,0,6,,Non-State War,,,4,


In [8]:
# Inspect the distinct war-type labels before recoding them.
pd.unique(cow_merged2['war_type'])

array(['Inter-State War ', 'Extra-State War ', 'Intra-State War ',
       'Non-State War '], dtype=object)

In [9]:
# Raw war-type labels -> short names used as suffixes of the dummy columns.
type_map = {'Inter-State War ':'interstate', 'Extra-State War ':'extrastate', 'Intra-State War ':'intrastate', 'Non-State War ':'nonstate'}
# The raw labels carry trailing whitespace. Match on stripped text so the
# recoding does not silently produce NaN if that whitespace ever changes.
type_lookup = {raw_label.strip(): short_name for raw_label, short_name in type_map.items()}

war_type = cow_merged2['war_type'].str.strip()
# Values that are already short names (e.g. when this cell is re-run) are kept
# as they are; any other unrecognised label becomes NaN, as before.
already_recoded = war_type.isin(list(type_lookup.values()))
cow_merged2['war_type'] = war_type.map(type_lookup).where(~already_recoded, war_type)

type_dummy = pd.get_dummies(cow_merged2['war_type'], prefix='type')
# Drop dummy columns left over from a previous run so they are not duplicated.
cow_merged2 = pd.concat(
    [cow_merged2.drop(columns=type_dummy.columns, errors='ignore'), type_dummy],
    axis=1,
)

In [10]:
# Inspect the distinct outcome codes before recoding them.
pd.unique(cow_merged2['Outcome'])

array([6, 1, 2, 4, 3, 7, 5])

In [11]:
# Numeric outcome codes -> short labels used as suffixes of the dummy columns.
outcome_map = {1: 'win', 2: 'lose', 3: 'tied', 4: 'war-transitioned', 5: 'ongoing2008', 6: 'stalemate', 7: 'cont-conflict'}

outcome = cow_merged2['Outcome']
# Values that are already labels (e.g. when this cell is re-run) are kept as
# they are; any other unrecognised code becomes NaN, as before.
already_labelled = outcome.isin(list(outcome_map.values()))
cow_merged2['Outcome'] = outcome.map(outcome_map).where(~already_labelled, outcome)

outcome_dummy = pd.get_dummies(cow_merged2['Outcome'], prefix='outcome')
# Drop dummy columns left over from a previous run so they are not duplicated.
cow_merged2 = pd.concat(
    [cow_merged2.drop(columns=outcome_dummy.columns, errors='ignore'), outcome_dummy],
    axis=1,
)

In [12]:
# List the columns now present; the aggregation that follows names the dummy
# columns explicitly, so this is the place to check that they all exist.
cow_merged2.columns

Index(['year', 'cow_id', 'war_id', 'IsInitiator', 'Outcome', 'total_deaths',
       'war_type', 'IsIntervention', 'IsInternational', 'num_years',
       'avg_deaths', 'type_extrastate', 'type_interstate', 'type_intrastate',
       'type_nonstate', 'outcome_cont-conflict', 'outcome_lose',
       'outcome_ongoing2008', 'outcome_stalemate', 'outcome_tied',
       'outcome_war-transitioned', 'outcome_win'],
      dtype='object')

In [13]:
# Collapse participant-year rows into one row per (state, year).
# Every column below is summed across the wars a state is in that year; the
# number of distinct wars is counted separately.
summed_columns = [
    'IsInitiator',
    'avg_deaths',
    'type_extrastate',
    'type_interstate',
    'type_intrastate',
    'type_nonstate',
    'outcome_cont-conflict',
    'outcome_lose',
    'outcome_ongoing2008',
    'outcome_stalemate',
    'outcome_tied',
    'outcome_war-transitioned',
    'outcome_win',
]
aggregations = {'war_id': 'nunique'}
aggregations.update({column: 'sum' for column in summed_columns})

# NOTE(review): summing skips missing avg_deaths, so a state-year whose deaths
# are all missing is reported as 0 rather than as missing - confirm intended.
cow_gb = (
    cow_merged2
    .groupby(['cow_id', 'year'])
    .agg(aggregations)
    .rename(columns={'war_id': 'war_count'})
)
cow_gb

Unnamed: 0_level_0,Unnamed: 1_level_0,war_count,IsInitiator,avg_deaths,type_extrastate,type_interstate,type_intrastate,type_nonstate,outcome_cont-conflict,outcome_lose,outcome_ongoing2008,outcome_stalemate,outcome_tied,outcome_war-transitioned,outcome_win
cow_id,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,1950,1,0,13622,0,1,0,0,0,0,0,1,0,0,0
2,1951,1,0,13622,0,1,0,0,0,0,0,1,0,0,0
2,1952,1,0,13622,0,1,0,0,0,0,0,1,0,0,0
2,1953,1,0,13622,0,1,0,0,0,0,0,1,0,0,0
2,1958,1,0,1,0,0,1,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900,2008,2,2,1,2,0,0,0,0,0,2,0,0,0,0
910,1989,1,1,0,0,0,1,0,1,0,0,0,0,0,0
910,1990,1,1,0,0,0,1,0,1,0,0,0,0,0,0
910,1991,1,1,0,0,0,1,0,1,0,0,0,0,0,0


In [14]:
# Write the state-year table to disk. The (cow_id, year) index is written as
# the leading columns of the CSV.
output_path = "../Data/FINAL/cow.csv"
cow_gb.to_csv(output_path)