# Geographic Visualization of CoW vs UCDP/PRIO

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure

## Import Country Shapes to GeoPandas DataFrame

In [2]:
cshapes_fp = "./Data/CShapes/cshapes_0.6/cshapes.shp"

cshapes_df = gpd.read_file(cshapes_fp)

In [3]:
cshapes_df.columns

Index(['CNTRY_NAME', 'AREA', 'CAPNAME', 'CAPLONG', 'CAPLAT', 'FEATUREID',
       'COWCODE', 'COWSYEAR', 'COWSMONTH', 'COWSDAY', 'COWEYEAR', 'COWEMONTH',
       'COWEDAY', 'GWCODE', 'GWSYEAR', 'GWSMONTH', 'GWSDAY', 'GWEYEAR',
       'GWEMONTH', 'GWEDAY', 'ISONAME', 'ISO1NUM', 'ISO1AL2', 'ISO1AL3',
       'geometry'],
      dtype='object')

In [4]:
cshapes_df.head()

Unnamed: 0,CNTRY_NAME,AREA,CAPNAME,CAPLONG,CAPLAT,FEATUREID,COWCODE,COWSYEAR,COWSMONTH,COWSDAY,...,GWSMONTH,GWSDAY,GWEYEAR,GWEMONTH,GWEDAY,ISONAME,ISO1NUM,ISO1AL2,ISO1AL3,geometry
0,Guyana,211982.004988,Georgetown,-58.2,6.8,0,110,1966,5,26,...,5,26,2016,6,30,Guyana,328,GY,GUY,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
1,Suriname,145952.274029,Paramaribo,-55.2,5.833333,1,115,1975,11,25,...,11,25,2016,6,30,Suriname,740,SR,SUR,"POLYGON ((-55.12796 5.82217, -55.10445 5.83945..."
2,Trinidad and Tobago,5041.728952,Port-of-Spain,-61.5,10.65,2,52,1962,8,31,...,8,31,2016,6,30,Trinidad and Tobago,780,TT,TTO,"MULTIPOLYGON (((-61.07945 10.82416, -61.07556 ..."
3,Venezuela,916782.217193,Caracas,-66.9,10.5,3,101,1946,1,1,...,1,1,2016,6,30,Venezuela,862,VE,VEN,"MULTIPOLYGON (((-66.31029 10.62602, -66.28309 ..."
4,Samoa,2955.212366,Apia,-172.0,-13.8,4,990,1976,12,15,...,1,1,2016,6,30,Samoa,882,WS,WSM,"MULTIPOLYGON (((-172.59650 -13.50911, -172.551..."


In [45]:
cow_geo_df = cshapes_df[cshapes_df['COWCODE'] != -1].drop(columns=['GWCODE', 'GWSYEAR', 'GWSMONTH', 'GWSDAY', 'GWEYEAR', 'GWEMONTH', 'GWEDAY', 'ISONAME', 'ISO1NUM', 'ISO1AL2'])
gw_geo_df = cshapes_df[cshapes_df['GWCODE'] != -1].drop(columns=['COWCODE', 'COWSYEAR', 'COWSMONTH', 'COWSDAY', 'COWEYEAR', 'COWEMONTH', 'COWEDAY','ISONAME', 'ISO1NUM', 'ISO1AL2'])

In [46]:
cow_geo_df['StartDate'] = cow_geo_df['COWSYEAR'].astype(str) + '-' + cow_geo_df['COWSMONTH'].astype(str) + '-' + cow_geo_df['COWSDAY'].astype(str)
cow_geo_df['StartDate'] = pd.to_datetime(cow_geo_df['StartDate'])

cow_geo_df['EndDate'] = cow_geo_df['COWEYEAR'].astype(str) + '-' + cow_geo_df['COWEMONTH'].astype(str) + '-' + cow_geo_df['COWEDAY'].astype(str)
cow_geo_df['EndDate'] = pd.to_datetime(cow_geo_df['EndDate'])

gw_geo_df['StartDate'] = gw_geo_df['GWSYEAR'].astype(str) + '-' + gw_geo_df['GWSMONTH'].astype(str) + '-' + gw_geo_df['GWSDAY'].astype(str)
gw_geo_df['StartDate'] = pd.to_datetime(gw_geo_df['StartDate'])

gw_geo_df['EndDate'] = gw_geo_df['GWEYEAR'].astype(str) + '-' + gw_geo_df['GWEMONTH'].astype(str) + '-' + gw_geo_df['GWEDAY'].astype(str)
gw_geo_df['EndDate'] = pd.to_datetime(gw_geo_df['EndDate'])

In [56]:
cow_geo_df[cow_geo_df['COWCODE'] == 2]

Unnamed: 0,CNTRY_NAME,AREA,CAPNAME,CAPLONG,CAPLAT,FEATUREID,COWCODE,COWSYEAR,COWSMONTH,COWSDAY,COWEYEAR,COWEMONTH,COWEDAY,ISO1AL3,geometry,StartDate,EndDate
233,United States,9468306.0,Washington,-77.0,38.895,238,2,1946,1,1,2016,6,30,USA,"MULTIPOLYGON (((-155.82334 20.27250, -155.7447...",1946-01-01,2016-06-30


In [47]:
cshapes_cowcodes = cow_geo_df['COWCODE'].unique().tolist()

In [48]:
cshapes_gwcodes = gw_geo_df['GWCODE'].unique().tolist()

code citation: https://stackoverflow.com/questions/42151886/expanding-pandas-data-frame-with-date-range-in-columns
for expanding to a time-series from a time range

In [65]:
cow_geo_ts_df = pd.concat([pd.DataFrame({'month': pd.date_range(row.StartDate, row.EndDate, freq='MS'),
                                         'cow_id': row.COWCODE,
                                         'iso_alpha3': row.ISO1AL3,
                                         'country': row.CNTRY_NAME, 
                                         'shape_id': row.FEATUREID}, 
                                columns=['month', 'cow_id', 'iso_alpha3', 'country', 'shape_id']) 
                           for i, row in cow_geo_df.iterrows()], ignore_index=True)

cow_geo_ts_df = cow_geo_ts_df.merge(cow_geo_df[['FEATUREID', 'geometry']], left_on=['shape_id'], right_on=['FEATUREID'])
cow_geo_ts_df

Unnamed: 0,month,cow_id,iso_alpha3,country,shape_id,FEATUREID,geometry
0,1966-06-01,110,GUY,Guyana,0,0,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
1,1966-07-01,110,GUY,Guyana,0,0,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
2,1966-08-01,110,GUY,Guyana,0,0,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
3,1966-09-01,110,GUY,Guyana,0,0,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
4,1966-10-01,110,GUY,Guyana,0,0,"POLYGON ((-58.17262 6.81222, -58.15494 6.82819..."
...,...,...,...,...,...,...,...
123926,2016-02-01,626,SSD,South Sudan,246,246,"POLYGON ((34.09223 9.47747, 34.09416 9.45472, ..."
123927,2016-03-01,626,SSD,South Sudan,246,246,"POLYGON ((34.09223 9.47747, 34.09416 9.45472, ..."
123928,2016-04-01,626,SSD,South Sudan,246,246,"POLYGON ((34.09223 9.47747, 34.09416 9.45472, ..."
123929,2016-05-01,626,SSD,South Sudan,246,246,"POLYGON ((34.09223 9.47747, 34.09416 9.45472, ..."


In [67]:
cow_geo_ts_df.groupby('cow_id').agg({'month': ['min', 'max', 'count']})

Unnamed: 0_level_0,month,month,month
Unnamed: 0_level_1,min,max,count
cow_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2,1946-01-01,2016-06-01,846
20,1946-01-01,2016-06-01,846
31,1973-08-01,2016-06-01,515
40,1946-01-01,2016-06-01,846
41,1946-01-01,2016-06-01,846
...,...,...,...
970,1999-10-01,2016-06-01,201
983,1991-10-01,2016-06-01,297
986,1995-01-01,2016-06-01,258
987,1991-10-01,2016-06-01,297


In [68]:
gw_geo_ts_df = pd.concat([pd.DataFrame({'month': pd.date_range(row.StartDate, row.EndDate, freq='MS'),
                                        'gw_id': row.GWCODE,
                                        'iso_alpha3': row.ISO1AL3,
                                        'country': row.CNTRY_NAME, 
                                        'shape_id': row.FEATUREID}, 
                                columns=['month', 'gw_id', 'iso_alpha3', 'country', 'shape_id']) 
                           for i, row in gw_geo_df.iterrows()], ignore_index=True)

gw_geo_ts_df = gw_geo_ts_df.merge(gw_geo_df[['FEATUREID', 'geometry']], left_on=['shape_id'], right_on=['FEATUREID'])
gw_geo_ts_df

Unnamed: 0,month,gw_id,iso_alpha3,country,shape_id,FEATUREID,geometry
0,1966-06-01,110,GUY,Guyana,0,0,"POLYGON ((-58.173 6.812, -58.155 6.828, -58.03..."
1,1966-07-01,110,GUY,Guyana,0,0,"POLYGON ((-58.173 6.812, -58.155 6.828, -58.03..."
2,1966-08-01,110,GUY,Guyana,0,0,"POLYGON ((-58.173 6.812, -58.155 6.828, -58.03..."
3,1966-09-01,110,GUY,Guyana,0,0,"POLYGON ((-58.173 6.812, -58.155 6.828, -58.03..."
4,1966-10-01,110,GUY,Guyana,0,0,"POLYGON ((-58.173 6.812, -58.155 6.828, -58.03..."
...,...,...,...,...,...,...,...
128757,1992-04-01,345,YUG,Yugoslavia,247,247,"MULTIPOLYGON (((17.746 42.693, 17.740 42.692, ..."
128758,1991-09-01,365,SUN,USSR,251,251,"MULTIPOLYGON (((73.656 39.455, 73.669 39.365, ..."
128759,1991-10-01,365,SUN,USSR,253,253,"MULTIPOLYGON (((152.221 47.173, 152.207 47.125..."
128760,1991-11-01,365,SUN,USSR,254,254,"MULTIPOLYGON (((152.221 47.173, 152.207 47.125..."


In [69]:
gw_geo_ts_df.groupby('gw_id').agg({'month': ['min', 'max', 'count']})

Unnamed: 0_level_0,month,month,month
Unnamed: 0_level_1,min,max,count
gw_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2,1946-01-01,2016-06-01,846
20,1946-01-01,2016-06-01,846
31,1973-08-01,2016-06-01,515
40,1946-01-01,2016-06-01,846
41,1946-01-01,2016-06-01,846
...,...,...,...
973,1978-10-01,2016-06-01,453
983,1986-11-01,2016-06-01,356
986,1994-10-01,2016-06-01,261
987,1986-12-01,2016-06-01,355


## Import CoW conflict data

In [35]:
cow_par = pd.read_csv("./Data/CorrelatesOfWar/war_participants.csv")
cow_con = pd.read_csv("./Data/CorrelatesOfWar/wars.csv")

In [12]:
cow_par['StartDate'] = pd.to_datetime(cow_par['StartDate'])
cow_par['EndDate'] = pd.to_datetime(cow_par['EndDate'])

In [73]:
cow_con

Unnamed: 0,WarID,WarShortName,WarLongName,WarType,WarTypeName,IsIntervention,IsInternational
0,1,Franco-Spanish War,Franco-Spanish War of 1823,1,Inter-State War,,
1,4,First Russo-Turkish,First Russo-Turkish War of 1828-1829,1,Inter-State War,,
2,7,Mexican-American,Mexican-American War of 1846-1847,1,Inter-State War,,
3,10,Austro-Sardinian,Austro-Sardinian War of 1848-1849,1,Inter-State War,,
4,13,First Schleswig-Holstein,First Schleswig-Holstein War of 1848-1849,1,Inter-State War,,
...,...,...,...,...,...,...,...
649,1574,Rwandan Social Revolution,Rwandan Social Revolution of 1959-1962,8,Non-State War,,
650,1577,Dhofar Rebellion Phase 1,Dhofar Rebellion Phase 1 of 1968-1971,8,Non-State War,,
651,1581,Angola Guerilla War,Angola Guerilla War of 1974-1975,8,Non-State War,,
652,1582,East Timorese War Phase 1,East Timorese War Phase 1 of 1975,8,Non-State War,,


In [76]:
cow_par = cow_par[cow_par['PolityID'].isin(cshapes_cowcodes)]
cow_cut = cow_par[(cow_par['StartDate'] >= '1945-01-01') & (cow_par['StartDate'] <= '2016-06-30')]
cow_merged = cow_cut.merge(cow_con[['WarID', 'WarLongName', 'WarTypeName']], how='left', on=['WarID'])
cow_merged['EndDate'] = cow_merged['EndDate'].fillna('2016-12-31')
cow_merged

Unnamed: 0,WarID,PolityID,StartDate,StartDate_Prec,EndDate,EndDate_Prec,Side,IsInitiator,Outcome,Deaths,WarLongName,WarTypeName
0,139,712,1945-08-10,Day,1945-08-14,Day,A,0,1,3000.0,World War II of 1939-1945,Inter-State War
1,139,365,1945-08-08,Day,1945-08-14,Day,A,0,1,7500000.0,World War II of 1939-1945,Inter-State War
2,147,750,1947-10-26,Day,1949-01-01,Day,A,1,6,2500.0,First Kashmir War of 1947-1949,Inter-State War
3,147,770,1947-10-26,Day,1949-01-01,Day,B,0,6,1000.0,First Kashmir War of 1947-1949,Inter-State War
4,148,666,1948-05-15,Day,1948-07-18,Day,A,0,1,3000.0,Arab-Israeli War of 1948-1949,Inter-State War
...,...,...,...,...,...,...,...,...,...,...,...,...
446,938,531,2006-10-19,Day,2008-06-11,Day,B,0,2,,Third Somalia War of 2006-2008,Intra-State War
447,940,780,2006-10-11,Day,2016-12-31,Ongoing,A,1,5,,Second Sri Lanka Tamil War of 2006-present,Intra-State War
448,941,679,2007-01-29,Day,2007-06-16,Day,A,0,6,1500.0,Second Yemeni Cleric War of 2007,Intra-State War
449,1573,732,1948-04-03,Day,1949-05-01,Month,B,0,1,,Cheju Rebellion of 1948-1949,Non-State War


In [79]:
cow_war_ts = pd.concat([pd.DataFrame({'month': pd.date_range(row.StartDate, row.EndDate, freq='MS'),
                                        'cow_id': row.PolityID,
                                        'WarID': row.WarID,
                                        'WarName': row.WarLongName, 
                                        'WarType': row.WarTypeName}, 
                                columns=['month', 'cow_id', 'WarID', 'WarName', 'WarType']) 
                           for i, row in cow_merged.iterrows()], ignore_index=True)
cow_war_ts = cow_war_ts[cow_war_ts['month'] <= '2016-06-01']
cow_war_ts

Unnamed: 0,month,cow_id,WarID,WarName,WarType
0,1947-11-01,750,147,First Kashmir War of 1947-1949,Inter-State War
1,1947-12-01,750,147,First Kashmir War of 1947-1949,Inter-State War
2,1948-01-01,750,147,First Kashmir War of 1947-1949,Inter-State War
3,1948-02-01,750,147,First Kashmir War of 1947-1949,Inter-State War
4,1948-03-01,750,147,First Kashmir War of 1947-1949,Inter-State War
...,...,...,...,...,...
14928,1971-06-01,698,1577,Dhofar Rebellion Phase 1 of 1968-1971,Non-State War
14929,1971-07-01,698,1577,Dhofar Rebellion Phase 1 of 1968-1971,Non-State War
14930,1971-08-01,698,1577,Dhofar Rebellion Phase 1 of 1968-1971,Non-State War
14931,1971-09-01,698,1577,Dhofar Rebellion Phase 1 of 1968-1971,Non-State War


## Import UCDP/PRIO conflict data

In [31]:
ucdp_par = pd.read_csv("./Data/UCDP-PRIO_ArmedConflict/participants_gw.csv")
ucdp_obs = pd.read_csv("./Data/UCDP-PRIO_ArmedConflict/observations.csv")
ucdp_con = pd.read_csv("./Data/UCDP-PRIO_ArmedConflict/conflicts.csv")

In [33]:
ucdp_merged = ucdp_par.merge(ucdp_obs[['obs_id', 'intensity_level', 'cumulative_intensity']], on=['obs_id']) \
                      .merge(ucdp_con[['conflict_id', 'type_of_conflict']], on=['conflict_id'])
ucdp_merged

Unnamed: 0,obs_id,gw_id,side,role,conflict_id,year,intensity_level,cumulative_intensity,type_of_conflict
0,200-1946,145,A,primary,200,1946,War,War,Internal
1,200-1952,145,A,primary,200,1952,Minor,War,Internal
2,200-1967,145,A,primary,200,1967,Minor,War,Internal
3,201-1946,220,A,primary,201,1946,Minor,Minor,Extrasystemic
4,201-1947,220,A,primary,201,1947,Minor,Minor,Extrasystemic
...,...,...,...,...,...,...,...,...,...
4442,14275-2017,840,A,primary,14275,2017,War,War,Internal
4443,14275-2018,840,A,primary,14275,2018,Minor,War,Internal
4444,14333-2016,616,A,primary,14333,2016,Minor,Minor,Internal
4445,14609-2018,630,A,primary,14609,2018,Minor,Minor,Interstate


In [38]:
ucdp_merged = ucdp_merged[ucdp_merged['gw_id'].isin(cshapes_gwcodes)]