# Create POLITY table

In CoW, nonstate actors do not have a unique identifier. In order to have a normalized table structure for the CoW data, they need to be assigned one. 

In [1]:
import pandas as pd
import numpy as np

In [2]:
dfStates = pd.read_csv('../Data/CoW/Raw/states2016.csv', encoding='utf-8')
dfNonStateWarEntities = pd.read_csv('../Data/CoW/Raw/Non-StateWarData_v4.0.csv', usecols=['SideA1', 'SideA2', 'SideB1', 'SideB2', 'SideB3', 'SideB4', 'SideB5'], encoding='utf-8', na_values=[-7, -8, -9])
dfIntraStateWarEntities = pd.read_csv('../Data/CoW/Raw/Intra-StateWarData_v4.1.csv', usecols=['CcodeA', 'SideA', 'CcodeB', 'SideB'], encoding='latin-1', na_values=[-7, -8, -9])
dfExtraStateWarEntities = pd.read_csv('../Data/CoW/Raw/Extra-StateWarData_v4.0.csv', usecols=['ccode1', 'SideA', 'ccode2', 'SideB'], encoding='latin-1', na_values=[-7, -8, -9])

In [4]:
dfStatesPOL = dfStates[['stateabb', 'ccode', 'statenme']].drop_duplicates() \
              .rename(columns={'stateabb':'StateAbbr', 'ccode':'PolityID', 'statenme':'PolityName'})
dfStatesPOL['PolityType'] = 'State'
dfStatesPOL = dfStatesPOL[['PolityID', 'PolityName', 'PolityType', 'StateAbbr']]
dfStatesPOL

Unnamed: 0,PolityID,PolityName,PolityType,StateAbbr
0,2,United States of America,State,USA
1,20,Canada,State,CAN
2,31,Bahamas,State,BHM
3,40,Cuba,State,CUB
5,41,Haiti,State,HAI
...,...,...,...,...
238,970,Nauru,State,NAU
239,983,Marshall Islands,State,MSI
240,986,Palau,State,PAL
241,987,Federated States of Micronesia,State,FSM


In [5]:
IOrows = [(0, 'League of Nations', 'International Organization', ''),
          (1, 'United Nations', 'International Organization', '')]
dfIOrows = pd.DataFrame(IOrows, columns=['PolityID', 'PolityName', 'PolityType', 'StateAbbr'])

dfPolity = pd.concat([dfIOrows, dfStatesPOL])
dfPolity

Unnamed: 0,PolityID,PolityName,PolityType,StateAbbr
0,0,League of Nations,International Organization,
1,1,United Nations,International Organization,
0,2,United States of America,State,USA
1,20,Canada,State,CAN
2,31,Bahamas,State,BHM
...,...,...,...,...
238,970,Nauru,State,NAU
239,983,Marshall Islands,State,MSI
240,986,Palau,State,PAL
241,987,Federated States of Micronesia,State,FSM


In [9]:
dfNSWE_A1 = dfNonStateWarEntities[['SideA1']].rename(columns={'SideA1':'PolityName'})
dfNSWE_A2 = dfNonStateWarEntities[['SideA2']].rename(columns={'SideA2':'PolityName'})
dfNSWE_B1 = dfNonStateWarEntities[['SideB1']].rename(columns={'SideB1':'PolityName'})
dfNSWE_B2 = dfNonStateWarEntities[['SideB2']].rename(columns={'SideB2':'PolityName'})
dfNSWE_B3 = dfNonStateWarEntities[['SideB3']].rename(columns={'SideB3':'PolityName'})
dfNSWE_B4 = dfNonStateWarEntities[['SideB4']].rename(columns={'SideB4':'PolityName'})
dfNSWE_B5 = dfNonStateWarEntities[['SideB5']].rename(columns={'SideB5':'PolityName'})

NSWEallsides = [dfNSWE_A1, dfNSWE_A2, dfNSWE_B1, dfNSWE_B2, dfNSWE_B3, dfNSWE_B4, dfNSWE_B5]
dfNSWEallsides = pd.concat(NSWEallsides).dropna()
dfNSWEallsides['PolityName'] = dfNSWEallsides['PolityName'].str.strip()
dfNSWEallsides = dfNSWEallsides.drop_duplicates().reset_index(drop=True)
dfNSWEallsides

Unnamed: 0,PolityName
0,Te Rauparaha's Ngati Toa
1,Shaka Zulu
2,Burma
3,Buenos Aires
4,Hongi Hika's Nga Phuhi
...,...
113,Waikato
114,Waikato River Maori
115,Ngati Ira
116,Te Arawa


In [10]:
dfISWE_A = dfIntraStateWarEntities[['CcodeA', 'SideA']].rename(columns={'CcodeA':'PolityID', 'SideA':'PolityName'})
dfISWE_B = dfIntraStateWarEntities[['CcodeB', 'SideB']].rename(columns={'CcodeB':'PolityID', 'SideB':'PolityName'})

dfISWEallsides = pd.concat([dfISWE_A, dfISWE_B]).dropna(subset=['PolityName'])
dfISWEallsides['PolityName'] = dfISWEallsides['PolityName'].str.strip()
dfISWEallsides = dfISWEallsides.drop_duplicates()
dfISWEallsides

Unnamed: 0,PolityID,PolityName
0,365.0,Russia
1,,Sidon
2,300.0,Austria
3,329.0,Two Sicilies
4,230.0,Spain
...,...,...
434,,MILF & NPA
435,,FUDC
437,,SCIC
439,531.0,Eritrea


In [11]:
dfESWE_A = dfExtraStateWarEntities[['ccode1', 'SideA']].rename(columns={'ccode1':'PolityID', 'SideA':'PolityName'})
dfESWE_B = dfExtraStateWarEntities[['ccode2', 'SideB']].rename(columns={'ccode2':'PolityID', 'SideB':'PolityName'})

dfESWEallsides = pd.concat([dfESWE_A, dfESWE_B]).dropna(subset=['PolityName'])
dfESWEallsides['PolityName'] = dfESWEallsides['PolityName'].str.strip()
dfESWEallsides = dfESWEallsides.drop_duplicates()
dfESWEallsides

Unnamed: 0,PolityID,PolityName
0,210.0,Netherlands
1,200.0,United Kingdom
2,640.0,Ottoman Empire
3,230.0,Spain
16,140.0,Brazil
...,...,...
174,,Khmer Rouge
176,,Muhajadin
178,,PKK in Iraq
186,,al-Qaeda & Taliban


In [18]:
dfWarEntities = pd.concat([dfESWEallsides, dfISWEallsides, dfNSWEallsides], sort=True)
dfWarEntities = dfWarEntities[dfWarEntities['PolityID'].isna()] \
                .drop_duplicates() \
                .reset_index(drop=True)
dfWarEntities['PolityType'] = 'NonState Actor'
dfWarEntities

Unnamed: 0,PolityID,PolityName,PolityType
0,,Algeria,NonState Actor
1,,Saudi Wahhabis,NonState Actor
2,,San Martin revolutionaries,NonState Actor
3,,New Granada,NonState Actor
4,,Mina Expedition,NonState Actor
...,...,...,...
496,,Waikato,NonState Actor
497,,Waikato River Maori,NonState Actor
498,,Ngati Ira,NonState Actor
499,,Te Arawa,NonState Actor


In [22]:
start = 10000
nsa_ids = np.arange(start, start+dfWarEntities.shape[0])
dfWarEntities['PolityID'] = nsa_ids
dfWarEntities

Unnamed: 0,PolityID,PolityName,PolityType
0,10000,Algeria,NonState Actor
1,10001,Saudi Wahhabis,NonState Actor
2,10002,San Martin revolutionaries,NonState Actor
3,10003,New Granada,NonState Actor
4,10004,Mina Expedition,NonState Actor
...,...,...,...
496,10496,Waikato,NonState Actor
497,10497,Waikato River Maori,NonState Actor
498,10498,Ngati Ira,NonState Actor
499,10499,Te Arawa,NonState Actor


In [24]:
dfPolity = pd.concat([dfPolity, dfWarEntities], sort=True).reset_index(drop=True)
dfPolity

Unnamed: 0,PolityID,PolityName,PolityType,StateAbbr
0,0,League of Nations,International Organization,
1,1,United Nations,International Organization,
2,2,United States of America,State,USA
3,20,Canada,State,CAN
4,31,Bahamas,State,BHM
...,...,...,...,...
1216,10496,Waikato,NonState Actor,
1217,10497,Waikato River Maori,NonState Actor,
1218,10498,Ngati Ira,NonState Actor,
1219,10499,Te Arawa,NonState Actor,


In [25]:
dfPolity.to_csv("../Data/CoW/Wrangled/polities.csv", index=False, encoding='utf-8')