# Data Prepping Quality of Government
This notebook obtains indicators from the Quality of Government Standard Datasets and adds them to the existing dataset containing variables from UCDP, FORGE and the World Bank. The following variables are obtained from the Quality of Government data: quality of government, military expenditure index and military personnel index. The variables have missing values and need to be imputed.

In [1]:
### Load libraries -------
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import os

# Pandas parameters
#pd.set_option('display.max_colwidth', -1)
#pd.set_option('display.max_rows', 2000)
#pd.set_option('display.max_columns', 500)

In [2]:
### Define plot parameters ------
plt.rcParams["font.family"] = "garamond"
plt.rcParams.update({'font.size': 22})
rcParams.update({'figure.autolayout': True})

In [3]:
### Define out paths ------

# check if out path exists, if it does not exists add the path
if not os.path.exists('C:\\Users\\frank\\Desktop\\master_thesis_out'):
    os.makedirs('C:\\Users\\frank\\Desktop\\master_thesis_out')

# add subfolders for the out path
out_paths = {
    "desciptive_plots_outcome": 'C:\\Users\\frank\\Desktop\\master_thesis_out\desciptive_plots_outcome',
    "desciptive_tables_outcome": 'C:\\Users\\frank\\Desktop\\master_thesis_out\desciptive_tables_outcome',
    "data": 'C:\\Users\\frank\\Desktop\\master_thesis_out\data'
}

for key, val in out_paths.items():
    if not os.path.exists(val):
        os.makedirs(val)
        
out_paths["desciptive_tables_outcome"]

'C:\\Users\\frank\\Desktop\\master_thesis_out\\desciptive_tables_outcome'

In [4]:
### Load data --------
ucdp_forge_wb = pd.read_csv(os.path.join(out_paths["data"], "ucdp_forge_wb.csv"))
ucdp_forge_wb.head(4)
len(ucdp_forge_wb)

2065

In [5]:
### Check existing cow country codes ----
# check manually with home page: https://correlatesofwar.org/data-sets/cow-country-codes
con_codes = ucdp_forge_wb.groupby(["StateAbb", 'ccode_wb', 'cname', 'ccode']).size().reset_index().rename(columns={0:'count'})
con_codes.head(3)

Unnamed: 0,StateAbb,ccode_wb,cname,ccode,count
0,AFG,AFG,Afghanistan,700,64
1,ALG,DZA,Algeria,615,41
2,ANG,AGO,Angola,540,60


In [6]:
### Inconsistency in existing cow country codes -----
ucdp_forge_wb[["year", 'cname', 'ccode']].loc[ucdp_forge_wb["ccode"]==678] # this should be Yemen 679
# 678 refers to Yemen Arab Republic

Unnamed: 0,year,cname,ccode
105,2009,Yemen,678
106,2010,Yemen,678
107,2011,Yemen,678
1308,1994,Yemen,678


In [7]:
### Change cow code for Yemen manually ------
ucdp_forge_wb.loc[ucdp_forge_wb['ccode'] == 678, 'ccode'] = 679
ucdp_forge_wb[["year", 'cname', 'ccode']].loc[ucdp_forge_wb["ccode"]==679]

Unnamed: 0,year,cname,ccode
105,2009,Yemen,679
106,2010,Yemen,679
107,2011,Yemen,679
1308,1994,Yemen,679


In [8]:
### Load QoG Standard Dataset version Jan21 ----------
# Codebook: https://www.qogdata.pol.gu.se/data/codebook_std_jan21.pdf

qog = pd.read_csv("https://www.qogdata.pol.gu.se/data/qog_std_ts_jan21.csv", low_memory=False)
qog.head()

Unnamed: 0,ccode,cname,year,ccodealp,cname_year,ccodealp_year,ccodecow,ccodewb,version,aid_cpnc,...,wwbi_rrespubpemp,wwbi_sprpempn,wwbi_sprpempp,wwbi_sprpemps,wwbi_sprpempt,wwbi_spupempn,wwbi_spupempp,wwbi_spupemps,wwbi_spupempt,wwbi_tertiarypubsec
0,4,Afghanistan,1946,AFG,Afghanistan 1946,AFG46,700.0,4.0,QoGStdTSJan21,,...,,,,,,,,,,
1,4,Afghanistan,1947,AFG,Afghanistan 1947,AFG47,700.0,4.0,QoGStdTSJan21,,...,,,,,,,,,,
2,4,Afghanistan,1948,AFG,Afghanistan 1948,AFG48,700.0,4.0,QoGStdTSJan21,,...,,,,,,,,,,
3,4,Afghanistan,1949,AFG,Afghanistan 1949,AFG49,700.0,4.0,QoGStdTSJan21,,...,,,,,,,,,,
4,4,Afghanistan,1950,AFG,Afghanistan 1950,AFG50,700.0,4.0,QoGStdTSJan21,,...,,,,,,,,,,


In [9]:
### Check cow country codes in qog data ----
# check manually with home page: https://correlatesofwar.org/data-sets/cow-country-codes
qog_codes = qog.groupby(["cname", "ccodecow"]).size().reset_index().rename(columns={0:'count'})
qog_codes.head(3)

Unnamed: 0,cname,ccodecow,count
0,Afghanistan,700.0,75
1,Albania,339.0,75
2,Algeria,615.0,75


In [10]:
### Subset qog data to only include variables which are needed ----
qog_s = qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]]
qog_s = qog_s.rename(columns={'ccodecow': 'ccode_qog', 'year': 'year', 'icrg_qog': 'gov_qual', 
                              'bicc_milexp': 'milexp_in', 'bicc_milper': 'milper_in'})
qog_s.head(3)

Unnamed: 0,ccode_qog,year,gov_qual,milexp_in,milper_in
0,700.0,1946,,,
1,700.0,1947,,,
2,700.0,1948,,,


In [11]:
### Merge qog variables with existing dataset (how = left) --------
ucdp_forge_wb_qog = pd.merge(ucdp_forge_wb, qog_s, how='left', left_on=['year', 'ccode'], right_on=['year', 'ccode_qog'])
ucdp_forge_wb_qog.head(3)

Unnamed: 0,year,StateAbb,ccode,cname,side_a,side_a_new_id,side_b,side_b_new_id,dyad_name,dyad_new_id,...,parent_merger,ccode_wb,oil,forest_cov,rural,internet_use,ccode_qog,gov_qual,milexp_in,milper_in
0,1990,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,0,IRN,21.474983,5.572374,43.67,0.0,630.0,0.305556,1.84,0.52
1,1991,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,0,IRN,,5.587699,43.135,0.0,630.0,0.37037,1.52,0.52
2,1992,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,0,IRN,,5.603023,42.347,0.0,630.0,0.467593,1.34,0.51


In [12]:
### Check if merge was successful ---------
codes = ucdp_forge_wb_qog.groupby(["ccode_qog", 'ccode']).size()
ucdp_forge_wb["ccode"].unique()
len(codes)

73

In [13]:
### Delete merge variable ----
ucdp_forge_wb_qog = ucdp_forge_wb_qog.drop(['ccode_qog'], 1)
len(ucdp_forge_wb_qog)

2065

In [14]:
### Which variables are in data -------------
dat_var = []
for col in ucdp_forge_wb_qog.columns:
    dat_var.append(col)
dat_var

['year',
 'StateAbb',
 'ccode',
 'cname',
 'side_a',
 'side_a_new_id',
 'side_b',
 'side_b_new_id',
 'dyad_name',
 'dyad_new_id',
 'active_year',
 'best',
 'high',
 'low',
 'foundloc',
 'foundyear',
 'foundmo',
 'foundday',
 'fightyear',
 'fightmo',
 'fightday',
 'goalnominal',
 'goalindep',
 'goalauto',
 'goalrights',
 'goalrep',
 'goalchange',
 'goaldem',
 'goalother',
 'goalnote',
 'ideology',
 'ideolcom',
 'ideolleft',
 'ideolright',
 'ideolnat',
 'ideolanti',
 'ideolrel',
 'ideoloth',
 'ideolnote',
 'religious',
 'religion',
 'ethnic',
 'ethnicity',
 'preorg',
 'preorgno',
 'preorgreb',
 'preorgter',
 'preorgpar',
 'preorgmvt',
 'preorgyou',
 'preorglab',
 'preorgmil',
 'preorggov',
 'preorgfmr',
 'preorgrel',
 'preorgfor',
 'preorgref',
 'preorgeth',
 'preorgoth',
 'preorgname',
 'merger',
 'splinter',
 'splinterUCDP',
 'foundloc_cat',
 'foundloc_cat_lab',
 'foundloc_cat_cat',
 'foundloc_cat_cat_lab',
 'age_formation',
 'age_active',
 'goal_territory',
 'goal_gov_represent',
 'go

In [15]:
### Check missing values in added variables ----
missing = ucdp_forge_wb_qog[['year',
 'cname', 'ccode',
 'gov_qual',
 'milexp_in',
 'milper_in']]
null_data = missing[missing.isnull().any(axis=1)]
null_data.sort_values(by=["cname", 'ccode', "year"]).head(3)

Unnamed: 0,year,cname,ccode,gov_qual,milexp_in,milper_in
762,1989,Afghanistan,700,,,
767,1989,Afghanistan,700,,,
781,1989,Afghanistan,700,,,


In [16]:
### Check countries which are completely missing since this indicates a merging issue ----
qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]].loc[qog["ccodecow"]==346] # Bosnia and Herzegovina, check
qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]].loc[qog["ccodecow"]==530] # Ethiopia, check
qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]].loc[qog["ccodecow"]==625] # Sudan, check
qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]].loc[qog["ccodecow"]==365] # USSR, check
qog[["ccodecow", "year", "icrg_qog", "bicc_milexp", "bicc_milper"]].loc[qog["ccodecow"]==678] # Yemen <--- not in qog data, should be checked (see above)

Unnamed: 0,ccodecow,year,icrg_qog,bicc_milexp,bicc_milper


In [17]:
#### Reset index -----------
ucdp_forge_wb_qog.reset_index(drop=False, inplace=False)
ucdp_forge_wb_qog.head(3)

Unnamed: 0,year,StateAbb,ccode,cname,side_a,side_a_new_id,side_b,side_b_new_id,dyad_name,dyad_new_id,...,parent_civil_society,parent_merger,ccode_wb,oil,forest_cov,rural,internet_use,gov_qual,milexp_in,milper_in
0,1990,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,1,0,IRN,21.474983,5.572374,43.67,0.0,0.305556,1.84,0.52
1,1991,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,1,0,IRN,,5.587699,43.135,0.0,0.37037,1.52,0.52
2,1992,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,1,0,IRN,,5.603023,42.347,0.0,0.467593,1.34,0.51


In [18]:
### Save dataset -----------
ucdp_forge_wb_qog.to_csv(os.path.join(out_paths["data"], "ucdp_forge_wb_qog.csv"), index=False, sep=',')
len(ucdp_forge_wb_qog)

2065