# Data Prepping: Our World in Data
This notebook adds data on terrain ruggedness obtained from Our World in Data. The values are entered manually, as this appeared to be the easiest way. Our World in Data obtains the original data from this source: https://diegopuga.org/data/rugged/. The variable is constant across all years because terrain changes only very slowly. The data were originally gathered with regard to 1996.

In [1]:
### Load libraries -------
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import os

# Pandas parameters
#pd.set_option('display.max_colwidth', -1)
#pd.set_option('display.max_rows', 2000)
#pd.set_option('display.max_columns', 500)

In [2]:
### Configure global matplotlib defaults ------
# `rcParams` imported from matplotlib is the same settings object as `plt.rcParams`,
# so all three defaults can be set in a single update.
rcParams.update({
    "font.family": "garamond",     # typeface used for all figure text
    "font.size": 22,               # base font size
    "figure.autolayout": True,     # let matplotlib fit labels inside the figure
})

In [3]:
### Define out paths ------

# Root output folder. The default is the original hard-coded location; set the
# MASTER_THESIS_OUT environment variable to run the notebook on another machine.
out_root = os.environ.get("MASTER_THESIS_OUT", 'C:\\Users\\frank\\Desktop\\master_thesis_out')

# Sub-folders of the output root, keyed by folder name.
# os.path.join is used instead of hand-written separators: the previous literals
# contained '\d', which Python treats as an invalid escape sequence.
# NOTE: the "desciptive_*" spelling is kept unchanged so that existing folders and
# any lookups by these keys keep working.
out_paths = {
    name: os.path.join(out_root, name)
    for name in ("desciptive_plots_outcome", "desciptive_tables_outcome", "data")
}

# Create the root and every sub-folder; exist_ok=True makes the cell safe to re-run.
os.makedirs(out_root, exist_ok=True)
for val in out_paths.values():
    os.makedirs(val, exist_ok=True)

out_paths["desciptive_tables_outcome"]

'C:\\Users\\frank\\Desktop\\master_thesis_out\\desciptive_tables_outcome'

In [4]:
### Load data --------
# The input file is read from the "data" output folder
# (presumably written by an earlier preparation notebook - TODO confirm).
data_file = os.path.join(out_paths["data"], "ucdp_forge_wb_qog_polity.csv")
ucdp_forge_wb_qog_polity = pd.read_csv(data_file)

# Number of rows loaded; this is the cell's displayed output.
# (A former `.head(4)` call was removed: its result was discarded and never shown.)
len(ucdp_forge_wb_qog_polity)

2065

In [5]:
### Generate mountainous terrain indicator - Terrain Ruggedness Index ------------
# Source of data: https://ourworldindata.org/grapher/terrain-ruggedness-index?tab=table

# Distinct country names in the dataset, kept as a plain list for reference.
# (The former bare `countries` line was removed: mid-cell expressions are not displayed.)
countries = ucdp_forge_wb_qog_polity["cname"].unique().tolist()

# Generate empty variable: every row starts as missing until a value is assigned.
# The `NaN` alias is kept so that any code referring to it continues to work.
NaN = np.nan
ucdp_forge_wb_qog_polity["terrain"] = NaN

In [6]:
### Distinct country names present in the dataset ------
# Shown in order of first appearance in the `cname` column.
ucdp_forge_wb_qog_polity.loc[:, 'cname'].unique()

array(['Iran', 'Philippines', 'Paraguay', 'Myanmar', 'India', 'Yemen',
       'Guatemala', 'Israel', 'Iraq', 'Lebanon', 'Laos', 'Ethiopia',
       'Nepal', 'Venezuela', 'DRC', 'Burundi', 'Chad', 'Colombia',
       'Indonesia', 'Peru', 'Nigeria', 'Cambodia', 'Guinea', 'Sudan',
       'Sri Lanka', 'Uganda', 'UK', 'El Salvador', 'Bangladesh',
       'Pakistan', 'Eritrea', 'Angola', 'Morocco', 'Mozambique',
       'Afghanistan', 'Nicaragua', 'Somalia', 'Liberia', 'Spain',
       'Turkey', 'Comoros', 'Panama', 'Papua New Guinea', 'Romania',
       'Mali', 'Niger', 'Rwanda', 'Senegal', 'USSR',
       'Trinidad and Tobago', 'Djibouti', 'Georgia', 'Haiti',
       'Sierra Leone', 'Yugoslavia', 'Algeria', 'Azerbaijan',
       'Bosnia and Herzegovina', 'Croatia', 'Egypt', 'Moldova',
       'Tajikistan', 'Russia', 'Mexico', 'Congo-Brazzaville',
       'Guinea-Bissau', 'Lesotho', 'Uzbekistan',
       'Central African Republic', 'Macedonia', 'Ivory Coast', 'Thailand',
       'Mauritania', 'Libya'], 

In [7]:
### Add terrain values manually from source --------------
# https://ourworldindata.org/grapher/terrain-ruggedness-index?tab=table

# Terrain Ruggedness Index per country, keyed by the `cname` spelling used in the
# dataset. Values were copied by hand from the table linked above.
terrain_by_country = {
    'Iraq': 67.00,
    'Afghanistan': 251.80,
    'Tajikistan': 530.10,
    'Pakistan': 194.90,
    'Yugoslavia': 167.30,  # Add value for Serbia
    'Algeria': 51.00,
    'Angola': 85.80,
    'Israel': 170.50,
    'Azerbaijan': 167.20,
    'Myanmar': 198.80,
    'Bangladesh': 18.60,
    'India': 101.30,
    'Bosnia and Herzegovina': 231.10,
    'Croatia': 126.70,
    'Burundi': 178.00,
    'Rwanda': 330.90,
    'Cambodia': 54.00,
    'Nigeria': 31.20,
    'Uganda': 91.30,
    'Central African Republic': 19.70,
    'Chad': 41.90,
    'Sudan': 44.20,
    'Colombia': 88.50,
    'Comoros': 332.80,
    'Congo-Brazzaville': 15.20,
    'Somalia': 65.00,
    'Djibouti': 243.20,
    'DRC': 44.30,
    'Egypt': 72.30,
    'El Salvador': 175.00,
    'Eritrea': 248.10,
    'Ethiopia': 157.00,
    'Spain': 168.90,
    'Sri Lanka': 65.00,
    'Georgia': 365.90,
    'Russia': 94.00,
    'Iran': 244.50,
    'UK': 56.80,
    'Guatemala': 180.70,
    'Guinea': 74.00,
    'Senegal': 24.40,
    'Guinea-Bissau': 49.10,
    'Haiti': 236.20,
    'Indonesia': 96.70,
    'Turkey': 262.00,
    'Liberia': 26.30,
    'Ivory Coast': 22.40,
    'Uzbekistan': 52.60,
    'Laos': 255.30,
    'Lebanon': 419.70,
    'Lesotho': 620.20,
    'Sierra Leone': 49.80,
    'Libya': 40.30,
    'Macedonia': 266.50,
    'Philippines': 202.80,
    'Mali': 14.70,
    'Mauritania': 11.50,
    'Mexico': 173.20,
    'Moldova': 79.90,
    'Morocco': 241.30,
    'Mozambique': 61.20,
    'Nepal': 504.30,
    'Nicaragua': 99.20,
    'Panama': 154.00,
    'Niger': 17.80,
    'Papua New Guinea': 158.90,
    'Paraguay': 24.50,
    'Peru': 134.70,
    'Romania': 126.70,
    'USSR': 94.00,  # Add value for Russia
    'Thailand': 105.10,
    'Trinidad and Tobago': 60.30,
    'Venezuela': 63.40,
    'Yemen': 232.30,
}

# One vectorised lookup replaces the per-country assignments: each row receives the
# value for its country, and any country missing from the table stays NaN so that
# the missing-value check can flag it.
ucdp_forge_wb_qog_polity['terrain'] = ucdp_forge_wb_qog_polity['cname'].map(terrain_by_country)

In [8]:
### Check for missing values in terrain variable -----------
# Restrict to the identifying columns plus the new variable, then keep every row
# in which at least one of these three columns is missing.
missing = ucdp_forge_wb_qog_polity.loc[:, ['year', 'cname', 'terrain']]
has_gap = missing.isna().any(axis="columns")
null_data = missing.loc[has_gap]
null_data

Unnamed: 0,year,cname,terrain


In [9]:
### Which variables are in data -------------
# Column labels collected into a plain Python list, in dataset order.
dat_var = list(ucdp_forge_wb_qog_polity.columns)
dat_var

['year',
 'StateAbb',
 'ccode',
 'cname',
 'side_a',
 'side_a_new_id',
 'side_b',
 'side_b_new_id',
 'dyad_name',
 'dyad_new_id',
 'active_year',
 'best',
 'high',
 'low',
 'foundloc',
 'foundyear',
 'foundmo',
 'foundday',
 'fightyear',
 'fightmo',
 'fightday',
 'goalnominal',
 'goalindep',
 'goalauto',
 'goalrights',
 'goalrep',
 'goalchange',
 'goaldem',
 'goalother',
 'goalnote',
 'ideology',
 'ideolcom',
 'ideolleft',
 'ideolright',
 'ideolnat',
 'ideolanti',
 'ideolrel',
 'ideoloth',
 'ideolnote',
 'religious',
 'religion',
 'ethnic',
 'ethnicity',
 'preorg',
 'preorgno',
 'preorgreb',
 'preorgter',
 'preorgpar',
 'preorgmvt',
 'preorgyou',
 'preorglab',
 'preorgmil',
 'preorggov',
 'preorgfmr',
 'preorgrel',
 'preorgfor',
 'preorgref',
 'preorgeth',
 'preorgoth',
 'preorgname',
 'merger',
 'splinter',
 'splinterUCDP',
 'foundloc_cat',
 'foundloc_cat_lab',
 'foundloc_cat_cat',
 'foundloc_cat_cat_lab',
 'age_formation',
 'age_active',
 'goal_territory',
 'goal_gov_represent',
 'go

In [10]:
### Reset index -----------
# reset_index returns a new frame rather than modifying the existing one, so the
# result must be assigned back for the reset to take effect. drop=True discards the
# old index instead of inserting it as an extra "index" column, which keeps the set
# of columns in the saved file unchanged.
ucdp_forge_wb_qog_polity = ucdp_forge_wb_qog_polity.reset_index(drop=True)
ucdp_forge_wb_qog_polity.head(3)

Unnamed: 0,year,StateAbb,ccode,cname,side_a,side_a_new_id,side_b,side_b_new_id,dyad_name,dyad_new_id,...,ccode_wb,oil,forest_cov,rural,internet_use,gov_qual,milexp_in,milper_in,polity2,terrain
0,1990,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,IRN,21.474983,5.572374,43.67,0.0,0.305556,1.84,0.52,-6.0,244.5
1,1991,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,IRN,,5.587699,43.135,0.0,0.37037,1.52,0.52,-6.0,244.5
2,1992,IRN,630,Iran,Government of Iran,114,KDPI,164,Government of Iran - KDPI,406,...,IRN,,5.603023,42.347,0.0,0.467593,1.34,0.51,-6.0,244.5


In [11]:
### Save dataset -----------
# Print the row count, then write the dataset (now including `terrain`) as a
# comma-separated file without the index column.
owid_file = os.path.join(out_paths["data"], "ucdp_forge_wb_qog_polity_owid.csv")
n_rows = len(ucdp_forge_wb_qog_polity)
print(n_rows)
ucdp_forge_wb_qog_polity.to_csv(owid_file, sep=',', index=False)

2065
