In [2]:
import pandas as pd

# The class, the object, the data and the methods

## Defining the class

In [3]:
class CountryLevelDataTransformation:
    """Build one country-level predictor table from several CSV sources.

    The methods are designed to be called once each, in this order, on a
    single instance:

    1. ``loading_the_manually_coded_data`` / ``loading_the_country_level_predictors``
    2. ``extracted_hdi``, ``extracted_vdem``, ``extracted_population``
    3. ``changing_the_column_names``
    4. ``merging_country_data``
    5. ``saving_country_data``

    Gotcha: each ``extracted_*`` method stores its result on the instance
    under the method's own name. After the first call the attribute (a
    DataFrame) shadows the method on that instance, so a second call raises
    ``TypeError``. Create a fresh instance to re-run the pipeline.
    """

    def loading_the_manually_coded_data(self):
        """Read the manually coded country variables into ``self.codes``."""
        self.codes = pd.read_csv('./country_level_source_data/manually_coded/manually_coded_variables.csv')

    def loading_the_country_level_predictors(self):
        """Read the HDI, V-Dem and population CSVs into ``self.hdi``,
        ``self.vdem`` and ``self.population``."""
        self.hdi = pd.read_csv('./country_level_source_data/human_development_index/human-development-index.csv')
        self.vdem = pd.read_csv('./country_level_source_data/varieties_of_democracy/V-Dem-CY-Core-v13.csv')
        self.population = pd.read_csv('./country_level_source_data/population/population.csv')

    def extracted_hdi(self, year=2000):
        """Keep the ``Code`` and ``Human Development Index`` columns of the
        HDI rows whose ``Year`` equals ``year``.

        The result replaces this method on the instance (see class docstring).

        NOTE(review): the default year is 2000, yet
        ``changing_the_column_names`` labels the value column ``hdi2010``.
        Confirm which year is intended.
        """
        self.extracted_hdi = self.hdi.loc[self.hdi["Year"] == year, ["Code", "Human Development Index"]]

    def extracted_vdem(self, year=2000):
        """Keep the ``country_text_id`` and ``v2x_libdem`` columns of the
        V-Dem rows whose ``year`` equals ``year``.

        The result replaces this method on the instance (see class docstring).
        """
        self.extracted_vdem = self.vdem.loc[self.vdem["year"] == year, ["country_text_id", "v2x_libdem"]]

    def extracted_population(self, year=2010):
        """Keep the ``Code`` and ``Population`` columns of the population
        rows whose ``Year`` equals ``year``.

        The result replaces this method on the instance (see class docstring).
        """
        self.extracted_population = self.population.loc[self.population["Year"] == year, ["Code", "Population"]]

    def changing_the_column_names(self):
        """Rename the columns of the three extracted frames so that they all
        share the ``iso_alpha3`` key used by ``merging_country_data``.

        Must run after all three ``extracted_*`` methods have been called.
        """
        self.extracted_hdi.columns = ['iso_alpha3', 'hdi2010']
        self.extracted_vdem.columns = ['iso_alpha3', 'v2x_libdem']
        self.extracted_population.columns = ['iso_alpha3', 'population']

    def merging_country_data(self):
        """Inner-join the coded variables with the three extracted frames on
        ``iso_alpha3`` and store the result in ``self.country_data``.

        Rows with a missing ``iso_alpha3`` are dropped from every frame before
        joining. pandas matches null keys against each other, so rows without
        a country code would otherwise be joined together and multiply into
        spurious records.
        """
        key = 'iso_alpha3'
        merged = self.codes.dropna(subset=[key])
        for frame in (self.extracted_hdi, self.extracted_vdem, self.extracted_population):
            merged = pd.merge(merged, frame.dropna(subset=[key]), on=key)
        self.country_data = merged

    def saving_country_data(self):
        """Write ``self.country_data`` to the processed-data CSV (UTF-8,
        comma-separated, without the index column)."""
        self.country_data.to_csv('./country_level_processed_data/country_predictors.csv', sep=',', index=False, encoding='utf-8')

## Instantiating the class

In [4]:
# Create the pipeline object. The class defines no __init__, so the instance
# holds no data until the loading methods are called.
CountryLevelDataTransformer = CountryLevelDataTransformation()

## Calling the methods

In [5]:
# Read the manually coded variables CSV into the .codes attribute.
CountryLevelDataTransformer.loading_the_manually_coded_data()

In [6]:
# Read the HDI, V-Dem and population CSVs into .hdi, .vdem and .population.
CountryLevelDataTransformer.loading_the_country_level_predictors()

In [7]:
# Extract the HDI rows for a single year. The result is stored as
# .extracted_hdi, which replaces the method on this instance, so running
# this cell a second time raises TypeError.
CountryLevelDataTransformer.extracted_hdi()

In [8]:
# Extract the V-Dem rows for a single year. The result is stored as
# .extracted_vdem, which replaces the method on this instance, so running
# this cell a second time raises TypeError.
CountryLevelDataTransformer.extracted_vdem()

In [9]:
# Extract the population rows for a single year. The result is stored as
# .extracted_population, which replaces the method on this instance, so
# running this cell a second time raises TypeError.
CountryLevelDataTransformer.extracted_population()

In [10]:
# Rename the columns of the three extracted frames so they share the
# 'iso_alpha3' key. Requires the three extraction cells to have run first.
CountryLevelDataTransformer.changing_the_column_names()

In [11]:
# Join the coded variables with the three extracted frames on 'iso_alpha3'
# and store the result as .country_data. Requires the renaming cell to have run.
CountryLevelDataTransformer.merging_country_data()

In [12]:
# Write .country_data to ./country_level_processed_data/country_predictors.csv.
CountryLevelDataTransformer.saving_country_data()

# The results

## The loading_the_manually_coded_data() method

In [13]:
# Display the manually coded variables as loaded from the CSV.
CountryLevelDataTransformer.codes

Unnamed: 0,iso_numeric,iso_alpha_2,iso_alpha3,country_name,authoritarian_socialism,culture_zone
0,8,AL,ALB,Albania,1.0,0.00
1,12,DZ,DZA,Algeria,0.5,0.00
2,20,AD,AND,Andorra,0.0,0.66
3,31,AZ,AZE,Azerbaijan,1.0,0.00
4,32,AR,ARG,Argentina,0.0,0.33
...,...,...,...,...,...,...
113,862,VE,VEN,Venezuela,0.5,0.33
114,887,YE,YEM,Yemen,0.0,0.00
115,894,ZM,ZMB,Zambia,0.5,0.00
116,909,,,,,


## The loading_the_country_level_predictors() method

In [14]:
# Display the full HDI table as loaded (all years).
CountryLevelDataTransformer.hdi

Unnamed: 0,Entity,Code,Year,Human Development Index
0,Afghanistan,AFG,1990,0.273
1,Afghanistan,AFG,1991,0.279
2,Afghanistan,AFG,1992,0.287
3,Afghanistan,AFG,1993,0.297
4,Afghanistan,AFG,1994,0.292
...,...,...,...,...
5918,Zimbabwe,ZWE,2017,0.594
5919,Zimbabwe,ZWE,2018,0.602
5920,Zimbabwe,ZWE,2019,0.601
5921,Zimbabwe,ZWE,2020,0.600


In [15]:
# Display the full V-Dem table as loaded (all years, all columns).
CountryLevelDataTransformer.vdem

Unnamed: 0,country_name,country_text_id,country_id,year,historical_date,project,historical,histname,codingstart,codingend,...,v2xme_altinf_sd,v2xps_party,v2xps_party_codelow,v2xps_party_codehigh,v2x_divparctrl,v2x_feduni,v2xca_academ,v2xca_academ_codelow,v2xca_academ_codehigh,v2xca_academ_sd
0,Mexico,MEX,3,1789,1789-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.670,,,,,0.000,,,,
1,Mexico,MEX,3,1790,1790-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.670,,,,,0.000,,,,
2,Mexico,MEX,3,1791,1791-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.670,,,,,0.000,,,,
3,Mexico,MEX,3,1792,1792-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.670,,,,,0.000,,,,
4,Mexico,MEX,3,1793,1793-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.670,,,,,0.000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27550,Piedmont-Sardinia,SPD,373,1857,1857-12-31,1,1,Kingdom of Piedmont-Sardinia,1789,1861,...,0.663,0.142,0.037,0.333,,0.109,,,,
27551,Piedmont-Sardinia,SPD,373,1858,1858-12-31,1,1,Kingdom of Piedmont-Sardinia,1789,1861,...,0.663,0.142,0.037,0.333,,0.109,,,,
27552,Piedmont-Sardinia,SPD,373,1859,1859-12-31,1,1,Kingdom of Piedmont-Sardinia [incorporates Lom...,1789,1861,...,0.663,0.142,0.037,0.333,,0.109,,,,
27553,Piedmont-Sardinia,SPD,373,1860,1860-12-31,1,1,,1789,1861,...,0.663,0.142,0.037,0.333,,0.096,,,,


In [16]:
# Display the full population table as loaded (all years).
CountryLevelDataTransformer.population

Unnamed: 0,Entity,Code,Year,Population
0,Afghanistan,AFG,-10000,14737
1,Afghanistan,AFG,-9000,20405
2,Afghanistan,AFG,-8000,28253
3,Afghanistan,AFG,-7000,39120
4,Afghanistan,AFG,-6000,54166
...,...,...,...,...
58094,Zimbabwe,ZWE,2017,14751101
58095,Zimbabwe,ZWE,2018,15052191
58096,Zimbabwe,ZWE,2019,15354606
58097,Zimbabwe,ZWE,2020,15669663


## The extracted_hdi(), extracted_vdem(), extracted_population() and changing_the_column_names() methods

In [17]:
# At this point the name refers to the extracted DataFrame (with renamed
# columns), not to the method that produced it.
CountryLevelDataTransformer.extracted_hdi

Unnamed: 0,iso_alpha3,hdi2010
10,AFG,0.335
42,ALB,0.677
74,DZA,0.649
96,AND,0.818
119,AGO,0.375
...,...,...
5773,VNM,0.588
5805,OWID_WRL,0.645
5837,YEM,0.450
5869,ZMB,0.418


In [18]:
# At this point the name refers to the extracted DataFrame (with renamed
# columns), not to the method that produced it.
CountryLevelDataTransformer.extracted_vdem

Unnamed: 0,iso_alpha3,v2x_libdem
211,MEX,0.480
334,SUR,0.678
568,SWE,0.880
793,CHE,0.839
914,GHA,0.573
...,...,...
25526,SLB,0.245
25649,VUT,0.511
25701,ARE,0.073
25966,HUN,0.756


In [19]:
# At this point the name refers to the extracted DataFrame (with renamed
# columns), not to the method that produced it.
CountryLevelDataTransformer.extracted_population

Unnamed: 0,iso_alpha3,population
247,AFG,28189672
506,,1055233717
578,,1055233408
840,ALB,2913402
1099,DZA,35856348
...,...,...
56928,ESH,413307
57187,OWID_WRL,6985603072
57446,YEM,24743944
57828,ZMB,13792095


## The merging_country_data() method

In [20]:
# Display the merged table produced by merging_country_data().
CountryLevelDataTransformer.country_data

Unnamed: 0,iso_numeric,iso_alpha_2,iso_alpha3,country_name,authoritarian_socialism,culture_zone,hdi2010,v2x_libdem,population
0,8,AL,ALB,Albania,1.0,0.00,0.677,0.341,2913402
1,12,DZ,DZA,Algeria,0.5,0.00,0.649,0.162,35856348
2,31,AZ,AZE,Azerbaijan,1.0,0.00,0.622,0.084,9237203
3,32,AR,ARG,Argentina,0.0,0.33,0.779,0.662,41100124
4,36,AU,AUS,Australia,0.0,1.00,0.896,0.849,22019166
...,...,...,...,...,...,...,...,...,...
101,858,UY,URY,Uruguay,0.0,0.33,0.753,0.801,3352653
102,860,UZ,UZB,Uzbekistan,1.0,0.00,0.607,0.035,28614234
103,862,VE,VEN,Venezuela,0.5,0.33,0.684,0.312,28715026
104,887,YE,YEM,Yemen,0.0,0.00,0.450,0.138,24743944


## The saving_country_data() method

In [21]:
# Read back the CSV file written by saving_country_data().
saved_data = pd.read_csv('./country_level_processed_data/country_predictors.csv')

In [22]:
# Display the table re-loaded from disk.
saved_data

Unnamed: 0,iso_numeric,iso_alpha_2,iso_alpha3,country_name,authoritarian_socialism,culture_zone,hdi2010,v2x_libdem,population
0,8,AL,ALB,Albania,1.0,0.00,0.677,0.341,2913402
1,12,DZ,DZA,Algeria,0.5,0.00,0.649,0.162,35856348
2,31,AZ,AZE,Azerbaijan,1.0,0.00,0.622,0.084,9237203
3,32,AR,ARG,Argentina,0.0,0.33,0.779,0.662,41100124
4,36,AU,AUS,Australia,0.0,1.00,0.896,0.849,22019166
...,...,...,...,...,...,...,...,...,...
101,858,UY,URY,Uruguay,0.0,0.33,0.753,0.801,3352653
102,860,UZ,UZB,Uzbekistan,1.0,0.00,0.607,0.035,28614234
103,862,VE,VEN,Venezuela,0.5,0.33,0.684,0.312,28715026
104,887,YE,YEM,Yemen,0.0,0.00,0.450,0.138,24743944
