In [1]:
import numpy as np
import pandas as pd
import itertools as it

## Defining the class

In [2]:
class DataTransformationIndividualLevel:
    """Load, slice, merge and save individual-level WVS and EVS survey variables.

    Intended call order (each step stores its results as instance attributes):

    1. ``loading_individual_level_data('wvs')`` and
       ``loading_individual_level_data('evs')`` -> ``wvs_data`` / ``evs_data``
    2. ``extracting_individual_level_data()`` -> ``demographics_wvs_data``,
       ``demographics_evs_data``, ``wvs_<group>_data``, ``evs_<group>_data``
    3. ``merging_individual_level_data()`` -> ``demographics_data`` and
       ``<group>_data``
    4. ``extracted_merged_data_saver()`` -> one CSV file per merged table

    ``<group>`` is every entry of ``group_names``. The class-level lists are
    shared configuration and are never modified by the methods.
    """

    # Input files, resolved relative to the current working directory.
    wvs_path = 'wvs_waves_one_seven.csv'

    evs_path = 'evs_waves_one_five.dta'

    # Demographic columns; the two surveys differ only in the X047 column name.
    demographics_wvs = ['S003', 'X001', 'X003', 'X025', 'X045', 'X047_WVS', 'X049']

    demographics_evs = ['S003', 'X001', 'X003', 'X025', 'X045', 'X047_EVS', 'X049']

    # Common column names given to both demographic tables before they are
    # concatenated (position i replaces position i of the two lists above).
    demographics_merged_columns = ['S003', 'X001', 'X003', 'X025', 'X045', 'X047_IVS', 'X049']

    freedom_names = ['A029', 'A034', 'A040', 'A042', 'E018', 'E001', 'E002', 'E003', 'E004', 'E114', 'E116', 'E117']

    socialism_names = ['E035', 'E036', 'E037', 'E039']

    family_names = ['A124_09', 'F118', 'F119', 'F120', 'F121']

    ethnicity_names = ['A124_02', 'A124_06', 'C002']

    gender_names = ['D059', 'C001', 'D060']

    nationalism_names = ['E012', 'G006']

    religiosity_names = ['A006', 'A040', 'F063', 'F028', 'F034']

    # variable_groups[i] holds the columns of the group called group_names[i];
    # the two lists must stay the same length and in the same order.
    variable_groups = [freedom_names, socialism_names, family_names, ethnicity_names, gender_names, nationalism_names, religiosity_names]

    group_names = ['freedom', 'socialism', 'family', 'ethnicity', 'gender', 'nationalism', 'religiosity']

    def loading_individual_level_data(self, which_path):
        """Read one of the two survey files and keep it on the instance.

        Parameters
        ----------
        which_path : str
            ``'wvs'`` reads ``wvs_path`` with ``pd.read_csv``; ``'evs'`` reads
            ``evs_path`` with ``pd.read_stata(convert_categoricals=False)``.

        Returns
        -------
        pandas.DataFrame or str
            The loaded frame (also stored as ``wvs_data`` / ``evs_data``).
            For any other key an error message string is returned and nothing
            is stored.
            NOTE(review): returning a message instead of raising is kept for
            backward compatibility; callers should check the return type.
        """
        if which_path == 'wvs':
            self.wvs_data = pd.read_csv(self.wvs_path)
            return self.wvs_data
        elif which_path == 'evs':
            self.evs_data = pd.read_stata(self.evs_path, convert_categoricals = False)
            return self.evs_data
        else:
            return 'Error. The name of the dataset was written incorrectly.'

    def extracting_individual_level_data(self):
        """Slice the demographic and per-group columns out of both surveys.

        Requires ``wvs_data`` and ``evs_data`` to be present (an
        ``AttributeError`` is raised otherwise). Stores
        ``demographics_wvs_data``, ``demographics_evs_data`` and, for every
        group, ``wvs_<group>_data`` and ``evs_<group>_data``.
        """
        self.demographics_wvs_data = self.wvs_data[self.demographics_wvs]
        self.demographics_evs_data = self.evs_data[self.demographics_evs]

        for columns, group in zip(self.variable_groups, self.group_names):
            # setattr creates the dynamically named attribute without exec.
            setattr(self, f'wvs_{group}_data', self.wvs_data[columns])
            setattr(self, f'evs_{group}_data', self.evs_data[columns])

    def _merged_group_names(self):
        """Return the names of all merged tables: 'demographics' plus every group."""
        return ['demographics'] + list(self.group_names)

    def merging_individual_level_data(self):
        """Stack each WVS table on top of its EVS counterpart.

        Requires ``extracting_individual_level_data()`` to have run. Stores
        ``demographics_data`` and one ``<group>_data`` per group, plus the
        bookkeeping lists ``wvs_part_names`` and ``evs_part_names``.

        The demographic tables are renamed to ``demographics_merged_columns``
        on copies, so ``demographics_wvs_data`` / ``demographics_evs_data``
        keep their survey-specific column names. The method can be called
        repeatedly and does not touch the class-level ``group_names``.

        Row labels are not reset: the merged frames carry the WVS labels
        followed by the EVS labels, exactly as ``pd.concat`` returns them.
        """
        merged_columns = self.demographics_merged_columns
        wvs_demographics = self.demographics_wvs_data.rename(
            columns=dict(zip(self.demographics_wvs, merged_columns)))
        evs_demographics = self.demographics_evs_data.rename(
            columns=dict(zip(self.demographics_evs, merged_columns)))

        self.wvs_part_names = ['demographics_wvs_data'] + [f'wvs_{group}_data' for group in self.group_names]
        self.evs_part_names = ['demographics_evs_data'] + [f'evs_{group}_data' for group in self.group_names]

        self.demographics_data = pd.concat([wvs_demographics, evs_demographics])
        for group in self.group_names:
            wvs_part = getattr(self, f'wvs_{group}_data')
            evs_part = getattr(self, f'evs_{group}_data')
            setattr(self, f'{group}_data', pd.concat([wvs_part, evs_part]))

    def extracted_merged_data_saver(self):
        """Write every merged table to ``<name>_data.csv`` in the working directory.

        Requires ``merging_individual_level_data()`` to have run. The list of
        written attribute names is stored as ``extracted_merged_dataset_names``.
        Files are comma-separated, UTF-8 encoded and written without the index.
        """
        self.extracted_merged_dataset_names = [f'{group}_data' for group in self._merged_group_names()]
        for dataset_name in self.extracted_merged_dataset_names:
            getattr(self, dataset_name).to_csv(f'{dataset_name}.csv', sep=',', index=False, encoding='utf-8')

## Instantiating the class and calling the methods

#### Creating the object

In [3]:
# Create the pipeline object; the class defines no __init__, so no file is read at this point.
TransformationObject = DataTransformationIndividualLevel()

#### Calling the methods

In [4]:
# Read the WVS CSV (class attribute wvs_path) into TransformationObject.wvs_data; the returned frame is displayed.
TransformationObject.loading_individual_level_data(which_path = 'wvs')

Unnamed: 0,version,doi,S002VS,S003,COUNTRY_ALPHA,COW_NUM,COW_ALPHA,S004,S006,S007,...,Y023A,Y023B,Y023C,Y024A,Y024B,Y024C,survself,tradrat5,TradAgg,SurvSAgg
0,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,1,80420001,...,,,0.000000,,,,,,,
1,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,2,80420002,...,0.000000,0.000000,0.000000,,,,,,,
2,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,3,80420003,...,,0.000000,0.000000,,,,,,,
3,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,4,80420004,...,0.333333,1.000000,0.555556,,,,,,,
4,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,5,80420005,...,0.000000,0.000000,0.222222,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450864,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070443,909070443,...,0.444444,0.222222,0.222222,0.33,0.0,0.165,0.984535,-1.182408,-2.003676,1.820009
450865,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070444,909070444,...,1.000000,0.666667,0.777778,0.33,0.0,0.165,1.527666,0.768249,1.136881,2.803075
450866,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070445,909070445,...,0.000000,0.000000,0.000000,0.33,0.0,0.165,0.399649,-1.394810,-2.345644,0.761364
450867,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070446,909070446,...,1.000000,1.000000,0.444444,0.33,0.5,0.415,,,,


In [5]:
# Read the EVS Stata file (class attribute evs_path) into TransformationObject.evs_data; the returned frame is displayed.
TransformationObject.loading_individual_level_data(which_path = 'evs')

Unnamed: 0,studyno,version,doi,stdyno_w,versn_w,S001,S002EVS,s002vs,S003,COW_NUM,...,X048H_N1,X048I_N2,X049,x049a,X049CS,X050,X051,X052,Y001,Y002
0,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
1,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
2,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
3,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,3
4,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224429,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-2,-4,-4,-4,-4,-4,2
224430,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2
224431,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2
224432,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2


In [6]:
# Slice the demographic and per-group columns out of wvs_data and evs_data; both loading cells must have run first.
TransformationObject.extracting_individual_level_data()

In [7]:
# Concatenate every extracted WVS table with its EVS counterpart (WVS rows first) into <group>_data attributes.
TransformationObject.merging_individual_level_data()

In [8]:
# Write each merged table to '<group>_data.csv' in the current working directory (no index column).
TransformationObject.extracted_merged_data_saver()

## Checking the results

#### Looking at the WVS and EVS datasets

In [13]:
# Full WVS frame as stored by the 'wvs' load.
TransformationObject.wvs_data

Unnamed: 0,version,doi,S002VS,S003,COUNTRY_ALPHA,COW_NUM,COW_ALPHA,S004,S006,S007,...,Y023A,Y023B,Y023C,Y024A,Y024B,Y024C,survself,tradrat5,TradAgg,SurvSAgg
0,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,1,80420001,...,,,0.000000,,,,,,,
1,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,2,80420002,...,0.000000,0.000000,0.000000,,,,,,,
2,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,3,80420003,...,,0.000000,0.000000,,,,,,,
3,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,4,80420004,...,0.333333,1.000000,0.555556,,,,,,,
4,4-0-0,doi:10.14281/18241.22,4,8,ALB,339,ALB,-4,5,80420005,...,0.000000,0.000000,0.222222,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450864,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070443,909070443,...,0.444444,0.222222,0.222222,0.33,0.0,0.165,0.984535,-1.182408,-2.003676,1.820009
450865,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070444,909070444,...,1.000000,0.666667,0.777778,0.33,0.0,0.165,1.527666,0.768249,1.136881,2.803075
450866,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070445,909070445,...,0.000000,0.000000,0.000000,0.33,0.0,0.165,0.399649,-1.394810,-2.345644,0.761364
450867,4-0-0,doi:10.14281/18241.22,7,909,NIR,202,NIRL,-4,909070446,909070446,...,1.000000,1.000000,0.444444,0.33,0.5,0.415,,,,


In [14]:
# Full EVS frame as stored by the 'evs' load.
TransformationObject.evs_data

Unnamed: 0,studyno,version,doi,stdyno_w,versn_w,S001,S002EVS,s002vs,S003,COW_NUM,...,X048H_N1,X048I_N2,X049,x049a,X049CS,X050,X051,X052,Y001,Y002
0,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
1,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
2,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
3,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,3
4,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,8,339,...,-4,-4,-5,5,-4,-4,-4,-4,-4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224429,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-2,-4,-4,-4,-4,-4,2
224430,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2
224431,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2
224432,7503,3.0.0 (2022-12-14),doi:10.4232/1.14021,4800,5.0.0 (2022-06-08),1,4,5,915,347,...,-4,-4,-5,-1,-4,-4,-4,-4,-4,2


#### Looking at the extracted variables grouped by the dimension of political orientation

In [15]:
# WVS rows restricted to the columns in freedom_names.
TransformationObject.wvs_freedom_data

Unnamed: 0,A029,A034,A040,A042,E018,E001,E002,E003,E004,E114,E116,E117
0,0,0,0,1,2,-1,-1,-1,-1,-1,-1,1
1,0,0,0,1,2,-1,-1,-1,-1,-1,-1,-1
2,0,0,0,1,2,-1,-1,-1,-1,-1,-1,-1
3,1,1,0,1,2,-1,-1,-1,-1,4,4,1
4,0,0,0,1,2,1,-1,-1,-1,3,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...
450864,0,0,1,0,1,4,1,1,2,-1,4,1
450865,0,1,0,0,3,1,2,3,4,2,3,3
450866,1,0,1,1,2,1,2,3,2,3,2,1
450867,1,0,0,0,3,1,3,3,2,4,4,-1


In [16]:
# EVS rows restricted to the columns in freedom_names.
TransformationObject.evs_freedom_data

Unnamed: 0,A029,A034,A040,A042,E018,E001,E002,E003,E004,E114,E116,E117
0,0,0,0,0,2,-4,-4,3,2,3,4,2
1,0,0,0,0,1,-4,-4,3,2,2,3,1
2,0,0,0,0,2,-4,-4,3,2,-1,-1,-1
3,0,0,0,1,3,-4,-4,2,4,-1,-1,1
4,0,0,0,0,2,-4,-4,3,2,2,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
224429,1,0,1,1,1,-4,-4,1,4,1,2,2
224430,0,0,1,0,1,-4,-4,1,4,2,2,2
224431,1,0,1,1,1,-4,-4,3,2,-1,1,1
224432,0,0,0,1,1,-4,-4,2,3,-1,1,1


In [17]:
# WVS rows restricted to the columns in socialism_names.
TransformationObject.wvs_socialism_data

Unnamed: 0,E035,E036,E037,E039
0,7,3,4,3
1,7,3,10,2
2,7,1,9,-1
3,9,2,8,3
4,4,6,8,6
...,...,...,...,...
450864,8,4,5,4
450865,10,5,10,4
450866,10,5,6,8
450867,2,1,10,9


In [12]:
# EVS rows restricted to the columns in socialism_names.
TransformationObject.evs_socialism_data

Unnamed: 0,E035,E036,E037,E039
0,9,2,10,3
1,10,3,4,2
2,6,7,6,4
3,10,1,10,10
4,10,9,7,3
...,...,...,...,...
224429,5,10,5,4
224430,10,10,10,5
224431,10,10,10,5
224432,1,-1,10,7


In [13]:
# WVS rows restricted to the columns in family_names.
TransformationObject.wvs_family_data

Unnamed: 0,A124_09,F118,F119,F120,F121
0,1,-1,1,-1,1
1,1,1,1,1,1
2,1,-1,1,1,1
3,1,4,4,10,6
4,1,1,1,1,3
...,...,...,...,...,...
450864,0,5,1,3,3
450865,0,10,2,7,8
450866,0,1,1,1,1
450867,0,10,2,10,5


In [14]:
# EVS rows restricted to the columns in family_names.
TransformationObject.evs_family_data

Unnamed: 0,A124_09,F118,F119,F120,F121
0,1,6,6,4,4
1,1,5,4,7,8
2,1,4,2,3,3
3,1,-1,1,8,8
4,1,2,7,5,6
...,...,...,...,...,...
224429,0,2,5,1,4
224430,0,1,10,1,5
224431,0,7,5,5,10
224432,1,1,1,1,1


In [15]:
# WVS rows restricted to the columns in ethnicity_names.
TransformationObject.wvs_ethnicity_data

Unnamed: 0,A124_02,A124_06,C002
0,1,0,1
1,0,0,1
2,0,0,1
3,0,0,3
4,1,1,1
...,...,...,...
450864,0,0,1
450865,0,1,2
450866,0,0,1
450867,0,0,3


In [17]:
# EVS rows restricted to the columns in ethnicity_names.
TransformationObject.evs_ethnicity_data

Unnamed: 0,A124_02,A124_06,C002
0,-1,-1,1
1,0,0,2
2,1,0,2
3,1,0,2
4,0,0,2
...,...,...,...
224429,0,0,1
224430,0,0,1
224431,1,1,1
224432,0,1,1


In [18]:
# WVS rows restricted to the columns in gender_names.
TransformationObject.wvs_gender_data

Unnamed: 0,D059,C001,D060
0,2,3,3
1,-1,1,-1
2,-1,1,-1
3,3,2,4
4,1,1,1
...,...,...,...
450864,-1,3,3
450865,3,2,3
450866,4,2,4
450867,4,2,4


In [19]:
# EVS rows restricted to the columns in gender_names.
TransformationObject.evs_gender_data

Unnamed: 0,D059,C001,D060
0,-4,2,-4
1,-4,2,-4
2,-4,1,-4
3,-4,2,-4
4,-4,2,-4
...,...,...,...
224429,-4,2,-4
224430,-4,2,-4
224431,-4,2,-4
224432,-4,2,-4


In [20]:
# WVS rows restricted to the columns in nationalism_names.
TransformationObject.wvs_nationalism_data

Unnamed: 0,E012,G006
0,0,2
1,-1,2
2,-1,1
3,0,3
4,1,1
...,...,...
450864,-4,1
450865,-4,1
450866,-4,1
450867,-4,-3


In [21]:
# EVS rows restricted to the columns in nationalism_names.
TransformationObject.evs_nationalism_data

Unnamed: 0,E012,G006
0,-4,-2
1,-4,2
2,-4,4
3,-4,1
4,-4,2
...,...,...
224429,-4,-3
224430,-4,-3
224431,-4,-3
224432,-4,-3


In [22]:
# WVS rows restricted to the columns in religiosity_names.
TransformationObject.wvs_religiosity_data

Unnamed: 0,A006,A040,F063,F028,F034
0,2,0,10,8,1
1,3,0,5,4,1
2,2,0,6,6,1
3,4,0,8,8,3
4,3,0,10,8,1
...,...,...,...,...,...
450864,1,1,10,1,1
450865,4,0,1,8,2
450866,3,1,10,1,1
450867,3,0,5,4,2


In [23]:
# EVS rows restricted to the columns in religiosity_names.
TransformationObject.evs_religiosity_data

Unnamed: 0,A006,A040,F063,F028,F034
0,2,0,7,4,1
1,3,0,-2,8,3
2,4,0,5,8,3
3,2,0,2,8,1
4,3,0,1,8,3
...,...,...,...,...,...
224429,1,1,10,4,1
224430,1,1,10,4,1
224431,1,1,10,1,1
224432,1,0,10,4,1


#### Looking at the extracted tables with demographic variables

In [25]:
# Demographic table extracted from the WVS frame.
TransformationObject.demographics_wvs_data

Unnamed: 0,S003,X001,X003,X025,X045,X047_WVS,X049
0,8,2,18,6,-1,3,3
1,8,1,28,4,4,6,1
2,8,1,67,-3,4,7,1
3,8,2,33,7,2,9,4
4,8,1,38,6,5,3,4
...,...,...,...,...,...,...,...
450864,909,2,83,-4,-4,-2,2
450865,909,1,34,-4,-4,4,5
450866,909,2,19,-4,-4,5,2
450867,909,2,19,-4,-4,9,7


In [26]:
# Demographic table extracted from the EVS frame.
TransformationObject.demographics_evs_data

Unnamed: 0,S003,X001,X003,X025,X045,X047_EVS,X049
0,8,2,56,8,-4,7,-5
1,8,2,19,4,-4,8,-5
2,8,2,21,3,-4,5,-5
3,8,2,40,7,-4,-2,-5
4,8,1,22,6,-4,-1,-5
...,...,...,...,...,...,...,...
224429,915,2,24,5,-4,6,-5
224430,915,1,50,5,-4,4,-5
224431,915,2,23,5,-4,10,-5
224432,915,2,34,4,-4,9,-5


#### Looking at the merged data

In [25]:
# Merged freedom table: WVS rows followed by EVS rows.
# NOTE(review): the recorded output below has no A042 column although freedom_names lists it; it looks stale, so re-run to confirm.
TransformationObject.freedom_data

Unnamed: 0,A029,A034,A040,E018,E001,E002,E003,E004,E114,E116,E117
0,0,0,0,2,-1,-1,-1,-1,-1,-1,1
1,0,0,0,2,-1,-1,-1,-1,-1,-1,-1
2,0,0,0,2,-1,-1,-1,-1,-1,-1,-1
3,1,1,0,2,-1,-1,-1,-1,4,4,1
4,0,0,0,2,1,-1,-1,-1,3,4,2
...,...,...,...,...,...,...,...,...,...,...,...
224429,1,0,1,1,-4,-4,1,4,1,2,2
224430,0,0,1,1,-4,-4,1,4,2,2,2
224431,1,0,1,1,-4,-4,3,2,-1,1,1
224432,0,0,0,1,-4,-4,2,3,-1,1,1


In [26]:
# Merged socialism table: WVS rows followed by EVS rows.
TransformationObject.socialism_data

Unnamed: 0,E035,E036,E037,E039
0,7,3,4,3
1,7,3,10,2
2,7,1,9,-1
3,9,2,8,3
4,4,6,8,6
...,...,...,...,...
224429,5,10,5,4
224430,10,10,10,5
224431,10,10,10,5
224432,1,-1,10,7


In [27]:
# Merged family table: WVS rows followed by EVS rows.
TransformationObject.family_data

Unnamed: 0,A124_09,F118,F119,F120,F121
0,1,-1,1,-1,1
1,1,1,1,1,1
2,1,-1,1,1,1
3,1,4,4,10,6
4,1,1,1,1,3
...,...,...,...,...,...
224429,0,2,5,1,4
224430,0,1,10,1,5
224431,0,7,5,5,10
224432,1,1,1,1,1


In [28]:
# Merged ethnicity table: WVS rows followed by EVS rows.
TransformationObject.ethnicity_data

Unnamed: 0,A124_02,A124_06,C002
0,1,0,1
1,0,0,1
2,0,0,1
3,0,0,3
4,1,1,1
...,...,...,...
224429,0,0,1
224430,0,0,1
224431,1,1,1
224432,0,1,1


In [29]:
# Merged gender table: WVS rows followed by EVS rows.
TransformationObject.gender_data

Unnamed: 0,D059,C001,D060
0,2,3,3
1,-1,1,-1
2,-1,1,-1
3,3,2,4
4,1,1,1
...,...,...,...
224429,-4,2,-4
224430,-4,2,-4
224431,-4,2,-4
224432,-4,2,-4


In [30]:
# Merged nationalism table: WVS rows followed by EVS rows.
TransformationObject.nationalism_data

Unnamed: 0,E012,G006
0,0,2
1,-1,2
2,-1,1
3,0,3
4,1,1
...,...,...
224429,-4,-3
224430,-4,-3
224431,-4,-3
224432,-4,-3


In [31]:
# Merged religiosity table: WVS rows followed by EVS rows.
TransformationObject.religiosity_data

Unnamed: 0,A006,A040,F063,F028,F034
0,2,0,10,8,1
1,3,0,5,4,1
2,2,0,6,6,1
3,4,0,8,8,3
4,3,0,10,8,1
...,...,...,...,...,...
224429,1,1,10,4,1
224430,1,1,10,4,1
224431,1,1,10,1,1
224432,1,0,10,4,1


In [11]:
# Merged demographic table; X047_IVS is the common name given to X047_WVS / X047_EVS before concatenation.
TransformationObject.demographics_data

Unnamed: 0,S003,X001,X003,X025,X045,X047_IVS,X049
0,8,2,18,6,-1,3,3
1,8,1,28,4,4,6,1
2,8,1,67,-3,4,7,1
3,8,2,33,7,2,9,4
4,8,1,38,6,5,3,4
...,...,...,...,...,...,...,...
224429,915,2,24,5,-4,6,-5
224430,915,1,50,5,-4,4,-5
224431,915,2,23,5,-4,10,-5
224432,915,2,34,4,-4,9,-5
