In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

In [2]:
# Allow up to 500 columns to be shown when a wide frame is displayed.
pd.options.display.max_columns = 500

# Raw round-9 extract of the attitudes-towards-immigration items.
raw = pd.read_csv(
    '../data/MULTI_attitudes_towards_immigration_round9_raw.csv'
)

In [3]:
# Preview the first rows of the raw round-9 file to see which columns were read.
raw.head()

Unnamed: 0,essround,idno,cntry,imbgeco,imueclt,imwbcnt,Unnamed: 6
0,9,17,AT,5,6,0,
1,9,31,AT,6,5,5,
2,9,58,AT,5,6,6,
3,9,69,AT,2,1,2,
4,9,98,AT,5,4,5,


In [4]:
# Country codes (values of the `cntry` column) kept for the analysis.
countries = [
    'DE',
    'FR',
    'IT',
    'PL',
    'HU',
]

In [5]:
# Keep only the countries under study and discard the stray 'Unnamed: 6'
# column (it shows no values in the preview of the raw file).
# errors='ignore' keeps this cell re-runnable: because `raw` is overwritten,
# a second execution would otherwise raise KeyError on the already-dropped
# column.
raw = (
    raw[raw['cntry'].isin(countries)]
    .drop(columns='Unnamed: 6', errors='ignore')
    .reset_index(drop=True)
)

In [7]:
# Mapping from the original survey column names to the shorter names used
# in the rest of the notebook.
cols = {
    'essround': 'round',    # survey round
    'cntry': 'country',     # country code
    'imbgeco': 'g_b_eco',   # immigration bad/good for the economy
    'imueclt': 'u_e_cul',   # cultural life undermined/enriched
    'imwbcnt': 'b_w_cou',   # country a worse/better place to live
}

In [8]:
# Round-9 frame with the short column names; `raw` itself is left untouched.
imm_9 = raw.rename(cols, axis='columns')

In [9]:
# Preview the frame with the renamed columns.
imm_9.head()

Unnamed: 0,round,idno,country,g_b_eco,u_e_cul,b_w_cou
0,9,1,DE,8,8,8
1,9,63,DE,5,7,7
2,9,108,DE,3,2,3
3,9,117,DE,6,7,5
4,9,134,DE,8,5,5


#### g_b_eco: Immigration bad or good for country's economy
##### 0 (bad) - 10 (good), 77 (refusal), 88 (don't know), 99 (no answer)

#### u_e_cul: Country's cultural life undermined or enriched by immigrants
##### 0 (undermined) - 10 (enriched), 77 (refusal), 88 (don't know), 99 (no answer)

#### b_w_cou: Immigrants make country a worse or better place to live
##### 0 (worse) - 10 (better), 77 (refusal), 88 (don't know), 99 (no answer)

In [13]:
# Rows/columns before the non-response codes (77, 88, 99) are removed.
imm_9.shape

(10311, 6)

In [19]:
# Drop every respondent who gave a non-substantive answer
# (77 = refusal, 88 = don't know, 99 = no answer) on any of the three items.
imm_9 = imm_9[
    ~imm_9[['g_b_eco', 'u_e_cul', 'b_w_cou']].isin([77, 88, 99]).any(axis=1)
]

In [1]:
# Rows/columns after the non-response codes were removed.
# NOTE(review): the saved output of this cell is a stale NameError from running
# it on a fresh kernel (execution count 1). Re-run the notebook top to bottom;
# judging by the combined (73593) and historical (64056) row counts shown
# further down, the expected result is (9537, 6) -- confirm on re-run.
imm_9.shape

NameError: name 'imm_9' is not defined

In [53]:
# Put the identifying columns first, followed by the three attitude items.
imm_9 = imm_9.loc[
    :,
    ['country', 'round', 'idno', 'g_b_eco', 'u_e_cul', 'b_w_cou'],
]

In [54]:
# Check the column dtypes of the cleaned round-9 frame.
imm_9.dtypes

country    object
round       int64
idno        int64
g_b_eco     int64
u_e_cul     int64
b_w_cou     int64
dtype: object

In [30]:
# Now we repeat the same steps for the DataFrame with the historical data

In [32]:
# Raw historical extract of the same immigration-attitude items.
raw_h = pd.read_csv(
    '../data/MULTI_historic_attitudes_immigration_raw.csv'
)

In [33]:
# Preview the first rows of the historical file.
raw_h.head()

Unnamed: 0,cntry,cname,cedition,cproddat,cseqno,name,essround,edition,idno,dweight,pspwght,pweight,imbgeco,imueclt,imwbcnt
0,DE,ESS1-8e01,1.0,12.12.2018,66876,ESS1e06_6,1,6.6,101114,1.2572,2.358261,2.392155,0,0,0
1,DE,ESS1-8e01,1.0,12.12.2018,66877,ESS1e06_6,1,6.6,101120,1.2572,2.358261,2.392155,5,7,5
2,DE,ESS1-8e01,1.0,12.12.2018,66878,ESS1e06_6,1,6.6,101126,1.2572,1.885563,2.392155,0,0,1
3,DE,ESS1-8e01,1.0,12.12.2018,66879,ESS1e06_6,1,6.6,101304,1.2572,1.499388,2.392155,5,6,6
4,DE,ESS1-8e01,1.0,12.12.2018,66880,ESS1e06_6,1,6.6,101322,1.2572,2.358261,2.392155,0,4,2


In [35]:
# List the available columns to decide which ones to keep.
raw_h.columns

Index(['cntry', 'cname', 'cedition', 'cproddat', 'cseqno', 'name', 'essround',
       'edition', 'idno', 'dweight', 'pspwght', 'pweight', 'imbgeco',
       'imueclt', 'imwbcnt'],
      dtype='object')

In [38]:
# Remove the file-metadata and weight columns that are not used below,
# leaving cntry, essround, idno and the three attitude items.
# errors='ignore' keeps this cell re-runnable: because `raw_h` is
# overwritten, a second execution would otherwise raise KeyError on the
# already-dropped columns.
raw_h = raw_h.drop(
    columns=[
        'cname',
        'cedition',
        'cproddat',
        'cseqno',
        'name',
        'edition',
        'dweight',
        'pspwght',
        'pweight',
    ],
    errors='ignore',
)

In [39]:
# Restrict the historical data to the countries under study and renumber rows.
raw_h = (
    raw_h.loc[raw_h['cntry'].isin(countries)]
    .reset_index(drop=True)
)

In [43]:
# Historical frame with the same short column names as the round-9 frame.
imm_h = raw_h.rename(cols, axis='columns')

In [49]:
# Preview the renamed historical frame.
imm_h.head()

Unnamed: 0,country,round,idno,g_b_eco,u_e_cul,b_w_cou
0,DE,1,101114,0,0,0
1,DE,1,101120,5,7,5
2,DE,1,101126,0,0,1
3,DE,1,101304,5,6,6
4,DE,1,101322,0,4,2


In [45]:
# Rows/columns before the non-response codes (77, 88, 99) are removed.
imm_h.shape

(70442, 6)

In [46]:
# Drop every respondent who gave a non-substantive answer
# (77 = refusal, 88 = don't know, 99 = no answer) on any of the three items.
imm_h = imm_h[
    ~imm_h[['g_b_eco', 'u_e_cul', 'b_w_cou']].isin([77, 88, 99]).any(axis=1)
]

In [47]:
# Rows/columns after the non-response codes were removed.
imm_h.shape

(64056, 6)

In [51]:
# Check the column dtypes of the cleaned historical frame before concatenating.
imm_h.dtypes

country    object
round       int64
idno        int64
g_b_eco     int64
u_e_cul     int64
b_w_cou     int64
dtype: object

### Now we concatenate the two DataFrames into a single DataFrame with the values of our variables over time (2002-2018)

In [66]:
# Stack the round-9 and historical responses, order the rows by survey round
# and country, and renumber the index from 0.
at_im = (
    pd.concat([imm_9, imm_h])
    .sort_values(by=['round', 'country'])
    .reset_index(drop=True)
)

In [76]:
# Size of the combined frame (round-9 rows plus historical rows).
at_im.shape

(73593, 6)

In [77]:
# Check the column dtypes of the combined frame.
at_im.dtypes

country    object
round       int64
idno        int64
g_b_eco     int64
u_e_cul     int64
b_w_cou     int64
dtype: object

In [72]:
# Persist the combined frame; index=False omits the row index from the file.
at_im.to_csv(
    '../data/attitudes_towards_immigration_multi_2002_2018_DF.csv',
    index=False,
)