In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

In [4]:
raw = pd.read_csv("../data/HU_hv_parties_78_raw.csv")
pd.set_option('display.max_columns', 500)

In [7]:
raw.head()

Unnamed: 0,cntry,cname,cedition,cproddat,cseqno,name,essround,edition,idno,dweight,pspwght,pweight,lrscale,prtvtehu,ipcrtiv,imprich,ipeqopt,ipshabt,impsafe,impdiff,ipfrule,ipudrst,ipmodst,ipgdtim,impfree,iphlppl,ipsuces,ipstrgv,ipadvnt,ipbhprp,iprspot,iplylfr,impenv,imptrad,impfun
0,HU,ESS1-8e01,1.0,12.12.2018,201554,ESS7e02_2,7,2.2,1001,1.0,1.187696,0.497736,5,66,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
1,HU,ESS1-8e01,1.0,12.12.2018,201555,ESS7e02_2,7,2.2,1004,1.0,0.698451,0.497736,77,77,3,3,2,3,3,2,4,3,2,3,2,2,3,3,5,3,2,1,2,3,3
2,HU,ESS1-8e01,1.0,12.12.2018,201556,ESS7e02_2,7,2.2,1005,1.0,1.099583,0.497736,77,66,3,5,2,2,1,3,5,3,3,2,2,2,3,3,4,4,3,1,2,3,3
3,HU,ESS1-8e01,1.0,12.12.2018,201557,ESS7e02_2,7,2.2,1006,1.0,1.097284,0.497736,10,1,1,3,1,1,2,2,4,1,2,1,2,1,2,2,2,1,2,1,1,1,1
4,HU,ESS1-8e01,1.0,12.12.2018,201558,ESS7e02_2,7,2.2,1008,1.0,0.957079,0.497736,7,1,3,4,3,3,2,4,5,3,4,2,3,2,3,2,5,2,2,2,2,2,2


In [6]:
raw.columns

Index(['cntry', 'cname', 'cedition', 'cproddat', 'cseqno', 'name', 'essround',
       'edition', 'idno', 'dweight', 'pspwght', 'pweight', 'lrscale',
       'prtvtehu', 'ipcrtiv', 'imprich', 'ipeqopt', 'ipshabt', 'impsafe',
       'impdiff', 'ipfrule', 'ipudrst', 'ipmodst', 'ipgdtim', 'impfree',
       'iphlppl', 'ipsuces', 'ipstrgv', 'ipadvnt', 'ipbhprp', 'iprspot',
       'iplylfr', 'impenv', 'imptrad', 'impfun'],
      dtype='object')

In [8]:
raw_clean = raw.drop(columns=['cname', 
                              'cedition', 
                              'cproddat', 
                              'cseqno', 
                              'name',
                              'edition',
                              'dweight', 
                              'pspwght', 
                              'pweight'])

In [9]:
cols = {'ipcrtiv':'new_ideas',
        'imprich':'rich',
        'ipeqopt':'equality',
        'ipshabt':'abilities',
        'impsafe':'safety',
        'impdiff':'new_diff',
        'ipfrule':'obey',
        'ipudrst':'understand_diff',
        'ipmodst':'modesty',
        'ipgdtim':'good_time',
        'impfree':'freedom',
        'iphlppl':'help_others',
        'ipsuces':'success',
        'ipstrgv':'strong_gov',
        'ipadvnt':'adventures',
        'ipbhprp':'behave',
        'iprspot':'get_respect',
        'iplylfr':'loyalty',
        'impenv':'nature',
        'imptrad':'traditions',
        'impfun':'fun_pleasure',
        'cntry':'country',
        'essround':'round',
        'lrscale':'leftright_scale',
        'prtvtehu':'party'}

In [10]:
hu_hv= raw_clean.rename(columns=cols)

In [13]:
hu_hv["party"].value_counts()

66    1031
1      994
77     512
4      385
2      259
3       67
88      46
5       11
55       7
Name: party, dtype: int64

### We divide our df in 3: 
### hu_ext: those who voted for Fidesz (party == 3) 
### hu_novot: those who didn´t vote (party == 66)
### hu_noext: the rest (also excluding those who didn´t answer(77) or didn´t know(88)

In [14]:
hu_ext = hu_hv[hu_hv["party"]==3]

In [16]:
hu_novot = hu_hv[hu_hv["party"]==66]

In [18]:
hu_noext = hu_hv[~hu_hv["party"].isin([3,66,77,88])]

In [20]:
hu_ext.describe()

Unnamed: 0,round,idno,leftright_scale,party,new_ideas,rich,equality,abilities,safety,new_diff,obey,understand_diff,modesty,good_time,freedom,help_others,success,strong_gov,adventures,behave,get_respect,loyalty,nature,traditions,fun_pleasure
count,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0
mean,7.283582,2715.477612,6.179104,3.0,2.776119,3.38806,2.626866,2.925373,2.268657,3.134328,3.895522,2.985075,3.238806,2.686567,2.402985,2.835821,3.089552,2.58209,3.880597,3.164179,3.119403,2.328358,2.492537,2.895522,2.80597
std,0.454138,1391.276669,10.257186,0.0,1.807567,1.800296,1.790851,1.909414,1.871493,1.874029,1.939496,1.796398,1.724068,1.768486,1.834515,1.887736,1.823511,1.859857,1.879091,1.943108,1.95031,1.820656,1.94915,1.835131,1.876803
min,7.0,223.0,0.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,7.0,1843.0,4.0,3.0,2.0,2.0,1.0,2.0,1.0,2.0,2.5,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0
50%,7.0,2274.0,5.0,3.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,4.0,2.0,3.0,2.0,2.0,2.0,2.0
75%,8.0,3494.5,6.0,3.0,3.0,4.0,3.0,4.0,3.0,4.0,5.0,3.0,4.0,3.0,3.0,3.0,4.0,3.0,5.0,4.0,4.0,3.0,3.0,3.0,3.0
max,8.0,6200.0,88.0,3.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0


In [21]:
hu_noext.describe()

Unnamed: 0,round,idno,leftright_scale,party,new_ideas,rich,equality,abilities,safety,new_diff,obey,understand_diff,modesty,good_time,freedom,help_others,success,strong_gov,adventures,behave,get_respect,loyalty,nature,traditions,fun_pleasure
count,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0,1656.0
mean,7.512681,2645.116546,8.844203,2.108696,2.987923,3.707729,2.650966,2.996981,2.362923,3.244565,3.716184,2.968599,2.991546,2.942633,2.581522,2.716184,3.279589,2.525362,4.052536,2.82186,3.080314,2.36715,2.42029,2.762077,2.809179
std,0.49999,1564.460674,15.372353,3.668798,1.975952,1.93872,2.013888,1.990458,2.041989,1.980868,1.978526,1.954523,1.947325,1.981417,2.016996,1.987971,1.970101,2.054092,1.98764,1.997649,2.057822,2.035155,2.014971,2.001851,1.953493
min,7.0,4.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,7.0,1523.5,4.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.0
50%,8.0,2386.5,7.0,1.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,2.0,3.0,2.0,2.0,2.0,2.0
75%,8.0,3456.75,8.0,2.0,4.0,5.0,3.0,4.0,3.0,4.0,5.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,5.0,3.0,4.0,3.0,3.0,3.0,3.0
max,8.0,8031.0,88.0,55.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0


In [24]:
hu_novot.describe()

Unnamed: 0,round,idno,leftright_scale,party,new_ideas,rich,equality,abilities,safety,new_diff,obey,understand_diff,modesty,good_time,freedom,help_others,success,strong_gov,adventures,behave,get_respect,loyalty,nature,traditions,fun_pleasure
count,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0,1031.0
mean,7.489816,2656.408341,31.808923,66.0,3.243453,3.803104,2.811833,3.258002,2.630456,3.472357,3.879728,3.259942,3.313288,3.126091,2.810863,3.050436,3.489816,2.873909,4.041707,3.1484,3.411251,2.694471,2.768186,3.212415,3.038797
std,0.500139,1445.442232,37.675819,0.0,2.125639,2.0109,2.116852,2.084006,2.1825,2.091815,1.982224,2.024207,2.005286,2.074255,2.089762,2.044974,2.06676,2.15911,2.073692,2.073947,2.085928,2.162962,2.147384,2.111108,2.080993
min,7.0,3.0,0.0,66.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,7.0,1557.5,5.0,66.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0
50%,7.0,2611.0,5.0,66.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,3.0,2.0,4.0,3.0,3.0,2.0,2.0,3.0,2.0
75%,8.0,3449.5,77.0,66.0,4.0,5.0,3.0,4.0,3.0,4.0,5.0,4.0,4.0,4.0,3.0,3.0,4.0,3.0,6.0,4.0,4.0,3.0,3.0,4.0,4.0
max,8.0,8036.0,88.0,66.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
