### Code for processing V-DEM v.9 dataset

### Sanittawan Tan

In [1]:
import numpy as np
import pandas as pd

In [2]:
YEAR = 2005

In [3]:
vdem = pd.read_csv('V-Dem-CY-Full+Others-v9.csv', dtype={'year': 'Int64'}, low_memory=False)

In [4]:
vdem = vdem.drop(vdem[vdem['year'] < 2005].index)

In [5]:
vdem = vdem.rename(columns={'country_name':'country'})

In [6]:
vdem['country'] = vdem['country'].apply(lambda x: x.strip().lower())

In [7]:
vdem.country.unique()

array(['afghanistan', 'angola', 'albania', 'united arab emirates',
       'argentina', 'armenia', 'australia', 'austria', 'azerbaijan',
       'burundi', 'belgium', 'benin', 'burkina faso', 'bangladesh',
       'bulgaria', 'bahrain', 'bosnia and herzegovina', 'belarus',
       'bolivia', 'brazil', 'barbados', 'bhutan', 'botswana',
       'central african republic', 'canada', 'switzerland', 'chile',
       'china', 'ivory coast', 'cameroon',
       'democratic republic of the congo', 'republic of the congo',
       'colombia', 'comoros', 'cape verde', 'costa rica', 'cuba',
       'cyprus', 'czech republic', 'germany', 'djibouti', 'denmark',
       'dominican republic', 'algeria', 'ecuador', 'egypt', 'eritrea',
       'spain', 'estonia', 'ethiopia', 'finland', 'fiji', 'france',
       'gabon', 'united kingdom', 'georgia', 'ghana', 'guinea',
       'the gambia', 'guinea-bissau', 'equatorial guinea', 'greece',
       'guatemala', 'guyana', 'hong kong', 'honduras', 'croatia', 'haiti',
     

In [8]:
len(vdem.country.unique())

179

In [9]:
cols_to_keep = ['country', 'year',
            'v2lgfunds_ord', 'v2lgamend',
            'v2exhoshog', 'v2exaphogp', 'v2exaphos', 'v2ddlexci',
            'v2ddlexrf', 'v2ddlexpl', 'v2lginello', 'v2lginelup',
            'v2exl_legitlead_ord', 'v2pepwrgeo_ord', 'v2clgencl_ord',
            'v2clpolcl_ord', 'v2peapssoc_ord', 'v2peapsgen_ord',
            'v2peapsecon_ord', 'v2peapspol_ord', 'v2peapsgeo_ord',
            'v2pepwrses_ord', 'v2pepwrsoc_ord', 'v2pepwrgen_ord',
            'v2peedueq_ord', 'v2pehealth_ord', 'v2mecenefm_ord',
            'v2mecenefi_ord', 'v2mecrit_ord', 'v2meslfcen_ord',
            'v2mebias_ord','v2mecorrpt_ord','v2cseeorgs_ord',
            'v2csreprss_ord', 'v2clrelig_ord'
            'v2clacfree_ord', 'v2cltrnslw_ord', 'v2clrspct_ord',
            'v2cldiscm_ord','v2cldiscw_ord','v2jureform_ord',
            'v2jupoatck_ord', 'v2jupack_ord', 'v2juhcind_ord',
            'v2juncind_ord', 'v2jureview_ord', 'v2lgotovst_ord',
            'v2lgoppart_ord', 'v2exremhsp_ord',
            'v2exremhog_ord', 'v2exrescon_ord', 'v2exbribe_ord',
            'v2psbars_ord', 'v2psoppaut_ord', 'v2psplats_ord',
            'v2pscohesv_ord', 'v2eldonate_ord', 'v2elpubfin_ord',
            'v2elembaut_ord', 'v2elmulpar_ord', 'v2elvotbuy_ord',
            'v2elfrcamp_ord', 'v2elfrfair_ord', 'v2elaccept_ord',
            'v2elasmoff_ord', 'v2eltrnout_ord', 'v2elintim'
]

In [10]:
nominal = ['v2lgfunds', 'v2lgamend',
            'v2exhoshog', 'v2exaphogp', 'v2exaphos', 'v2ddlexci',
            'v2ddlexrf', 'v2ddlexpl'
]

In [11]:
ordinal = ['v2exl_legitlead_ord', 'v2pepwrgeo_ord', 'v2clgencl_ord',
            'v2clpolcl_ord', 'v2peapssoc_ord', 'v2peapsgen_ord',
            'v2peapsecon_ord', 'v2peapspol_ord', 'v2peapsgeo_ord',
            'v2pepwrses_ord', 'v2pepwrsoc_ord', 'v2pepwrgen_ord',
            'v2peedueq_ord', 'v2pehealth_ord', 'v2mecenefm_ord',
            'v2mecenefi_ord', 'v2mecrit_ord', 'v2meslfcen_ord',
            'v2mebias_ord','v2mecorrpt_ord','v2cseeorgs_ord',
            'v2csreprss_ord', 'v2clrelig_ord'
            'v2clacfree_ord', 'v2cltrnslw_ord', 'v2clrspct_ord',
            'v2cldiscm_ord','v2cldiscw_ord','v2jureform_ord',
            'v2jupoatck_ord', 'v2jupack_ord', 'v2juhcind_ord',
            'v2juncind_ord', 'v2jureview_ord', 'v2lgotovst_ord',
            'v2lgoppart_ord', 'v2exremhsp_ord',
            'v2exremhog_ord', 'v2exrescon_ord', 'v2exbribe_ord',
            'v2psbars_ord', 'v2psoppaut_ord', 'v2psplats_ord',
            'v2pscohesv_ord', 'v2eldonate_ord', 'v2elpubfin_ord',
            'v2elembaut_ord', 'v2elmulpar_ord', 'v2elvotbuy_ord',
            'v2elfrcamp_ord', 'v2elfrfair_ord', 'v2elaccept_ord',
            'v2elasmoff_ord', 'v2eltrnout_ord', 'v2elintim'
]

In [12]:
continuous = ['v2lginello', 'v2lginelup'
]

In [13]:
ss_vdem = vdem.filter(items=cols_to_keep)

In [14]:
ss_vdem.head()

Unnamed: 0,country,year,v2lgfunds_ord,v2lgamend,v2exhoshog,v2exaphogp,v2exaphos,v2ddlexci,v2ddlexrf,v2ddlexpl,...,v2eldonate_ord,v2elpubfin_ord,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim
216,afghanistan,2005,0.0,0.0,1.0,,,0.0,0.0,2.0,...,2.0,0.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,-0.24
217,afghanistan,2006,0.0,0.0,1.0,,,0.0,0.0,2.0,...,2.0,0.0,2.0,,,,,,,
218,afghanistan,2007,0.0,0.0,1.0,,,0.0,0.0,2.0,...,2.0,0.0,2.0,,,,,,,
219,afghanistan,2008,0.0,0.0,1.0,,,0.0,0.0,2.0,...,2.0,0.0,2.0,,,,,,,
220,afghanistan,2009,0.0,0.0,1.0,,,0.0,0.0,2.0,...,2.0,0.0,2.0,3.0,0.0,2.0,2.0,3.0,2.0,-0.421


In [15]:
ss_vdem.shape

(2498, 64)

In [16]:
ss_vdem['country'].replace({'united states of america': 'united states'}, inplace=True)

In [17]:
ss_vdem.to_csv('./cleaned_VDEM.csv', index=False)