In [1]:
# importing the necessary libraries
import pandas as pd
import altair as alt
import numpy as np

In [2]:
# load the raw V-Dem country-year core dataset and preview the first rows
vdem_csv_path = '../data/data_raw/V-Dem-CY-Core_csv_v13/V-Dem-CY-Core-v13.csv'
df = pd.read_csv(vdem_csv_path)
df.head()

Unnamed: 0,country_name,country_text_id,country_id,year,historical_date,project,historical,histname,codingstart,codingend,...,v2xme_altinf_sd,v2xps_party,v2xps_party_codelow,v2xps_party_codehigh,v2x_divparctrl,v2x_feduni,v2xca_academ,v2xca_academ_codelow,v2xca_academ_codehigh,v2xca_academ_sd
0,Mexico,MEX,3,1789,1789-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.67,,,,,0.0,,,,
1,Mexico,MEX,3,1790,1790-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.67,,,,,0.0,,,,
2,Mexico,MEX,3,1791,1791-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.67,,,,,0.0,,,,
3,Mexico,MEX,3,1792,1792-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.67,,,,,0.0,,,,
4,Mexico,MEX,3,1793,1793-12-31,1,1,Viceroyalty of New Spain,1789,2022,...,0.67,,,,,0.0,,,,


In [3]:
# keep only the country-years from 2000 onwards and preview the result
from_2000_onwards = df['year'].ge(2000)
dfr = df.loc[from_2000_onwards]
dfr.head()

Unnamed: 0,country_name,country_text_id,country_id,year,historical_date,project,historical,histname,codingstart,codingend,...,v2xme_altinf_sd,v2xps_party,v2xps_party_codelow,v2xps_party_codehigh,v2x_divparctrl,v2x_feduni,v2xca_academ,v2xca_academ_codelow,v2xca_academ_codehigh,v2xca_academ_sd
211,Mexico,MEX,3,2000,2000-12-31,0,1,United Mexican States,1789,2022,...,0.638,0.919,0.842,0.96,-1.575,0.986,0.897,0.837,0.944,0.62
212,Mexico,MEX,3,2001,2001-12-31,0,1,United Mexican States,1789,2022,...,0.638,0.919,0.842,0.96,-1.575,0.986,0.925,0.877,0.962,0.622
213,Mexico,MEX,3,2002,2002-12-31,0,1,United Mexican States,1789,2022,...,0.647,0.919,0.842,0.96,-1.575,0.986,0.935,0.885,0.967,0.627
214,Mexico,MEX,3,2003,2003-12-31,0,1,United Mexican States,1789,2022,...,0.647,0.919,0.842,0.96,-1.575,0.986,0.935,0.885,0.967,0.627
215,Mexico,MEX,3,2004,2004-12-31,0,1,United Mexican States,1789,2022,...,0.647,0.919,0.842,0.96,-1.575,0.986,0.927,0.875,0.961,0.621


In [4]:
# list every column of the full dataset together with its non-null count and dtype
df.info(show_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27555 entries, 0 to 27554
Data columns (total 1818 columns):
 #     Column                           Non-Null Count  Dtype  
---    ------                           --------------  -----  
 0     country_name                     27555 non-null  object 
 1     country_text_id                  27555 non-null  object 
 2     country_id                       27555 non-null  int64  
 3     year                             27555 non-null  int64  
 4     historical_date                  27555 non-null  object 
 5     project                          27555 non-null  int64  
 6     historical                       27555 non-null  int64  
 7     histname                         27453 non-null  object 
 8     codingstart                      27555 non-null  int64  
 9     codingend                        27555 non-null  int64  
 10    codingstart_contemp              26156 non-null  float64
 11    codingend_contemp                26156 non-null  f

In [5]:
# building a small variable descriptor dictionary, based on the information from the codebook
# chart_index_line reads 'question' as the chart title and 'clarification' as the subtitle,
# so these strings are user-facing: copy-paste artifacts from the codebook (words run
# together, a stray leading space, "V- Dem") have been cleaned up here.
variable_descriptions_list = [
    {
        'indicator': 'v2x_libdem',
        'question': ['To what extent is the ideal of liberal democracy achieved?'],
        'clarification': [
            'The liberal principle of democracy emphasizes the importance of protecting individual and minority rights against the tyranny of the state and the tyranny of the majority.',
            'The liberal model takes a "negative" view of political power insofar as it judges the quality of democracy by the limits placed on government.',
            'This is achieved by constitutionally protected civil liberties, strong rule of law, an independent judiciary, and effective checks and balances that, together, limit the exercise of executive power.',
            'To make this a measure of liberal democracy, the index also takes the level of electoral democracy into account.',
        ],
    },
    {
        'indicator': 'v2x_regime',
        'question': ['How can the political regime overall be classified considering the competitiveness of access to power (polyarchy) as well as liberal principles?'],
        'clarification': [
            "0: Closed autocracy: No multiparty elections for the chief executive or the legislature.",
            "1: Electoral autocracy: De-jure multiparty elections for the chief executive and the legislature, but failing to achieve that elections are free and fair, or de-facto multiparty, or a minimum level of Dahl’s institutional prerequisites of polyarchy as measured by V-Dem’s Electoral Democracy Index (v2x_polyarchy).",
            "2: Electoral democracy: De-facto free and fair multiparty elections and a minimum level of Dahl’s institutional prerequisites for polyarchy as measured by V-Dem’s Electoral Democracy Index (v2x_polyarchy), but either access to justice, or transparent law enforcement, or liberal principles of respect for personal liberties, rule of law, and judicial as well as legislative constraints on the executive not satisfied as measured by V-Dem’s Liberal Component Index (v2x_liberal).",
            "3: Liberal democracy: De-facto free and fair multiparty elections and a minimum level of Dahl’s institutional prerequisites for polyarchy as measured by V-Dem’s Electoral Democracy Index (v2x_polyarchy) are guaranteed as well as access to justice, transparent law enforcement and the liberal principles of respect for personal liberties, rule of law, and judicial as well as legislative constraints on the executive satisfied as measured by V-Dem’s Liberal Component Index (v2x_liberal).",
        ],
    },
]
# one row per indicator, with columns 'indicator', 'question' and 'clarification'
variable_df = pd.DataFrame(variable_descriptions_list)

In [6]:
# a second dataset with continent / region information per country is needed to filter by region;
# it is read here and merged with the V-Dem data further below
region_columns = ['continent','region','subregion','M49 code', 'ISO-alpha3 code']
regions = pd.read_csv(
    '../data/data_raw/VDEM-adjusted_country-and-continent-codes-list.csv',
    sep=';',
    usecols=region_columns,
)
regions.head()

Unnamed: 0,continent,region,subregion,M49 code,ISO-alpha3 code
0,Asia,Southern Asia,Southern Asia,4.0,AFG
1,Europe,Southern Europe,Southern Europe,8.0,ALB
2,Americas,Northern America,Northern America,10.0,ATA
3,Africa,Northern Africa,Northern Africa,12.0,DZA
4,Oceania,Polynesia,Polynesia,16.0,ASM


In [7]:
# count the distinct country codes before the merge
dfr['country_text_id'].nunique(dropna=False)

179

In [8]:
# join the region information onto the V-Dem rows via the ISO alpha-3 country code
dfrr = pd.merge(
    dfr,
    regions,
    how='inner',
    left_on='country_text_id',
    right_on='ISO-alpha3 code',
)

In [9]:
# count the distinct country codes after the merge
dfrr['country_text_id'].nunique(dropna=False)

179

In [10]:
#print codes for those that are missing after merge
# the merged codes are collected once into a set, so each membership test is a
# constant-time lookup instead of a fresh unique() call plus a scan per country
merged_codes = set(dfrr['country_text_id'].unique())
for country in dfr['country_text_id'].unique():
    if country not in merged_codes:
        print(country)

In [11]:
# parse the numeric year as a datetime so the chart can treat it as a temporal axis
year_as_datetime = pd.to_datetime(dfrr['year'], format='%Y')
dfrr['year_formatted'] = year_as_datetime

In [12]:
# adding a new column that converts numeric regime classification into verbal classification
# the position in this list is the numeric code (0-3); anything else gets the fallback text
regime_labels = ['Closed autocracy', 'Electoral autocracy', 'Electoral democracy', 'Liberal democracy']
regime_code = dfrr['v2x_regime']
dfrr['v2x_regime_text'] = np.select(
    [(regime_code == code).to_numpy() for code in range(len(regime_labels))],
    regime_labels,
    default='sth went wrong',
)

In [13]:
#defining a function to display a chart by continent and for the selected indicator
def chart_index_line(selected_continent,selected_index):
    """Display a small-multiples chart of one indicator over time, one panel per country.

    Reads the module-level frames `dfrr` (merged country-year data) and
    `variable_df` (indicator descriptions).

    Parameters
    ----------
    selected_continent : str
        Value of the 'continent' column of `dfrr`; only matching rows are charted.
    selected_index : str
        Name of the `dfrr` column shown on the y-axis. When `variable_df` holds a
        row for this indicator, its 'question' is used as title and its
        'clarification' as subtitle; otherwise the column name is the title.

    Returns
    -------
    The result of `Chart.display()` (the chart is rendered as a side effect).
    """

    #reduce dataframe to only include data for selected continent and the columns that are needed for display
    # dict.fromkeys drops a repeated name (e.g. selected_index == 'v2x_regime') while keeping the order
    needed_columns = list(dict.fromkeys(['country_name','year_formatted','v2x_regime','v2x_regime_text',selected_index]))
    df_reduced = dfrr.loc[dfrr['continent']==selected_continent, needed_columns]

    # look the indicator description up once; fall back to the bare column name
    # when the indicator is not described instead of failing on .item()
    description = variable_df[variable_df['indicator']==selected_index]
    if description.empty:
        chart_title = {
            'text':selected_index,
            'fontSize':24,
        }
    else:
        chart_title = {
            # use the dict created earlier for title and subhead information
            'text':description['question'].iloc[0],
            'fontSize':24,
            'subtitle':description['clarification'].iloc[0],
        }

    #creating the chart from the reduced dataframe
    charted_index = alt.Chart(df_reduced).mark_circle(size=30, opacity=1).encode(

        # coloring by regime type
        alt.Color('v2x_regime_text:O',
                  # assigning a color per regime type
                    scale = alt.Scale(domain=['Closed autocracy','Electoral autocracy','Electoral democracy','Liberal democracy'],
                                      range=['#BE232D','#EE8C0A','#00A5FF','#002D5A']),
                  # refining the displayed legend
                    legend=alt.Legend(values=['Liberal democracy','Electoral democracy','Electoral autocracy','Closed autocracy'], title='Classification of regime'),
                ),

        alt.Y(selected_index, # on the y-axis, display the selected indicator
              scale=alt.Scale(domain=[0, 1]), # set y-scale to range from 0 to 1
              axis=alt.Axis(title=' ') # display no axis title
             ),

        alt.X('year_formatted:T', # on the x-axis, display the year
             axis=alt.Axis(title='  ') # display no axis title
             ),

        tooltip=['year_formatted', selected_index], # creating a tooltip for inspection

        facet=alt.Facet('country_name', # make one chart per country
                        columns=3, # arrange small multiple with 3 columns
                        sort=alt.EncodingSortField(selected_index, # sort them based on the selected indicator
                                                   op='mean', # create an average (mean) over all values for that country
                                                   order='descending')), # sort the charts based on the mean in descending order
    ).properties(
        width=140, #define chart width
        height=70, #define chart height
        title=chart_title)

    return charted_index.display()

In [14]:
chart_index_line(selected_continent='Europe', selected_index='v2x_libdem')

In [15]:
chart_index_line(selected_continent='Asia', selected_index='v2x_libdem')

In [16]:
chart_index_line(selected_continent='Africa', selected_index='v2x_libdem')

In [17]:
chart_index_line(selected_continent='Americas', selected_index='v2x_libdem')

## Map

In [18]:
# Export the rows of the most recent year in the data (country code, name, liberal
# democracy index and regime label) as CSV for use in mapping software
latest_year_rows = dfrr['year'] == dfrr['year'].max()
export_columns = ['ISO-alpha3 code', 'country_name','v2x_libdem','v2x_regime_text']
dfrr.loc[latest_year_rows, export_columns].to_csv('data-export_libdem_index_2022.csv', index=False)
