# Polity5

In [203]:
import pandas as pd

print("Load Polity5 dataset")
polity_df = pd.read_excel('../datasets/raw/democracy/polity/POLITY5-PRC.xlsx', sheet_name='Data')

print("Rename columns")
polity_df.rename(columns={'Economy ISO3': 'country_iso', 'Economy Name': 'country_name'}, inplace=True)

polity_df['Indicator ID'] = polity_df['Indicator ID'].str.replace(r'^POLITY5\.PRC\.', '', regex=True)


print("Drop the columns: Attribute 1, Attribute 2, and Attribute 3")
polity_df.drop(columns=['Attribute 1', 'Attribute 2', 'Attribute 3'], inplace=True)

print("Set the years as identifiers instead of columns. Transpose the dataframe")
years = [str(year) for year in range(1776, 2019)]
polity_df_long = polity_df.melt(
    id_vars=['country_iso', 'country_name', 'Indicator ID'],
    value_vars=years,
    var_name='Year',
    value_name='Value'
)

print("Convert Year to numeric")
polity_df_long['Year'] = pd.to_numeric(polity_df_long['Year'])

print("Set the Indicator as the column")
polity_df_final = polity_df_long.pivot_table(
    index=['country_iso', 'country_name', 'Year'],
    columns='Indicator ID',
    values='Value'
).reset_index()


Load Polity5 dataset
Rename columns
Drop the columns: Attribute 1, Attribute 2, and Attribute 3
Set the years as identifiers instead of columns. Transpose the dataframe
Convert Year to numeric
Set the Indicator as the column


In [205]:
print("Verify that fields don't have NA values")
print(f"NA values in country_iso: {len(polity_df_final[polity_df_final['country_iso'].isna()])}")
print(f"NA values in autoc: {len(polity_df_final[polity_df_final['autoc'].isna()])}")
print(f"NA values in democ: {len(polity_df_final[polity_df_final['democ'].isna()])}")
print(f"NA values in exconst: {len(polity_df_final[polity_df_final['exconst'].isna()])}")
print(f"NA values in exrec: {len(polity_df_final[polity_df_final['exrec'].isna()])}")
print(f"NA values in parcomp: {len(polity_df_final[polity_df_final['parcomp'].isna()])}")
print(f"NA values in parreg: {len(polity_df_final[polity_df_final['parreg'].isna()])}")
print(f"NA values in polcomp: {len(polity_df_final[polity_df_final['polcomp'].isna()])}")
print(f"NA values in polity: {len(polity_df_final[polity_df_final['polity'].isna()])}")
print(f"NA values in polity2: {len(polity_df_final[polity_df_final['polity2'].isna()])}")
print(f"NA values in xconst: {len(polity_df_final[polity_df_final['xconst'].isna()])}")
print(f"NA values in xrcomp: {len(polity_df_final[polity_df_final['xrcomp'].isna()])}")
print(f"NA values in xropen: {len(polity_df_final[polity_df_final['xropen'].isna()])}")
print(f"NA values in xrreg: {len(polity_df_final[polity_df_final['xrreg'].isna()])}")


print(f"\nNA values in durable: {len(polity_df_final[polity_df_final['durable'].isna()])}")
print(f"NA values in durable after 1940: {len(polity_df_final[polity_df_final['durable'].isna() & polity_df_final["Year"].astype(int) > 1940])}")
print("Half of the records have NA values in durable predictor. Dropping the column.")

print(f"\nNA values in exrec after 1940: {len(polity_df_final[polity_df_final["exrec"].isna() & polity_df_final["Year"].astype(int) > 1940])}")
print("Some countries have NA values in exrec. Those are countries before 1940, which is already out of the scope of this analysis. Reduce the scope to 1940-2018")

print(f"\nNA values in polcomp after 1940: {len(polity_df_final[polity_df_final["polcomp"].isna() & polity_df_final["Year"].astype(int) > 1940])}")
print("Same happens for polcomp")

print(f"\npolity2 is a predictor built from polity")
# Only get columns polity and polity2
print(f"\nValues of polity when polity2 is NA:")
print(polity_df_final[["polity"]][polity_df_final["polity2"].isna()].value_counts())

print(f"Looking at the values of polity we have identified that polity2 is NA when polity is -66. Based on the codebook, 77 (interregnum) is set to 0 while -66 (interruption) is set to NA. We adjust 66 to 0.")
print(f"Records with polity = -88 should be prorated between years. Except when they're at the beginning of start of the dataset, as it's the case for India")
print("Dropping the polity2 column as the -66, -77, and -88 will be excluded at the time of the analysis")

Verify that fields don't have NA values
NA values in country_iso: 0
NA values in autoc: 0
NA values in democ: 0
NA values in exconst: 0
NA values in exrec: 183
NA values in parcomp: 0
NA values in parreg: 0
NA values in polcomp: 126
NA values in polity: 0
NA values in polity2: 228
NA values in xconst: 0
NA values in xrcomp: 0
NA values in xropen: 0
NA values in xrreg: 0

NA values in durable: 1165
NA values in durable after 1940: 0
Half of the records have NA values in durable predictor. Dropping the column.

NA values in exrec after 1940: 0
Some countries have NA values in exrec. Those are countries before 1940, which is already out of the scope of this analysis. Reduce the scope to 1940-2018

NA values in polcomp after 1940: 0
Same happens for polcomp

polity2 is a predictor built from polity

Values of polity when polity2 is NA:
polity
-66.0     223
-88.0       5
Name: count, dtype: int64
Looking at the values of polity we have identified that polity2 is NA when polity is -66. Based

In [206]:
print("Dropping the durable column")
polity_df_final.drop(columns=['durable'], inplace=True)

print("Reduce the scope to 1940-2018")
polity_df_final = polity_df_final[polity_df_final["Year"].astype(int) > 1940]

print(f"Dropping the polity2 column")
polity_df_final.drop(columns=['polity2'], inplace=True)

print("Transform the predictors from float64 to int64")
polity_df_final = polity_df_final.astype({'autoc': 'int64', 'democ': 'int64', 'exconst': 'int64', 'exrec': 'int64', 'parcomp': 'int64', 'parreg': 'int64', 'polcomp': 'int64', 'polity': 'int64', 'xconst': 'int64', 'xrcomp': 'int64', 'xrreg': 'int64', 'xropen': 'int64'})

print("Save the dataset")
polity_df_final.to_csv('../datasets/processed/democracy/polity/polity5.csv', index=False)

print(polity_df_final.info())

Dropping the durable column
Reduce the scope to 1940-2018
Dropping the polity2 column
Transform the predictors from float64 to int64
Save the dataset
<class 'pandas.core.frame.DataFrame'>
Index: 9845 entries, 141 to 16345
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   country_iso   9845 non-null   object
 1   country_name  9845 non-null   object
 2   Year          9845 non-null   int64 
 3   autoc         9845 non-null   int64 
 4   democ         9845 non-null   int64 
 5   exconst       9845 non-null   int64 
 6   exrec         9845 non-null   int64 
 7   parcomp       9845 non-null   int64 
 8   parreg        9845 non-null   int64 
 9   polcomp       9845 non-null   int64 
 10  polity        9845 non-null   int64 
 11  xconst        9845 non-null   int64 
 12  xrcomp        9845 non-null   int64 
 13  xropen        9845 non-null   int64 
 14  xrreg         9845 non-null   int64 
dtypes: int64(13), object(2)
me

In [208]:
countries = polity_df_final["country_name"].unique()

print(f"Nr of countries: {len(countries)} - Countries: {', '.join(countries)}")
print(f"From years {polity_df_final['Year'].min()} to {polity_df_final['Year'].max()}")


Nr of countries: 166 - Countries: Afghanistan, Angola, Albania, United Arab Emirates, Argentina, Armenia, Australia, Austria, Azerbaijan, Burundi, Belgium, Benin, Burkina Faso, Bangladesh, Bulgaria, Bahrain, Bosnia and Herzegovina, Belarus, Bolivia, Brazil, Bhutan, Botswana, Central African Republic, Canada, Switzerland, Chile, China, Cote d'Ivoire, Cameroon, Congo, Rep., Colombia, Comoros, Cabo Verde, Costa Rica, Cuba, Cyprus, Czechia, Germany, Djibouti, Denmark, Dominican Republic, Algeria, Ecuador, Egypt, Arab Rep., Eritrea, Spain, Estonia, Ethiopia, Finland, Fiji, France, Gabon, United Kingdom, Georgia, Ghana, Guinea, Gambia, The, Guinea-Bissau, Equatorial Guinea, Greece, Guatemala, Guyana, Honduras, Croatia, Haiti, Hungary, Indonesia, India, Ireland, Iran, Islamic Rep., Iraq, Israel, Italy, Jamaica, Jordan, Japan, Kazakhstan, Kenya, Kyrgyz Republic, Cambodia, Korea, Rep., Kuwait, Lao PDR, Lebanon, Liberia, Libya, Sri Lanka, Lesotho, Lithuania, Luxembourg, Latvia, Morocco, Moldova,

# Freedom in the World

In [212]:
print("Load dataset")
freedom_df = pd.read_excel('../datasets/raw/democracy/freedom-world/All_data_FIW_2013-2024.xlsx', sheet_name='FIW13-25', header=1)

print("Rename columns")
freedom_df.rename(columns={'Country/Territory': 'country', 'Edition': 'year'}, inplace=True)

print("Drop the Region column. We're only interested in the country")
freedom_df.drop(columns=['Region'], inplace=True)

print("Drop the rows where the country is not a country")
freedom_df = freedom_df[freedom_df['C/T'] == 'c']

print("Drop the columns: Add Q, Add A, and C/T")
freedom_df.drop(columns=['Add Q', 'Add A', 'C/T'], inplace=True)

print("Rename columns with lower case and underscore")
freedom_df.columns = freedom_df.columns.str.strip().str.lower().str.replace(' ', '_')

print("Save the dataset")
freedom_df.to_csv('../datasets/processed/democracy/freedom-world/freedom-world.csv', index=False)

print(f"Rows with country or status with NA values: {len(freedom_df[freedom_df['country'].isna() | freedom_df['status'].isna()])}")
print("All the quantitative predictors are parsed as int64 as they don't have NA values")
print(freedom_df.info())

freedom_df_countries = freedom_df["country"].unique()

print(f"Nr of countries: {len(freedom_df_countries)} - Countries: {', '.join(freedom_df_countries)}")

print(f"From years {freedom_df['year'].min()} to {freedom_df['year'].max()}")

Load dataset
Rename columns
Drop the Region column. We're only interested in the country
Drop the rows where the country is not a country
Drop the columns: Add Q, Add A, and C/T
Rename columns with lower case and underscore
Save the dataset
Rows with country or status with NA values: 0
All the quantitative predictors are parsed as int64 as they don't have NA values
<class 'pandas.core.frame.DataFrame'>
Index: 2535 entries, 1 to 2722
Data columns (total 40 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   country    2535 non-null   object
 1   year       2535 non-null   int64 
 2   status     2535 non-null   object
 3   pr_rating  2535 non-null   int64 
 4   cl_rating  2535 non-null   int64 
 5   a1         2535 non-null   int64 
 6   a2         2535 non-null   int64 
 7   a3         2535 non-null   int64 
 8   a          2535 non-null   int64 
 9   b1         2535 non-null   int64 
 10  b2         2535 non-null   int64 
 11  b3         2535 no

# Autocratic Regime Data

In [223]:
print("Load the Cases dataset")
autocratic_cases_df = pd.read_excel('../datasets/raw/democracy/autocratic/GWF Autocratic Regimes 1.2/GWF Autocratic Regimes.xlsx', sheet_name='Autocratic Regimes Case List', header=0)

print("Transform the dates to datetime in day/month/year format. By default Pandas uses month/day/year format.")
autocratic_cases_df['gwf_startdate'] = pd.to_datetime(autocratic_cases_df['gwf_startdate'], dayfirst=True, errors='coerce')
autocratic_cases_df['gwf_enddate'] = pd.to_datetime(autocratic_cases_df['gwf_enddate'], dayfirst=True, errors='coerce')

print("\nVerify NA values")
print(f"Rows with cowcode NA: {len(autocratic_cases_df[autocratic_cases_df['cowcode'].isna()])}")
print(f"Rows with gwf_country NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_country'].isna()])}")
print(f"Rows with gwf_startdate NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_startdate'].isna()])}")
print(f"Rows with gwf_enddate NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_enddate'].isna()])}")
print(f"Rows with gwf_startyr NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_startyr'].isna()])}")
print(f"Rows with gwf_endyr NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_endyr'].isna()])}")
print(f"Rows with gwf_subsreg NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_subsreg'].isna()])}")
print(f"Rows with gwf_howend NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_howend'].isna()])}")
print(f"Rows with gwf_violent NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_violent'].isna()])}")
print(f"Rows with gwf_regimetype NA: {len(autocratic_cases_df[autocratic_cases_df['gwf_regimetype'].isna()])}")

print("\nThe only case with Regime Type NA is Iran79, as it cannot be classified. We leave it as is. We don't want to exclude it from the analysis.")
print("\nEnd date is NA when the regime is still ongoing. We set it to -1 to be able to parse it as an integer")
autocratic_cases_df['gwf_endyr'] = autocratic_cases_df['gwf_endyr'].fillna(-1).astype(int)

print(autocratic_cases_df.info())

autocratic_cases_df.to_csv('../datasets/processed/democracy/autocratic/autocratic-regime-case-list.csv', index=False)

autocratic_cases_countries = autocratic_cases_df["gwf_country"].unique()

print(f"Autocratic Regimes - Nr of countries: {len(autocratic_cases_countries)} - Countries: {', '.join(autocratic_cases_countries)}")
print(f"Autocratic Regimes - From years {autocratic_cases_df['gwf_startyr'].min()} to {autocratic_cases_df['gwf_endyr'].max()}")

Load the Cases dataset
Transform the dates to datetime in day/month/year format. By default Pandas uses month/day/year format.

Verify NA values
Rows with cowcode NA: 0
Rows with gwf_country NA: 0
Rows with gwf_startdate NA: 0
Rows with gwf_enddate NA: 0
Rows with gwf_startyr NA: 0
Rows with gwf_endyr NA: 57
Rows with gwf_subsreg NA: 0
Rows with gwf_howend NA: 0
Rows with gwf_violent NA: 0
Rows with gwf_regimetype NA: 1

The only case with Regime Type NA is Iran79, as it cannot be classified. We leave it as is. We don't want to exclude it from the analysis.

End date is NA when the regime is still ongoing. We set it to -1 to be able to parse it as an integer
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   cowcode         280 non-null    int64         
 1   gwf_country     280 non-null    object        
 2   gwf_casename   

In [224]:
print("Load the TSCS Data")
print("The TSCS Data contains the autocratic regimes data over the years")
autocratic_years_df = pd.read_excel('../datasets/raw/democracy/autocratic/GWF Autocratic Regimes 1.2/GWF Autocratic Regimes.xlsx', sheet_name='TSCS data', header=0)

print("Transform the dates to datetime in day/month/year format. By default Pandas uses month/day/year format.")
autocratic_years_df['gwf_startdate'] = pd.to_datetime(autocratic_years_df['gwf_startdate'], dayfirst=True, errors='coerce')
autocratic_years_df['gwf_enddate'] = pd.to_datetime(autocratic_years_df['gwf_enddate'], dayfirst=True, errors='coerce')

print("\nVerify NA values")
print(f"Rows with cowcode NA: {len(autocratic_years_df[autocratic_years_df['cowcode'].isna()])}")
print(f"Rows with year NA: {len(autocratic_years_df[autocratic_years_df['year'].isna()])}")
print(f"Rows with gwf_country NA: {len(autocratic_years_df[autocratic_years_df['gwf_country'].isna()])}")
print(f"Rows with gwf_casename NA: {len(autocratic_years_df[autocratic_years_df['gwf_casename'].isna()])}")
print(f"Rows with gwf_startdate NA: {len(autocratic_years_df[autocratic_years_df['gwf_startdate'].isna()])}")
print(f"Rows with gwf_enddate NA: {len(autocratic_years_df[autocratic_years_df['gwf_enddate'].isna()])}")
print(f"Rows with gwf_spell NA: {len(autocratic_years_df[autocratic_years_df['gwf_spell'].isna()])}")
print(f"Rows with gwf_duration NA: {len(autocratic_years_df[autocratic_years_df['gwf_duration'].isna()])}")
print(f"Rows with gwf_fail NA: {len(autocratic_years_df[autocratic_years_df['gwf_fail'].isna()])}")
print(f"Rows with gwf_fail_subsregime NA: {len(autocratic_years_df[autocratic_years_df['gwf_fail_subsregime'].isna()])}")
print(f"Rows with gwf_fail_type NA: {len(autocratic_years_df[autocratic_years_df['gwf_fail_type'].isna()])}")
print(f"Rows with gwf_fail_violent NA: {len(autocratic_years_df[autocratic_years_df['gwf_fail_violent'].isna()])}")
print(f"Rows with gwf_regimetype NA: {len(autocratic_years_df[autocratic_years_df['gwf_regimetype'].isna()])}")
print(f"Rows with gwf_party NA: {len(autocratic_years_df[autocratic_years_df['gwf_party'].isna()])}")
print(f"Rows with gwf_personal NA: {len(autocratic_years_df[autocratic_years_df['gwf_personal'].isna()])}")
print(f"Rows with gwf_military NA: {len(autocratic_years_df[autocratic_years_df['gwf_military'].isna()])}")
print(f"Rows with gwf_monarch NA: {len(autocratic_years_df[autocratic_years_df['gwf_monarch'].isna()])}")

print("Similarly to the case list, Iran79 doesn't have a regime type. We leave it as is.")

print(autocratic_years_df.info())

autocratic_years_df.to_csv('../datasets/processed/democracy/autocratic/autocratic-regime-country-years.csv', index=False)



autocratic_years_countries = autocratic_years_df["gwf_country"].unique()

print(f"Autocratic Years - Nr of countries: {len(autocratic_years_countries)} - Countries: {', '.join(autocratic_years_countries)}")
print(f"Autocratic Years - From years {autocratic_years_df['year'].min()} to {autocratic_years_df['year'].max()}")

Load the TSCS Data
The TSCS Data contains the autocratic regimes data over the years
Transform the dates to datetime in day/month/year format. By default Pandas uses month/day/year format.

Verify NA values
Rows with cowcode NA: 0
Rows with year NA: 0
Rows with gwf_country NA: 0
Rows with gwf_casename NA: 0
Rows with gwf_startdate NA: 0
Rows with gwf_enddate NA: 0
Rows with gwf_spell NA: 0
Rows with gwf_duration NA: 0
Rows with gwf_fail NA: 0
Rows with gwf_fail_subsregime NA: 0
Rows with gwf_fail_type NA: 0
Rows with gwf_fail_violent NA: 0
Rows with gwf_regimetype NA: 31
Rows with gwf_party NA: 0
Rows with gwf_personal NA: 0
Rows with gwf_military NA: 0
Rows with gwf_monarch NA: 0
Similarly to the case list, Iran79 doesn't have a regime type. We leave it as is.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4591 entries, 0 to 4590
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   co

# Boix-Miller-Rosato Dichotomous Coding of Democracy

In [228]:
# Load CSV
print("Load the dichotomous democracy dataset")
dichotomous_df = pd.read_csv('../datasets/raw/democracy/dichotomous/democracy-v4.0.csv')

print("Some country aren't modern countries (E.g.: Bavaria), if the abbreviation field is not 3 characters, drop the row")
dichotomous_df = dichotomous_df[dichotomous_df['abbreviation'].str.len() == 3]

print("Some countries don't have democracy data (They should), filter those out")
dichotomous_df = dichotomous_df[dichotomous_df['democracy'].notna()]
dichotomous_df['democracy'] = dichotomous_df['democracy'].fillna(-1).astype(int)

print("Transform columns to int")
dichotomous_df['democracy_trans'] = dichotomous_df['democracy_trans'].astype(int)
dichotomous_df['democracy_breakdowns'] = dichotomous_df['democracy_breakdowns'].astype(int)
dichotomous_df['democracy_duration'] = dichotomous_df['democracy_duration'].astype(int)
dichotomous_df['democracy_femalesuffrage'] = dichotomous_df['democracy_femalesuffrage'].astype(int)

print("Column democracy_omitteddata is allowed to have NA values, Codebook recommends to set it to 0 or other value if needed. We set it to -1")
dichotomous_df['democracy_omitteddata'] = dichotomous_df['democracy_omitteddata'].fillna(-1).astype(int)

print(dichotomous_df.info())

print(f"Rows with country NA: {len(dichotomous_df[dichotomous_df['country'].isna()])}")
print(f"Rows with abbreviation NA: {len(dichotomous_df[dichotomous_df['abbreviation'].isna()])}")
print(f"Rows with abbreviation_undp NA: {len(dichotomous_df[dichotomous_df['abbreviation_undp'].isna()])}")

dichotomous_countries = dichotomous_df["country"].unique()

print(f"Dichotomous Democracy - Nr of countries: {len(dichotomous_countries)} - Countries: {', '.join(dichotomous_countries)}")
print(f"Dichotomous Democracy - From years {dichotomous_df['year'].min()} to {dichotomous_df['year'].max()}")

dichotomous_df.to_csv('../datasets/processed/democracy/dichotomous/dichotomous-democracy.csv', index=False)

Load the dichotomous democracy dataset
Some country aren't modern countries (E.g.: Bavaria), if the abbreviation field is not 3 characters, drop the row
Some countries don't have democracy data (They should), filter those out
Transform columns to int
Column democracy_omitteddata is allowed to have NA values, Codebook recommends to set it to 0 or other value if needed. We set it to -1
<class 'pandas.core.frame.DataFrame'>
Index: 18295 entries, 0 to 19774
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   country                   18295 non-null  object
 1   ccode                     18295 non-null  int64 
 2   abbreviation              18295 non-null  object
 3   abbreviation_undp         18295 non-null  object
 4   year                      18295 non-null  int64 
 5   democracy                 18295 non-null  int64 
 6   democracy_trans           18295 non-null  int64 
 7   democracy_breakdow

# Lexical Index of Electoral Democracy (LIED)

In [229]:
print("Load Dataset")
lied_df = pd.read_excel('../datasets/raw/democracy/lied/LIED_6.6.xlsx', sheet_name='Sheet1', header=0)

print("\nDrop the regions column as it's not part of the codebook and it's included in the version 6.6 only")
lied_df.drop(columns=['region'], inplace=True)

print("\nDrop the rows where vdem is NA as it means it's not a country")
lied_df = lied_df[lied_df['vdem'].notna()]

print("\nTransform the year to int. There is one record with an invalid quote in the year -> ¨2023, we transform it to 2023") 
lied_df['year'] = lied_df['year'].replace('¨2023', '2023').astype(int)

print(lied_df.info())

print("\nAll the fields are parsed as integers and don't have NA values")

lied_countries = lied_df["countryn"].unique()

print(f"LIED - Nr of countries: {len(lied_countries)} - Countries: {', '.join(lied_countries)}")
print(f"LIED - From years {lied_df['year'].min()} to {lied_df['year'].max()}")


Load Dataset
Drop the regions column as it's not part of the codebook and it's included in the version 6.6 only
Drop the rows where vdem is NA as it means it's not a country
Transform the year to int. There is one record with an invalid quote in the year -> ¨2023, we transform it to 2023
<class 'pandas.core.frame.DataFrame'>
Index: 32276 entries, 0 to 32691
Data columns (total 21 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   countryn                           32276 non-null  object 
 1   cow                                32276 non-null  int64  
 2   vdem                               32276 non-null  float64
 3   year                               32276 non-null  int64  
 4   male_suffrage                      32276 non-null  int64  
 5   female_suffrage                    32276 non-null  int64  
 6   executive_elections                32276 non-null  int64  
 7   legislative_elections     