In [274]:
# import packages

import csv
import json
import math

import dash
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

In [275]:
# Read in datasets

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what pandas recommends to avoid the
# mixed-type DtypeWarning raised by this very wide file.
vdem = pd.read_csv('V-Dem-CY-Full+Others-v14.csv', encoding='utf-8', low_memory=False)
bti = pd.read_csv('free-fair-elections-bti.csv', quoting=csv.QUOTE_NONE, encoding='utf-8')

# The FIW sheet is read twice: once as the ratings table (first two rows
# skipped, so the third row becomes the header) and once for the single row
# just above that header, which is used later as the year labels.
FIW_WORKBOOK = 'Country_and_Territory_Ratings_and_Statuses_FIW_1973-2024.xlsx'
FIW_SHEET = 'Country Ratings, Statuses '  # the trailing space is part of the literal; keep it
fiw = pd.read_excel(FIW_WORKBOOK, sheet_name=FIW_SHEET, skiprows=2)
fiw_years = pd.read_excel(FIW_WORKBOOK, sheet_name=FIW_SHEET, skiprows=1, nrows=1, header=None)

wrp = pd.read_csv('mutated_NWR.csv', quoting=csv.QUOTE_NONE, encoding='utf-8')


Columns (364,365,366,399,415,804,836,837,924,1240,1257,1486,3094,3168,3169,3341,3342,3344,3345,3347,3350,3352) have mixed types. Specify dtype option on import or set low_memory=False.



In [276]:
# Remove unnecessary quotation marks: from every V-Dem value, and from the
# column headers of both the V-Dem and WRP frames

vdem = vdem.replace('"', '', regex=True)

new_headers_v = [column_name.strip('"') for column_name in vdem.columns]
vdem.columns = new_headers_v

new_headers_w = [column_name.strip('"') for column_name in wrp.columns]
wrp.columns = new_headers_w

In [277]:
# Reshape the FIW dataset to align with the format of the other datasets:
# the repeated PR*/CL*/Status* columns are stacked into single PR, CL and
# Status columns.

fiw_value_columns = {
    prefix: [name for name in fiw.columns if name.startswith(prefix)]
    for prefix in ('PR', 'CL', 'Status')
}
PR = fiw_value_columns['PR']
CL = fiw_value_columns['CL']
Status = fiw_value_columns['Status']

# NOTE(review): lreshape drops rows with a missing PR/CL/Status by default;
# the later year assignment assumes fixed-size blocks of rows - confirm that
# no rows are dropped here.
fiw_r = pd.lreshape(fiw, fiw_value_columns)

In [278]:
# Align country names to match among all datasets in the appropriate years

country_rename = {
    "Burma/Myanmar": "Myanmar (Burma)",
    "Myanmar": "Myanmar (Burma)",
    "Republic of Vietnam": "South Vietnam",
    "Vietnam, S.": "South Vietnam",
    "Vietnam, N.": "North Vietnam",  # V-Dem's plain "Vietnam" rows are split by year below
    "Yemen, S.": "South Yemen",
    "Yemen, N.": "North Yemen",  # V-Dem's plain "Yemen" rows are split by year below
    "Republic of the Congo": "Congo (Brazzaville)",
    "Congo": "Congo (Brazzaville)",
    "Democratic Republic of the Congo ": "Congo (Kinshasa)",
    # Same mapping without the trailing space, so the name also matches when
    # the source value is not padded.
    "Democratic Republic of the Congo": "Congo (Kinshasa)",
    "Germany, W.": "West Germany",  # V-Dem's plain "Germany" rows are split by year below
    "Germany, E.": "East Germany",
    "German Democratic Republic": "East Germany",
    "Czechia": "Czech Republic",  # rows before 1993 become Czechoslovakia below
    "United States": "United States of America",
    "Cote d'Ivoire": "Ivory Coast",
    "Cabo Verde": "Cape Verde",
    "T√ºrkiye": "Turkey",
    # Correctly encoded spelling ("u" with diaeresis); the key above is the
    # mis-decoded form of the same name and is kept for backward compatibility.
    "T\u00fcrkiye": "Turkey",
    "East Timor": "Timor-Leste",
    "The Gambia": "Gambia"
}

vdem["country_name"] = vdem["country_name"].replace(country_rename)
bti["Entity"] = bti["Entity"].replace(country_rename)
fiw_r["Country"] = fiw_r["Country"].replace(country_rename)

# Period-specific names for V-Dem rows, applied with boolean masks instead of
# a row-by-row loop. Each rule is
# (current name, year lower bound (exclusive), year upper bound (inclusive), name to use).
vdem_period_renames = [
    ('Vietnam', 1945, 1976, 'North Vietnam'),
    ('Yemen', 1918, 1990, 'North Yemen'),
    ('Germany', 1949, 1990, 'West Germany'),
]
for current_name, after_year, through_year, period_name in vdem_period_renames:
    rows_in_period = (
        (vdem['country_name'] == current_name)
        & (vdem['year'] > after_year)
        & (vdem['year'] <= through_year)
    )
    vdem.loc[rows_in_period, 'country_name'] = period_name

# Every "Czech Republic" row before 1993 is relabelled as Czechoslovakia.
rows_before_split = (vdem['country_name'] == 'Czech Republic') & (vdem['year'] < 1993)
vdem.loc[rows_before_split, 'country_name'] = 'Czechoslovakia'

In [279]:
# Merge vdem and bti datasets (left join: every V-Dem row is kept)

bti_elections = bti[['Entity', 'Year', 'Free and fair elections']]
data = pd.merge(
    vdem,
    bti_elections,
    how='left',
    left_on=['country_name', 'year'],
    right_on=['Entity', 'Year'],
)

In [280]:
# Insert an empty Year column into the reshaped FIW dataset (filled in below)

# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run.
if 'Year' not in fiw_r.columns:
    fiw_r.insert(1, 'Year', '')

In [281]:
# Reshape FIW Years dataset to prepare for merging: turn the single row into
# a single column, use its first entry as the column label, and keep the rest

years_as_rows = fiw_years.T
years_as_rows.columns = years_as_rows.iloc[0]
fiw_years = years_as_rows.iloc[1:]

In [282]:
# Remove NAs from FIW years dataset so only rows holding a year label remain

fiw_years = fiw_years.dropna()

In [283]:
# Insert FIW years data back into reshaped FIW dataset

# The reshaped frame is treated as consecutive blocks of ROWS_PER_FIW_YEAR
# rows; block k receives the k-th year label. A single vectorized assignment
# never writes past the end of fiw_r, whereas assigning row by row with .loc
# appends new rows when the row count is not a multiple of the block size.
ROWS_PER_FIW_YEAR = 205
fiw_year_labels = fiw_years.iloc[:, 0].to_numpy()
if len(fiw_r) > ROWS_PER_FIW_YEAR * len(fiw_year_labels):
    raise ValueError(
        f"fiw_r has {len(fiw_r)} rows but only {len(fiw_year_labels)} year labels "
        f"of {ROWS_PER_FIW_YEAR} rows each are available"
    )
fiw_block_of_row = np.arange(len(fiw_r)) // ROWS_PER_FIW_YEAR
fiw_r['Year'] = fiw_year_labels[fiw_block_of_row]

In [284]:
# Merge FIW dataset into master dataset

# Open question: a left join keeps only rows already in the master dataset,
# but FIW has data on smaller countries that V-Dem does not.
data = pd.merge(
    data,
    fiw_r,
    how='left',
    left_on=['country_name', 'year'],
    right_on=['Country', 'Year'],
)

In [285]:
# Merge WRP dataset into master dataset, matching on COW country code and year

data = pd.merge(
    data,
    wrp,
    how='left',
    left_on=['COWcode', 'year'],
    right_on=['state', 'year'],
)

In [286]:
# (Disabled) Write the master-dataset rows that have no WRP match to a tab-separated text file for analysis

#non_matching_rows = data[data['name'].isna()]
#print(non_matching_rows)
#with open('non_matching_rows.txt', 'w+', newline='') as file:
#   non_matching_rows.to_csv('non_matching_rows.txt', sep = '\t', index=False)

In [287]:
# Drop three contiguous column ranges (each given by its first and last
# label), then a handful of individually named columns.
column_ranges_to_drop = [
    ('chrstprot', 'pop'),
    ('datatype', 'sourcecode'),
    ('historical_date', 'gap_index'),
]
for first_label, last_label in column_ranges_to_drop:
    labels_in_range = data.loc[:, first_label:last_label].columns
    data = data.drop(columns=labels_in_range)

data = data.drop(columns=['Entity', 'Year_x', 'Country', 'Year_y', 'state', 'name'])

In [288]:
# Short, readable names for the V-Dem indicator columns used in the dashboards
vdem_indicator_names = {
    'v2x_civlib': 'CivLib',
    'v2x_clphy': 'Violence',
    'v2x_clpol': 'PolLib',
    'v2x_freexp_altinf': 'FreeExpress',
    'v2xcl_rol': 'LawEqual',
    'v2xeg_eqprotec': 'EqProtect',
    'v2x_corr': 'Corrupt_vdem',
    'v2x_execorr': 'Ecorrupt',
    'v2x_pubcorr': 'Pubcorrupt',
    'v2x_rule': 'RuleLaw',
    'v2xcl_acjst': 'Justice',
    'v2xcl_prpty': 'Property',
    'v2juhcind': 'CourtIndep',
}
data = data.rename(columns=vdem_indicator_names)

In [310]:
# Relabel countries with period-specific names in the master dataset.
# All masks are computed before any assignment; no rule's output name is
# another rule's input name, so the rules do not interact. Using boolean
# masks also removes the dependence on the frame having a 0..n-1 index.
country_col = data['country_name']
year_col = data['year']
period_renames = [
    ((country_col == 'Russia') & (year_col <= 1917), 'Russian Empire'),
    ((country_col == 'Russia') & (year_col > 1917) & (year_col <= 1991), 'USSR'),
    ((country_col == 'United States of America') & (year_col >= 1914), 'United States'),
    ((country_col == 'United Kingdom') & (year_col < 1938),
     'United Kingdom of Great Britain and Ireland'),
    ((country_col == 'Brazil') & (year_col < 1914), 'Kingdom of Brazil'),
    ((country_col == 'Germany') & (year_col >= 1914) & (year_col < 1920), 'German Empire'),
]
for matching_rows, period_name in period_renames:
    data.loc[matching_rows, 'country_name'] = period_name
#    if (data.loc[i,'country_name'] == 'East Germany'):
#            data.loc[i,'country_name'] = 'Germany (Soviet)'

In [290]:
# Write master dataset to csv

# Keep rows from 1900 onwards and renumber them from zero.
data = data[data['year'] >= 1900].reset_index(drop=True)

# to_csv opens, writes and closes the target path itself, so no separate
# open() handle on the same file is needed.
data.to_csv('capstone_data.csv')

In [291]:
# TODO: rename indicator variables
#data = data.rename()

In [306]:
# visualization 

from dash import Dash, dcc, html, Input, Output
import plotly.express as px

app = Dash(__name__)

# Columns of the master dataset that can be picked with the radio buttons
indicator_options = [
    "CivLib", "Violence", "PolLib", "FreeExpress", "LawEqual", "EqProtect",
    "Corrupt_vdem", "Ecorrupt", "Pubcorrupt", "RuleLaw", "Justice", "Property",
    "CourtIndep", "nonreligpct", "Status",
]

# Page: heading, prompt, indicator selector, and the graph the callback fills
app.layout = html.Div(children=[
    html.H4('Vdem Indicators'),
    html.P("Select a indicator:"),
    dcc.RadioItems(
        options=indicator_options,
        value="CivLib",
        id='indicator',
        inline=True,
    ),
    dcc.Graph(id="graph"),
])

@app.callback(
    Output("graph", "figure"),
    Input("indicator", "value"),
)
def display_choropleth(indicator):
    """Return an animated world choropleth coloured by ``indicator``.

    Registered as the callback that fills the "graph" component from the
    value of the "indicator" selector. Reads the module-level ``data`` frame
    and animates over its ``year`` column.
    """
    choropleth_args = dict(
        locations="country_text_id",
        color=indicator,
        hover_name='country_name',
        animation_frame='year',
        animation_group='year',
        projection="natural earth",
    )
    figure = px.choropleth(data, **choropleth_args)
    # Remove the margins around the map.
    figure.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    return figure

# Dash.run is the supported way to start the server; run_server is deprecated
# and no longer available in newer Dash releases. Fall back to run_server only
# when the installed Dash predates Dash.run.
start_server = app.run if hasattr(app, "run") else app.run_server
start_server(debug=True)

In [312]:
# visualization 

from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import geopandas as gpd

# Rebind `app` to a fresh Dash instance for the slider-driven map defined
# below; this replaces the app object created in the earlier visualization cell.
app = Dash(__name__)

def load_geojson(year):
    """Load the world-boundaries GeoJSON file for ``year``.

    Reads ``world_{year}.geojson`` from the current working directory and
    returns the parsed JSON document.

    Raises:
        FileNotFoundError: if no file exists for ``year``.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # GeoJSON is UTF-8 by specification, so do not rely on the platform's
    # default text encoding (which would garble non-ASCII feature names).
    with open(f'world_{year}.geojson', encoding='utf-8') as f:
        return json.load(f)

# Columns of the master dataset that can be picked with the radio buttons
slider_indicator_options = [
    "CivLib", "Violence", "PolLib", "FreeExpress", "LawEqual", "EqProtect",
    "Corrupt_vdem", "Ecorrupt", "Pubcorrupt", "RuleLaw", "Justice", "Property",
    "CourtIndep", "nonreligpct", "Status",
]

# Page: heading, prompt, indicator selector, year slider (1900-2020, one mark
# per year), and the graph the callback fills
app.layout = html.Div(children=[
    html.H4('Vdem Indicators'),
    html.P("Select a indicator:"),
    dcc.RadioItems(
        options=slider_indicator_options,
        value="CivLib",
        id='indicator',
        inline=True,
    ),
    dcc.Slider(
        id='year',
        min=1900,
        max=2020,
        step=1,
        value=1900,
        marks=dict((tick, str(tick)) for tick in range(1900, 2021)),
        tooltip=dict(placement="bottom", always_visible=True),
    ),
    dcc.Graph(id="graph"),
])

# Years for which a world_{year}.geojson base map exists, in ascending order
update_map_years = np.array([1900, 1914, 1920, 1930, 1938, 1945, 1960, 1994, 2000, 2010])


@app.callback(
    Output("graph", "figure"),
    [Input("indicator", "value"), Input("year", "value")])
def display_choropleth(indicator, year):
    """Return a choropleth of ``indicator`` for one ``year``.

    The borders come from the most recent base map dated at or before
    ``year``; a year earlier than the first base map uses the first one.
    Rows are taken from the module-level ``data`` frame and matched to
    GeoJSON features through their ``properties.NAME`` value.
    """
    # searchsorted(side="right") - 1 is the index of the last map year <= year.
    # Clamp at 0 so an early year cannot wrap around to the last (newest) map.
    map_index = int(np.searchsorted(update_map_years, year, side="right")) - 1
    map_index = max(map_index, 0)
    geo = load_geojson(int(update_map_years[map_index]))

    df_year = data[data['year'] == year]
    # NOTE(review): go.Choropleth's z is a numeric colour value; 'Status'
    # presumably holds text labels - confirm it renders as intended.
    fig = go.Figure(go.Choropleth(
        z=df_year[indicator],
        hoverinfo='location+z',
        # Matched by country name against featureidkey; the ISO-3 column
        # country_text_id was the alternative considered.
        locations=df_year['country_name'],
        featureidkey='properties.NAME',
        geojson=geo,
        colorbar_title=indicator,
    ))
    fig.update_geos(fitbounds="locations", projection_type="natural earth")
    fig.update_layout(
        margin={"r":0, "t":0, "l":0, "b":0},
        title=f'{indicator} for Year {year}'
    )
    return fig


# Dash.run is the supported way to start the server; run_server is deprecated
# and no longer available in newer Dash releases. Fall back to run_server only
# when the installed Dash predates Dash.run.
start_server = app.run if hasattr(app, "run") else app.run_server
start_server(debug=True)

In [None]:
# Inspect the properties of the first feature in the 1945 GeoJSON, e.g. to
# check which name field the features carry
geojson = load_geojson(1945)
print(geojson["features"][0]["properties"])
#df = data
#df_year = df[df['year'] == 1945]
#locations = df_year['country_name']
#locations