Importing libraries

pip install dash plotly pandas pycountry pycountry-convert scikit-learn dash-ag-grid statsmodels

In [1]:
from dash import Dash, html, dcc, Output, Input, State
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import pycountry
import pycountry_convert
from sklearn import linear_model
import dash_ag_grid as dag
from statsmodels.tsa.holtwinters import ExponentialSmoothing

Reading datasets

In [2]:
trend = pd.read_csv('worldriskindex-trend.csv')

In [3]:
countriesCleaned = pd.read_csv('cleanedCountries.csv')

In [4]:
# Remove feature normalized columns, not used for project
trend = trend.drop(list(trend.filter(regex='Norm')), axis=1)
trend.columns = trend.columns.str.replace("_Base", "") # Rename base variables for easier accessing and renaming

In [5]:
# Meta dataset just for renaming variables
meta = pd.read_excel('worldriskindex-meta.xlsx')[['Code', 'Variable']]
rename = dict(zip(meta['Code'], meta['Variable'])) # Dictionary for renaming columns

Defining functions, constants, and other variables needed for displaying

In [6]:
def convert_ISO3_Continent(ISO3):
    '''Helper function that takes ISO3 code and uses pycountry and pycountry convert functions to get continent names. 
    Used for standardizing continent names.'''
    try:
        ISO2 = pycountry.countries.get(alpha_3=ISO3).alpha_2
        code = pycountry_convert.country_alpha2_to_continent_code(ISO2)
        continent = pycountry_convert.convert_continent_code_to_continent_name(code)
        return continent
    except:
        return pd.NA

In [7]:
exposureSubCol = ["EI_0" + str(i+1) for i in range(7)] # Exposure subcolumns, these are the different types of disasters. Further subcolumns include strength of disaster.  

In [8]:
# Country statistics dataframe, used for forming an aggregate through ISO3 from which the information is displayed
stats = countriesCleaned
stats = stats.set_index('ISO3')
stats = stats.drop(['Country', 'Region'], axis=1)

In [9]:
def formAggregate(lowerBound):
    '''Forms an aggregate dataset from lowerBound to last year collected'''
    # Filtering to be within bounds
    aggregate = trend[trend['Year'] >= lowerBound]
    
    # Years 
    aggregate = aggregate.groupby(aggregate['ISO3.Code']).agg(
        Risk = ('W', 'mean'),
        Exposure = ('E', 'mean'),
        Vulnerability = ('V', 'mean'))
    
    # Add country and continent
    aggregate['Country'] = aggregate.index.map(lambda x: pycountry.countries.get(alpha_3=x).name)
    aggregate['Continent'] = aggregate.index.map(lambda x: convert_ISO3_Continent(x))
    
    # Add disaster columns
    disasterAggregate = trend.groupby(trend['ISO3.Code'])[exposureSubCol].mean()
    aggregate = aggregate.join(disasterAggregate)

    # Add selected country stats
    aggregate = aggregate.join(stats)

    return aggregate

In [10]:
def generateMap(dataFrame):
    '''Generates a choropleth map using Dash's built-in function and country map.'''
    fullFig = px.choropleth(dataFrame, 
                            locations=dataFrame.index, # Uses ISO3 as index
                            color="Risk", # Displays risk 
                            hover_name="Country", # Displays country name when hovered over.
                            hover_data={'Population': True, # Displays additional statistics. 
                                        'Area (sq. mi.)': True, 
                                        'GDP ($ per capita)': True, 
                                        'Literacy (%)': True, 
                                        'Phones (per 1000)':True, 
                                        'Birthrate': True, 
                                        'Deathrate': True}, 
                            color_continuous_scale="Viridis") # Dash's built-in colour scale. 
    fullFig.update_layout(margin=dict(l=0,r=0,t=0,b=0)) # Remove margins from map, allows larger display.
    return fullFig

In [11]:
def generateExposureInd(ISO, dataFrame):
    '''Generates exposure indicator for the selected country from the given aggregate dataset'''
    continent = dataFrame['Continent'][ISO]
    continentDf = dataFrame.loc[dataFrame['Continent'] == continent,:] # dataframe of all countries in the continent
    maxExposure = continentDf['Exposure'].max() 
    iqrExposure = continentDf['Exposure'].quantile([0.25, 0.5, 0.75]) # IQR stored as key-value pairs. Why?

    fig = go.Figure(go.Indicator(
        mode = "gauge+number",
        value = dataFrame['Exposure'][ISO],
        gauge = {'bar': {'color': 'black'},
             'axis': {'range': [0, maxExposure]}, # Sets total range. 
             'steps': [ # Sets subrange and colour for those ranges. Uses interquartile as comparison with neighboring countries.
                 {'range': [0, iqrExposure[0.25]], 'color': "green"},
                 {'range': [iqrExposure[0.25], iqrExposure[0.5]], 'color': "yellow"},
                 {'range': [iqrExposure[0.5], iqrExposure[0.75]], 'color': "orange"},
                 {'range': [iqrExposure[0.75], maxExposure], 'color': "red"}
             ] 
             }
        ),
        layout = {
            'title': 'Exposure Indicator',
        }
    )
    return fig

def generateVulnerabilityInd(ISO, dataFrame):
    '''Generates exposure indicator for the selected country from the given aggregate dataset'''
    continent = dataFrame['Continent'][ISO]
    continentDf = dataFrame.loc[dataFrame['Continent'] == continent,:] # dataframe of all countries in the continent
    maxVulnerability = continentDf['Vulnerability'].max()
    iqrVulnerability = continentDf['Vulnerability'].quantile([0.25, 0.5, 0.75]) # IQR stored as key-value pairs. Why?

    fig = go.Figure(go.Indicator(
        mode = "gauge+number",
        value = dataFrame['Vulnerability'][ISO],
        gauge = {'bar': {'color': 'black'},
             'axis': {'range': [0, maxVulnerability]}, # Sets total range. 
             'steps': [ # Sets subrange and colour for those ranges. Uses interquartile as comparison with neighboring countries.
                 {'range': [0, iqrVulnerability[0.25]], 'color': "green"},
                 {'range': [iqrVulnerability[0.25], iqrVulnerability[0.5]], 'color': "yellow"},
                 {'range': [iqrVulnerability[0.5], iqrVulnerability[0.75]], 'color': "orange"},
                 {'range': [iqrVulnerability[0.75], maxVulnerability], 'color': "red"}
             ]
             }
        ),
        layout = {
            'title': 'Vulnerability Indicator',
        }
    )
    return fig

In [12]:
def generateDisasterGraph(countryCode, dataFrame):
    '''Generates bargraph of disasters for selected country using current aggregate'''
    countryExp = dataFrame.loc[countryCode, exposureSubCol].astype('float64') # Get disasters (exposure subcolumns) of selected country
    continentExp = dataFrame.loc[dataFrame['Continent'] == dataFrame.loc[countryCode, 'Continent'], exposureSubCol].mean() # Get mean of disasters of all countries in selected continent
    exposure = pd.DataFrame({
        'country': countryExp,
        'continent': continentExp
    })
    exposure['disaster'] = exposure.index.map(lambda x: rename[x]) # rename disasters from short code to actual name
    
    fig = px.bar(exposure, x='disaster', y=['country', 'continent'], barmode='overlay') # Generate bar graph with overlay.
    return fig



In [13]:
# Dictionary for continent bookmarks.
continents = {
    "North America" : (55, -105),
    "South America" : (-9,-56),
    "Asia" : (34, 101),
    "Africa" : (9, 35),
    "Europe" : (55, 15),
    "Oceania" : (-23, 140)
}

Mason's Segment(?)

TODO:
 - Dropdown for Vulnerability Type, Continent, Country DONE
 - Line graph indicating change in vulnerability (Time series) DONE
    - Prediction for future vulnerability? DONE
 - Top/Bottom 3 countries for the statistic DONE
 - Most correlated statistics for said vulnerability DONE

Copy of risk index with only the countries, years, and exposures

In [14]:
def subcategories(searchCategories, lengthOfCategories):
    '''Generate subcategories by iterating over the length of categories'''
    fixedSubstring = "_" # substring that is present in all subcategories
    res = [] # List of all subcategories 

    for i in range(len(searchCategories)):
        category = searchCategories[i]
        length = lengthOfCategories[i]

        for j in range(length):
            lengthFormater = "" # Add extra digit to string if needed

            if (j < 9):
                lengthFormater = "0"

            subCategory = category + fixedSubstring + lengthFormater + str(j+1) # Concatenate strings
            res.append(subCategory) # Append to results
    
    return res

In [15]:
search = ["S", "C", "A"] # Search for susceptibility, coping abilities, and adaptability aggregate categories; the subcategories of vulnerability.
categoryLength = [5, 3, 3] # The number of susceptibility, coping abilities, and adaptability aggregate categories. 

vul = subcategories(search, categoryLength)

In [16]:
trend_exposure = trend[['WRI.Country', 'ISO3.Code', 'Year'] + vul]

Line graph for specific vulnerability in select country

In [17]:
def vulnChart(vuln, ISO):
    df_temp = trend[['WRI.Country', 'ISO3.Code', 'Year', vuln]]
    df_temp = df_temp.loc[df_temp['ISO3.Code'] == ISO]
    
    fig = px.line(df_temp, x='Year', y=vuln, title=f'{rename[vuln]} Risk')
    fig.update_traces(mode='markers+lines')
    return fig

Find most and least correlated statistics

In [18]:
recentTrend = trend[trend['Year'] >= 2025]
recentTrend = recentTrend[['ISO3.Code', 'W', 'E', 'V']]

In [19]:
cloneCountries = countriesCleaned.copy()
recentTrend.rename(columns={'ISO3.Code': 'ISO3'}, inplace=True) # rename for merging

merged = recentTrend.merge(cloneCountries, on='ISO3', how='inner')  # inner join to ensure only countries with recent trend data are included

In [20]:
mergedClone = merged[['ISO3','Country', 'V', 'Infant mortality (per 1000 births)', 'Phones (per 1000)', 'Area (sq. mi.)','Literacy (%)', 'Agriculture']]

top3_2025 = mergedClone.nlargest(3, 'V')
bot3_2025 = mergedClone.nsmallest(3, 'V')

topbot3_2025 = pd.concat([top3_2025, bot3_2025]) # combine for display

In [21]:
# Lookup function for Dash table
def mergedlookup(ISO3):
    return mergedClone.loc[mergedClone['ISO3'] == ISO3]

In [22]:
train_range = ('2000-01-31', '2025-12-31') # training data range
forecast_range = ('2000-01-31', '2030-12-31') # forecast data range
test_range = ('2026-01-31', '2030-12-31') # testing data range

forecast_length = len(pd.date_range(start=forecast_range[0], end=forecast_range[1], freq='YE'))

trend_datetime = trend.copy()
trend_datetime['Date'] = pd.to_datetime(trend_datetime['Year'].astype(str) + '-01-31') # create datetime column

In [23]:
def generateHoltForecast(ISO3):
    target = trend_datetime.loc[trend_datetime['ISO3.Code'] == ISO3, ['Date', 'V']] # select target country with date and v columns
    
    target.set_index('Date', inplace=True)
    
    # Fit Holt's Linear Trend Model, Damped. No seasonality from data.
    holt_model = ExponentialSmoothing(
        target, trend='add', seasonal=None, damped_trend = True, initialization_method="estimated"
    ).fit()
    
    holt_forecast = holt_model.forecast(forecast_length)
    holt_test = holt_forecast.loc[test_range[0]:test_range[1]]
    return holt_test

In [24]:
def generatePredictedVulnerabilityGraph(ISO3):
    holt_forecast = generateHoltForecast(ISO3)
    
    # Plotting the forecasted vulnerability
    fig = px.line(holt_forecast, x=holt_forecast.index, y=holt_forecast.values, title='Predicted Vulnerability (Holt\'s Linear Trend Model with Damping)')
    fig.update_traces(mode='markers+lines')
    fig.update_xaxes(title='Year')
    fig.update_yaxes(title='Vulnerability Index')
    return fig

In [25]:
# Create dictionary for renaming code to country name.
isoNames = trend.loc[:,['ISO3.Code','WRI.Country']].drop_duplicates(subset='ISO3.Code')
isoToName = dict(zip(isoNames['ISO3.Code'], isoNames['WRI.Country']))

In [26]:
# Linear regression for 5 statistics selected through best subset selection using adjusted R squared as metric. 
x = ['Infant mortality (per 1000 births)', 'Phones (per 1000)', 'Area (sq. mi.)', 'Literacy (%)', 'Agriculture'] # These statistics were found to be the most useful in a 5 variable regression.
y = 'Vulnerability' # target variable
data = formAggregate(2000).dropna() # Use entire dataset as training
X = data.loc[:, x]
Y = data[y]

mdl = linear_model.LinearRegression().fit(X, Y) # Fit data to model
print(mdl.score(X, Y)) # Non-adjusted R squared score of resulting model. Lasso has better performance, but also requires almost all the features. 
print(mdl.coef_) # Coefficients of the linear regression predictors.

# mdl.predict(input)

0.6194223896232861
[ 9.54184646e-02 -1.70281462e-02  1.41290479e-06 -1.73957581e-01
  2.38420058e+01]


In [27]:
app = Dash(__name__) # Intialize Dash app object

app.layout = [
    html.Div( # Create Left-Right split
        style={'display': 'flex'},
        children=[
            html.Div( # Left Screen
                children=[
                    html.Div( # Dropdown selectors
                        style={'display': 'flex'},
                        children=[
                            dcc.Dropdown(id='continents', # Drop down for continents 
                                        options=[{'label': c, 'value': c} for c in continents], 
                                        value='North America', # Set North America as default
                                        style={'width': '25vw', 'height': '5vh'}),
                            dcc.Dropdown(id='aggregate', # Drop down for aggregate years
                                        options=[{'label': 'Last 25 years', 'value':2000}, # 3 options for aggregates
                                                {'label': 'Last 10 years', 'value':2015},
                                                {'label': 'Last 5 years', 'value':2020}],
                                        value=2000, # Default use entire dataset for aggregate
                                        style={'width': '25vw', 'height': '5vh'})
                        ]
                    ),
                    
                    dcc.Store(id='iso'), # Stores selected country's ISO3 code
                    dcc.Store(id='dataFrame', data=formAggregate(2000).to_dict()), # Stores aggregated dataframe as dictionary
                    dcc.Graph(id='map', style={'width': '50vw', 'height': '35vh'}, responsive=True), # Display choropleth map

                    html.Div( # Exposure and vulnerability indicators
                        style={'display': 'flex'},
                        children=[
                            dcc.Graph(id='exposure', style={'width': '25vw', 'height': '25vh'}, responsive=True), # Display exposure indicator on left
                            dcc.Graph(id='vulerability', style={'width': '25vw', 'height': '25vh'}, responsive=True) # Display vulnerability indicator on right
                        ]
                    ),

                    dcc.Graph(id='disaster', style={'width': '50vw', 'height': '35vh'}, responsive=True) # Display disaster bar graph last
                ]
            ),

            html.Div( # Right Screen
                children=[ # Dropdown selectors
                    html.Div(className='row', style={'display': 'flex'}, children=[
                        dcc.Dropdown(id='vulnType', options=[{'label': rename[col], 'value': col} for col in vul], value='S_01', style={'width': '25vw', 'height': '5vh'}),
                        dcc.Dropdown(id='vulnCountry', options=[{'label': isoToName[iso], 'value': iso} for iso in isoToName], value='CAN', style={'width': '25vw', 'height': '5vh'})]
                    ),

                    dcc.Graph(id='vulnDomainGraph', figure={}, style={'width': '50vw', 'height': '32.5vh'}, responsive=True), # Vulnerability domains
                    
                    dcc.Graph(id='predictedVulnerabilityGraph', figure={}, style={'width': '50vw', 'height': '32.5vh'}, responsive=True), # Vulnerability projection
                    
                    html.Div(style={'display': 'flex'}, children=[ # Tables
                        dag.AgGrid(id='countryData', columnDefs=[{'headerName': col, 'field': col} for col in topbot3_2025.columns], style={'width': '25vw', 'height': '20vh'}),
                        dag.AgGrid(rowData=topbot3_2025.to_dict('records'), columnDefs=[{'headerName': col, 'field': col} for col in topbot3_2025.columns], style={'width': '25vw', 'height': '20vh'})    
                    ]),

                    html.Div(children=[ # Regression Model
                        html.Div(style={'display': 'flex'}, children=[ # Text Input
                            dcc.Input(id='infant_mortality', type='number', placeholder='Infant Mortality', style={'width': '9.5vw', 'height': '5vh'}),
                            dcc.Input(id='phones', type='number', placeholder='Phones per 1000', style={'width': '9.5vw', 'height': '5vh'}),
                            dcc.Input(id='area', type='number', placeholder='Area', style={'width': '9.5vw', 'height': '5vh'}),
                            dcc.Input(id='literacy', type='number', placeholder='Literacy %', style={'width': '9.5vw', 'height': '5vh'}),
                            dcc.Input(id='agriculture', type='number', placeholder='Agriculture Industry %', style={'width': '9.5vw', 'height': '5vh'})
                        ]),
                        html.Div(style={'display': 'flex'}, children=[ # Text Output
                            html.Div(id='vulnerability_output', children="Predicted Vulnerabilty:", style={'width': '20vw', 'height': '5vh'}),
                            html.Button('Predict', id='prediction_button', n_clicks=0, style={'width': '10vw', 'height': '5vh'})
                        ])
                    ])
                ]
            )
        ]
    ) 
]

# Upon update of aggregate dropdown, run generateDataFrame and store dataframe 
@app.callback(
    Output('dataFrame', 'data'),
    Input('aggregate', 'value')
)
def generateDataFrame(bound):
    '''Runs formAggregate using selected bound and store dataframe as dictionary'''
    return formAggregate(bound).to_dict() 

# Upon selecting country on choropleth map, get ISO from clickdata and store 
@app.callback(
        Output('iso', 'data'),
        Input('map', 'clickData')
)
def getISOCode(clickData):
    '''Get ISO3 from clickData and returns it. If no country has been selected yet, use Canada as default.'''
    if clickData:
        ISO = clickData['points'][0]['location']
        return ISO
    return 'CAN'

# Upon changing continent bookmark or changing aggregate years, generate new choropleth map with the current bookmark and aggregate. 
@app.callback(
    Output('map', 'figure'),
    Input('continents', 'value'),
    Input('dataFrame', 'data')
)
def getMap(selected, data):
    '''Generates a new map using the selected continent and aggregate'''
    df = pd.DataFrame.from_dict(data) # Change dict back into dataframe.
    values = continents[selected] # Get longitude and latitude of continent.
    fig = generateMap(df) # Generates the map 

    fig.update_geos( # Centers the map using longitude and latitude.
        center_lat=values[0],
        center_lon=values[1],
        projection_scale=3
    )

    return fig

# Upon changing country or years, generate new exposure indicator. 
@app.callback(
    Output('exposure', 'figure'),
    Input('iso', 'data'),
    Input('dataFrame', 'data')
)
def getExposureInd(ISO, data):
    '''Callback function to generate exposure indicator using given country and years'''
    df = pd.DataFrame.from_dict(data) # Get dataframe from dict
    return generateExposureInd(ISO, df) # Call function to generate indicator
    
# Upon changing country or years, generate new vulnerability indicator.
@app.callback(
    Output('vulerability', 'figure'),
    Input('iso', 'data'),
    Input('dataFrame', 'data')
)
def getVulnerabilityInd(ISO, data):
    '''Callback function to generate vulnerability indicator using given country and years'''
    df = pd.DataFrame.from_dict(data) # Get dataframe from dict
    return generateVulnerabilityInd(ISO, df) # Call function to generate indicator

# Upon changing country or years, generate new disaster bar graph.
@app.callback(
    Output('disaster', 'figure'),
    Input('iso', 'data'),
    Input('dataFrame', 'data')
)
def getDisasterGraph(ISO, data):
    '''Callback function to generate disaster bar graph using given country and years'''
    df = pd.DataFrame.from_dict(data) # Get dataframe from dict
    return generateDisasterGraph(ISO, df) # Call function to bar graph
    
# Upon changing vulnerability domain or country, update line graph  
@app.callback(
    Output('vulnDomainGraph', 'figure'),
    Input('vulnCountry', 'value'),
    Input('vulnType', 'value')
)
def getVulnChart(countryISO, disasterType):
    return vulnChart(disasterType, countryISO)

# Upon changing country, update vulerability forecast 
@app.callback(
    Output('predictedVulnerabilityGraph', 'figure'),
    Input('vulnCountry', 'value')
)
def getPredictedVulnerabilityGraph(countryISO):
    return generatePredictedVulnerabilityGraph(countryISO)

# Upon changing country, update selected table.
@app.callback(
    Output('countryData', 'rowData'),
    Input('vulnCountry', 'value')
)
def updateCountryData(countryISO):
    data = mergedlookup(countryISO)
    return data.to_dict('records')

# Upon click of button, read text input
@app.callback(
    Output('vulnerability_output', 'children'),
    Input('prediction_button', 'n_clicks'),
    [State("{}".format(pred), "value") for pred in ['infant_mortality', 'phones', 'area', 'literacy', 'agriculture']],
    prevent_initial_call=True
)
def update_output(n_clicks, *vals):
    for v in vals:
        if v == None:
            return 'Predicted Vulnerability:' # If any value is missing, do nothing
        
    return f'Predicted Vulnerability: {mdl.predict(pd.DataFrame(dict(zip(x, vals)), index=[0]))}' # Else return regresion value

if __name__ == '__main__':
    #app.run(debug=True)
    app.run(jupyter_mode='external')


Dash app running on http://127.0.0.1:8050/



No frequency information was provided, so inferred frequency YE-JAN will be used.


No frequency information was provided, so inferred frequency YE-JAN will be used.


No frequency information was provided, so inferred frequency YE-JAN will be used.


No frequency information was provided, so inferred frequency YE-JAN will be used.


No frequency information was provided, so inferred frequency YE-JAN will be used.

