In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from dash import Dash, dcc, html, dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template
from dash import html
import plotly.express as px

In [2]:
# Read the World Bank indicators workbook into a DataFrame.
# NOTE(review): this is an absolute path on one machine; consider a configurable data directory.
world_bank_workbook = 'D:/OneDrive - Insightful Tech Consulting/Data/006 - Maven Analytics/11 - Guided Project - World Economic Report/WorldBank.xlsx'
world_data = pd.read_excel(world_bank_workbook)

Add a "Population (M)" column, derived as GDP divided by GDP per capita and expressed in millions to make it easier to read.

In [3]:
# Population is not supplied directly, so derive it as GDP / GDP per capita
# and scale it to millions to keep the numbers readable.
world_data['Population (M)'] = (
    world_data['GDP (USD)']
    .div(world_data['GDP per capita (USD)'])
    .div(1_000_000)
)

In [4]:
# world_data.info() # Used for testing, can be removed

In [5]:
# Summary statistics for the numeric columns, including the derived "Population (M)".
world_data.describe()

Unnamed: 0,Year,"Birth rate, crude (per 1,000 people)","Death rate, crude (per 1,000 people)",Electric power consumption (kWh per capita),GDP (USD),GDP per capita (USD),Individuals using the Internet (% of population),"Infant mortality rate (per 1,000 live births)",Life expectancy at birth (years),Population density (people per sq. km of land area),Unemployment (% of total labor force) (modeled ILO estimate),Population (M)
count,12449.0,11440.0,11416.0,5848.0,9578.0,9575.0,5064.0,9984.0,11176.0,11845.0,5208.0,9575.0
mean,1989.0,28.643276,10.588539,3175.294686,170074000000.0,8231.812259,23.334471,51.704437,64.044692,318.86137,8.295079,29.956728
std,17.03007,13.131893,5.489382,4467.139298,897986600000.0,16173.539954,28.319388,46.131039,11.491087,1593.406041,6.290703,115.803824
min,1960.0,6.9,1.127,0.0,8824450.0,34.7906,0.0,1.4,18.907,0.098625,0.14,0.008913
25%,1974.0,16.6,6.86375,390.38575,1393010000.0,513.1455,0.594949,14.475,55.91775,19.7834,3.687,1.051937
50%,1989.0,27.5455,9.2,1541.895,7275305000.0,1852.81,8.406225,37.0,67.276,64.0075,6.775,5.390515
75%,2004.0,40.88125,12.687,4313.7675,48577820000.0,7774.565,41.29595,78.2,72.69225,144.823,11.21225,17.075158
max,2018.0,58.227,54.444,54799.2,20500000000000.0,189171.0,100.0,279.4,85.4171,21389.1,37.94,1391.89528


Load the HDI csv file so it can be joined, on "Country Code", to the World Bank data filtered to the years 2000–2014.

In [6]:
# Read the HDI csv file into a DataFrame.
# NOTE(review): this is an absolute path on one machine; consider a configurable data directory.
hdi_csv_file = 'D:/OneDrive - Insightful Tech Consulting/Data/006 - Maven Analytics/11 - Guided Project - World Economic Report/HDI.csv'
hdi_data = pd.read_csv(hdi_csv_file)

Merge the hdi_2000 to hdi_2014 columns from the hdi_data table into the World Bank rows for the years 2000–2014.

In [7]:
# Years covered by the dashboard and the HDI column that belongs to each one.
hdi_years = range(2000, 2015)
hdi_columns = [f'hdi_{year}' for year in hdi_years]

# Keep the World Bank rows for those years and attach the HDI columns by
# country code. The left join keeps World Bank rows that have no HDI match.
data_2014 = (
    world_data[world_data['Year'].isin(hdi_years)]
    .merge(
        hdi_data[['iso3'] + hdi_columns],
        left_on='Country Code',
        right_on='iso3',
        how='left',
    )
    # Remove the join key brought in from the HDI table; it is not needed afterwards.
    .drop('iso3', axis=1)
)

In [8]:
# data_2014.info() # Used for testing, can be removed

Create a "gdp_pivot" table with years as rows, regions as columns, sum of GDP as values

In [9]:
# Total GDP per region for every year: one row per year, one column per
# region. "Year" is turned back into an ordinary column for plotting.
gdp_pivot = world_data.pivot_table(
    values='GDP (USD)',
    index='Year',
    columns='Region',
    aggfunc='sum',
).reset_index()

In [10]:
#Show the first 5 rows of the new table
# gdp_pivot.head() # Used for testing, can be removed

Create "pop_pivot" table with years as rows, regions as columns, and sum of population as values.

In [11]:
# Total population (in millions) per region for every year: one row per year,
# one column per region. "Year" is turned back into an ordinary column.
pop_pivot = world_data.pivot_table(
    values='Population (M)',
    index='Year',
    columns='Region',
    aggfunc='sum',
).reset_index()

Create the "data_region" table by calculating the average HDI for each region, with one column per year from 2000 to 2014. The table is left unsorted because there is no longer a single HDI column to sort on.

In [12]:
# Average every yearly HDI column within each region.
# Only 'hdi_2014' would typically be used; the other years were added as a
# test for further development.
yearly_hdi_means = {f'hdi_{year}': 'mean' for year in range(2000, 2015)}
data_region = data_2014.groupby('Region').agg(yearly_hdi_means)
# With several HDI columns there is no single column to sort on, so the
# original sort stays disabled:
#.sort_values('hdi_2014',ascending=False)

In [13]:
# data_2014.head() # Used for testing can be removed

In [63]:
# Extra stylesheet loaded next to the Bootstrap theme when the app is created.
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# Colour palettes: the same seven colours in three different orders.
# Palette for the stacked area charts.
stack1_list = ["#F94144", "#F3722C", "#F8961E", "#F9C74F", "#90BE6D", "#43AA8B", "#577590"]

# Alternative ordering of the same palette.
stack2_list = ["#F94144", "#90BE6D", "#577590", "#F3722C", "#F9C74F", "#F8961E", "#43AA8B"]

# Ordering used for the bubble chart.
# Earlier ordering, kept for reference:
# ['#90BE6D','#F3722C','#43AA8B','#F94144','#577590','#F9C74F','#F8961E']
bubble_list = ["#43AA8B", "#F3722C", "#F9C74F", "#F94144", "#577590", "#F8961E", "#90BE6D"]

# Create the Dash app with the PULSE Bootstrap theme plus the extra dbc stylesheet.
app = Dash(__name__, external_stylesheets=[dbc.themes.PULSE, dbc_css])

# Size bounds derived from the population column. `maxsize` sets the circle
# diameters (in px) in the population legend of the life expectancy bubble chart.
# Series.min()/.max() skip missing values; the builtin min()/max() compare
# element by element and can return NaN depending on where the NaNs sit.
minsize = max(data_2014["Population (M)"].min(), 15)  # never smaller than 15
maxsize = data_2014["Population (M)"].max() / 20

# Apply the matching plotly figure template so the charts follow the theme.
load_figure_template("PULSE")
# Legend entries for the two area charts: (swatch colour, region label).
_REGION_LEGEND = (
    ("#F94144", "East Asia & Pacific"),
    ("#F3722C", "Europe & Central Asia"),
    ("#F8961E", "Latin America & Caribbean"),
    ("#F9C74F", "Middle East & North Africa"),
    ("#90BE6D", "North America"),
    ("#43AA8B", "South Asia"),
    ("#577590", "Sub-Saharan Africa"),
)

# Population legend steps: (fraction of the largest circle, caption).
_BUBBLE_LEGEND_STEPS = (
    (0.2, "250"),
    (0.4, "500"),
    (0.6, "750"),
    (0.8, "1,000"),
    (1.0, "1,250"),
)

# Commentary paragraphs shown under the region legend.
_NARRATIVE_PARAGRAPHS = (
    '''
Economic growth in the 20th and early
21st centuries has been rapid, outpacing
population increases. In 1960, Europe and
North America generated 75% of global GDP
with only 20% of the world’s population.
''',
    '''
By 2018, East Asia surpassed both in GDP,
driven by Japan, South Korea, and China.
''',
    '''
In the coming decades, economists predict
that South Asia and Sub-Saharan Africa
will lead global economic growth.
''',
)


def _filter_card(caption, dropdown_id, options, initial_value):
    """Return a borderless card holding a caption and a multi-select dropdown."""
    return dbc.Card(
        [
            dcc.Markdown(caption),
            dcc.Dropdown(
                id=dropdown_id,
                options=options,
                value=initial_value,
                multi=True,
                className="dbc",
            ),
        ],
        style={"border": "none"},
    )


def _chart_card(graph_id):
    """Return a borderless card wrapping an empty graph that a callback fills in."""
    return dbc.Card([dcc.Graph(id=graph_id)], style={"border": "none"})


def _legend_row(colour, label):
    """Return one legend line: a small colour swatch followed by the region label."""
    return dbc.Row(
        [
            dbc.Col(
                [
                    html.Div(
                        style={
                            "marginLeft": "10px",
                            "width": "20px",
                            "height": "10px",
                            "backgroundColor": colour,
                            "display": "inline-block",
                        }
                    ),
                    html.Div(
                        label,
                        style={"display": "inline-block", "marginLeft": "20px"},
                    ),
                ]
            )
        ],
        # Every row now shares one style; previously the first row alone
        # lacked the "height" entry.
        style={"marginBottom": "0px", "marginTop": "0px", "height": "2%"},
    )


def _bubble_legend_columns(largest_diameter):
    """Return the columns of the population legend.

    The legend is a heading followed by one (circle, caption) pair per entry in
    ``_BUBBLE_LEGEND_STEPS``. Each circle's diameter in px is that entry's
    fraction of ``largest_diameter``.
    """
    columns = [
        dbc.Col(
            html.H6("Population (M)", className="card-title"),
            width="auto",
            style={"display": "inline-block"},
        )
    ]
    for fraction, caption in _BUBBLE_LEGEND_STEPS:
        diameter = f"{largest_diameter * fraction}px"
        columns.append(
            dbc.Col(
                html.Div(
                    style={
                        "width": diameter,
                        "height": diameter,
                        "backgroundColor": "black",
                        "borderRadius": "50%",  # turns the square div into a circle
                        "display": "inline-block",
                    }
                ),
                width="auto",
                style={"display": "inline-block"},
            )
        )
        columns.append(
            dbc.Col(
                html.H6(caption),
                width="auto",
                style={"display": "inline-block"},
            )
        )
    return columns


# Page layout, top to bottom: title bar, the three filters, the two area
# charts with their legend and commentary, then the bubble chart with its
# population legend. Style keys use the camelCase form Dash documents.
app.layout = dbc.Container(
    children=[
        # Title bar; the text is supplied by the main callback.
        dbc.Row(
            [
                dbc.Col(
                    [
                        html.H2(
                            id="report_title",
                            className="bg-primary text-white p-2 mb-2 text-center",
                        )
                    ]
                )
            ]
        ),
        # Filters.
        dbc.Row(
            [
                dbc.Col(
                    [
                        _filter_card(
                            "Select A Region:",
                            "region_dropdown",
                            [
                                {"label": region, "value": region}
                                for region in data_2014["Region"].unique()
                            ],
                            "all_values",
                        )
                    ]
                ),
                dbc.Col(
                    [
                        _filter_card(
                            "Select A Country",
                            "country_dropdown",
                            [
                                {"label": country, "value": country}
                                for country in data_2014["Country Name"].unique()
                            ],
                            "all_values",
                        )
                    ]
                ),
                dbc.Col(
                    [
                        _filter_card(
                            "Select A Year",
                            "year_dropdown",
                            [
                                {"label": str(year), "value": year}
                                for year in data_2014["Year"].unique()
                            ],
                            2014,
                        )
                    ]
                ),
            ]
        ),
        # Area charts, region legend and commentary.
        dbc.Row(
            [
                dbc.Col([_chart_card("gdp_chart")]),
                dbc.Col([_chart_card("pop_chart")]),
                dbc.Col(
                    [
                        dbc.Card(
                            [
                                _legend_row(colour, label)
                                for colour, label in _REGION_LEGEND
                            ],
                            # "marginBottom" was previously misspelt as
                            # 'margin-botton', so it was never applied.
                            style={"marginTop": "35px", "marginBottom": "10px"},
                        ),
                        dbc.Row(
                            [
                                dbc.Card(
                                    [html.P(text) for text in _NARRATIVE_PARAGRAPHS],
                                    style={
                                        "marginBottom": "10px",
                                        "marginTop": "10px",
                                        "border": "none",
                                    },
                                ),
                            ],
                            style={"border": "none"},
                        ),
                    ],
                    style={"border": "none"},
                ),
            ],
            style={"marginTop": "0px"},
        ),
        # Heading for the bubble chart.
        dbc.Row(
            [
                dbc.Col(
                    [
                        dbc.Card(
                            html.H6(
                                "Life Expectancy Increases as Countries get richer",
                                className="card-title",
                            ),
                            style={"border": "none"},
                        )
                    ]
                ),
            ]
        ),
        # Population legend for the bubble chart.
        dbc.Row(
            [
                dbc.Card(
                    dbc.Row(
                        _bubble_legend_columns(maxsize),
                        align="center",
                        justify="start",
                    ),
                    style={"border": "none"},
                )
            ]
        ),
        # Bubble chart.
        dbc.Row([dbc.Col([dcc.Graph(id="life_expectancy_chart")])]),
    ]
)




@app.callback(Output("country_dropdown", "options"), Input("region_dropdown", "value"))
def update_country_dropdown(selected_regions):
    """Rebuild the country dropdown options for the current region selection.

    Args:
        selected_regions: Value of the region dropdown. A falsy value, or a
            value containing "all_values", means every country is offered.

    Returns:
        A list of ``{"label": name, "value": name}`` dicts, one per distinct
        country name in ``data_2014`` that belongs to the chosen regions.
    """
    offer_every_country = not selected_regions or "all_values" in selected_regions
    if offer_every_country:
        country_names = data_2014["Country Name"].unique()
    else:
        # Restrict to the rows whose region is one of the chosen ones.
        in_chosen_region = data_2014["Region"].isin(selected_regions)
        country_names = data_2014.loc[in_chosen_region, "Country Name"].unique()

    return [{"label": name, "value": name} for name in country_names]


def _selected_items(selection):
    """Normalise a dropdown value into a list of the chosen items.

    ``None``, an empty list and the "all_values" sentinel (alone or inside a
    list) all mean "no filter" and give ``[]``. A scalar becomes a
    one-element list.
    """
    if selection is None:
        return []
    if isinstance(selection, (list, tuple)):
        return [] if "all_values" in selection else list(selection)
    return [] if selection == "all_values" else [selection]


def _pivot_for_regions(pivot, regions, divisor):
    """Return a new frame with "Year" plus the chosen region columns, scaled.

    An empty ``regions`` keeps every region. Columns keep the pivot's own
    order, and the region values are divided by ``divisor``. The pivot passed
    in is not modified.
    """
    wanted = set(regions)
    keep = [
        column
        for column in pivot.columns
        if column == "Year" or not wanted or column in wanted
    ]
    frame = pivot[keep].reset_index(drop=True)
    value_columns = [column for column in keep if column != "Year"]
    if value_columns:
        frame[value_columns] = frame[value_columns] / divisor
    return frame


def _stacked_area(frame, value_label, chart_title, colours):
    """Return a stacked area chart of every region column in ``frame`` by year."""
    regions = [column for column in frame.columns if column != "Year"]
    figure = px.area(
        frame,
        x="Year",
        y=regions,
        labels={"value": value_label, "variable": "Region"},
        title=chart_title,
    )
    # Colour by region name, not by position, so a region keeps its colour
    # when only a subset of regions is shown.
    for trace, region in zip(figure.data, regions):
        if region in colours:
            trace.line.color = colours[region]
    figure.update_layout(
        yaxis_title=value_label, xaxis_title="Year", showlegend=False
    )
    figure.update_xaxes(showgrid=False)
    figure.update_yaxes(showgrid=False)
    return figure


@app.callback(
    Output("report_title", "children"),
    Output("gdp_chart", "figure"),
    Output("pop_chart", "figure"),
    Output("life_expectancy_chart", "figure"),
    Input("region_dropdown", "value"),
    Input("country_dropdown", "value"),
    Input("year_dropdown", "value"),
)
def display_infomation(
    region_dropdown_value, country_dropdown_value, year_dropdown_value
):
    """Rebuild the report title and the three charts from the filter values.

    Args:
        region_dropdown_value: Chosen regions. ``None``, ``[]`` or
            "all_values" means all regions.
        country_dropdown_value: Chosen countries, with the same "no filter"
            conventions. Only the bubble chart is filtered by country.
        year_dropdown_value: Chosen year or list of years for the bubble chart.

    Returns:
        A tuple ``(title, gdp_fig, pop_fig, life_fig)``.

    Raises:
        PreventUpdate: If no year is selected, so the current charts stay on
            screen rather than being replaced by empty ones.
    """
    title = "Tracing Global Growth and Development, 1960 – 2018"

    regions = _selected_items(region_dropdown_value)
    countries = _selected_items(country_dropdown_value)
    years = _selected_items(year_dropdown_value)
    if not years:
        raise PreventUpdate

    # One fixed colour per region, taken from the full GDP pivot's column
    # order. This is what the charts show when nothing is filtered.
    region_colours = {
        region: stack1_list[position % len(stack1_list)]
        for position, region in enumerate(
            column for column in gdp_pivot.columns if column != "Year"
        )
    }

    # ---- Country-level rows for the bubble chart -------------------------
    # Exact matching with isin(): joining the names into a regular expression
    # matched substrings ("Niger" also selected "Nigeria") and treated
    # characters such as "(" and "." in names as regex syntax.
    keep_row = data_2014["Year"].isin(years)
    if regions:
        keep_row &= data_2014["Region"].isin(regions)
    if countries:
        keep_row &= data_2014["Country Name"].isin(countries)

    # Keep every non-HDI column, plus the HDI column of the chosen year when
    # exactly one year is selected (renamed to "HDI").
    columns_to_keep = [
        column for column in data_2014.columns if not str(column).startswith("hdi_")
    ]
    hdi_column = f"hdi_{years[0]}" if len(years) == 1 else None
    if hdi_column is not None and hdi_column in data_2014.columns:
        columns_to_keep.append(hdi_column)

    dataframe = (
        data_2014.loc[keep_row, columns_to_keep]
        # Rows without a population cannot be sized in the scatter plot.
        .dropna(subset=["Population (M)"])
        .reset_index(drop=True)
    )
    if hdi_column is not None:
        dataframe = dataframe.rename(columns={hdi_column: "HDI"})

    # ---- GDP chart (values in trillions) ---------------------------------
    gdp_pivot_filtered = _pivot_for_regions(
        gdp_pivot, regions, divisor=1_000_000_000_000
    )
    gdp_fig = _stacked_area(
        gdp_pivot_filtered,
        "GDP (Trillions)",
        "GDP Has Grown Exponentially Over Time",
        region_colours,
    )

    # ---- Population chart (values in billions) ---------------------------
    # The pivot holds millions, so divide by 1,000 to match the "Billions"
    # axis label and title.
    pop_pivot_filtered = _pivot_for_regions(pop_pivot, regions, divisor=1_000)

    # World (or selection) totals for the first and last year in the pivot.
    yearly_totals = pop_pivot_filtered.set_index("Year").sum(axis=1)
    total_first_year = yearly_totals.loc[yearly_totals.index.min()]
    total_last_year = yearly_totals.loc[yearly_totals.index.max()]

    # Pick the verb for the chart title from the change in billions.
    change = total_last_year - total_first_year
    if (change >= 0) and (change < 0.6):
        title_phrase = "Grown"
    elif change < 0:
        title_phrase = "Reduced"
    else:
        title_phrase = "Surged"

    pop_fig = _stacked_area(
        pop_pivot_filtered,
        "Population (Billions)",
        f"Population has {title_phrase} from {total_first_year:,.1f} Billion to {total_last_year:,.1f} Billion",
        region_colours,
    )

    # ---- Bubble chart ----------------------------------------------------
    # Series.max() skips missing values; the builtin max() can return NaN
    # depending on where the NaNs sit in the column.
    largest_population = data_2014["Population (M)"].max()

    life_fig = px.scatter(
        dataframe,
        x="Life expectancy at birth (years)",
        y="GDP per capita (USD)",
        size="Population (M)",
        size_max=2.0 * largest_population / 50,
        color="Region",
        color_discrete_map=region_colours,
    )
    life_fig.update_yaxes(
        title_text="GDP per capita (USD)", tickmode="linear", showgrid=False
    )
    life_fig.update_xaxes(showgrid=False)
    life_fig.update_layout(yaxis_type="log", showlegend=False)

    return title, gdp_fig, pop_fig, life_fig


if __name__ == "__main__":
    # Render the app inline in the notebook output. `app` is a dash.Dash
    # instance, whose notebook display option is `jupyter_mode` on `app.run`;
    # `mode=` was the JupyterDash spelling and `run_server` is the old alias.
    app.run(debug=True, jupyter_mode="inline", port=8657)
