In [1]:
# PANDAS IS FOR DATA WRANGLING
import pandas as pd
import numpy as np

# SEABORN IS A PLOTTING LIBRARY
import seaborn as sns

# MATPLOT LIB IS ALSO A PLOTTING LIBRARY
import matplotlib.pyplot as plt

# SKLEARN IS OUR MACHINE LEARNING PACKAGE
from sklearn.linear_model import LinearRegression

# IMPORT OUR RANDOM FOREST REGERSSOR
from sklearn.ensemble import RandomForestRegressor

# METRICS HELP US SCORE OUR MODEL
from sklearn import metrics

# HELP US SPLIT OUR DATA INTO TESTING A TRAINING
from sklearn.model_selection import train_test_split

# Good ol statsmodels
import statsmodels.api as sm

# Specific root mean squared error for stats models
from statsmodels.tools.eval_measures import rmse


from statsmodels.stats.outliers_influence import variance_inflation_factor


from statsmodels.api import qqplot

import CTPLIB as ctp


In [2]:
df_countries = pd.read_csv('data/Countries.csv')

In [3]:
df_countries.head()

Unnamed: 0,Country Name,Country Code,Year,Agriculture (% GDP),Ease of Doing Business,Education Expenditure (% GDP),Export (% GDP),GDP,Health Expenditure (% GDP),Import (% GDP),...,Population,Land,Continent Name,Export,Import,Education Expenditure,Health Expenditure,Net Trade,GDP Per Capita,Population Density
0,Afghanistan,AFG,2000,27.501127,40.717968,13.670101,,14151970000.0,10.90258,,...,19542982.0,652860.0,Asia,,,1934589000.0,1542930000.0,,724.14591,29.934415
1,Afghanistan,AFG,2001,27.501127,40.717968,13.670101,,14151970000.0,10.90258,,...,19688632.0,652860.0,Asia,,,1934589000.0,1542930000.0,,718.788917,30.15751
2,Afghanistan,AFG,2002,38.627892,40.717968,13.670101,,3854235000.0,9.443391,,...,21000256.0,652860.0,Asia,,,526877900.0,363970500.0,,183.532775,32.166553
3,Afghanistan,AFG,2003,37.418855,40.717968,13.670101,,4539497000.0,8.941258,,...,22645130.0,652860.0,Asia,,,620553800.0,405888100.0,,200.462376,34.686043
4,Afghanistan,AFG,2004,29.721067,40.717968,13.670101,,5220825000.0,9.808474,,...,23553551.0,652860.0,Asia,,,713692100.0,512083200.0,,221.657662,36.077491


In [4]:
df_eu = pd.read_csv('data/Countries-Europe.csv')

In [5]:
df_eu.head()

Unnamed: 0,zoom,name,abbreviation,ISO alpha 2,ISO alpha 3,ISO numeric,land area km,population,latitude,longitude,continent
0,3,Ukraine,Ukr.,UA,UKR,804,603700.0,45415596,49.0,32.0,eu
1,3,France,Fr.,FR,FRA,250,547030.0,64768389,46.0,2.0,eu
2,3,Spain,Spain,ES,ESP,724,504782.0,46505963,40.0,-4.0,eu
3,3,Sweden,Swe.,SE,SWE,752,449964.0,9045000,62.0,15.0,eu
4,3,Germany,Ger.,DE,DEU,276,357021.0,82369000,51.5,10.5,eu


In [6]:
list(df_eu.columns)

['zoom',
 'name',
 'abbreviation',
 'ISO alpha 2',
 'ISO alpha 3',
 'ISO numeric',
 'land area km',
 'population',
 'latitude',
 'longitude',
 'continent']

In [7]:
df_list_of_eu = df_eu['name']
list(df_list_of_eu)

['Ukraine',
 'France',
 'Spain',
 'Sweden',
 'Germany',
 'Finland',
 'Norway',
 'Poland',
 'Italy',
 'United Kingdom',
 'Romania',
 'Belarus',
 'Greece',
 'Bulgaria',
 'Iceland',
 'Portugal',
 'Czech Republic',
 'Denmark',
 'Hungary',
 'Serbia',
 'Austria',
 'Ireland',
 'Lithuania',
 'Latvia',
 'Croatia',
 'Bosnia and Herzegovina',
 'Slovakia',
 'Estonia',
 'Netherlands',
 'Switzerland',
 'Moldova',
 'Belgium',
 'Albania',
 'Macedonia',
 'Slovenia',
 'Montenegro',
 'Cyprus',
 'Luxembourg',
 'Faroe Is.',
 'Andorra',
 'Malta',
 'Liechtenstein',
 'Guernsey',
 'San Marino',
 'Gibraltar',
 'Monaco',
 'Vatican City']

In [34]:
from dash import Dash, dash_table, html, dcc, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px # Used to make choropleth map

gdp_df=pd.read_csv('data/Countries.csv')

df_eu = pd.read_csv('data/Countries-Europe.csv')
list(df_eu.columns)
df_list_of_eu = df_eu['name']
df_countries = gdp_df.rename(columns={
    "Country Name": "country_name", 
    "Country Code": "country_code",
    "Year": "year",
    "Agriculture (% GDP)": "agriculture_percentage_gdp",
    "Ease of Doing Business": "ease_of_doing_business",
    "Education Expenditure (% GDP)": "education_expenditure_percentage_gdp",
    "Export (% GDP)": "export_percentage_gdp",
    "GDP": "gdp",
    "Health Expenditure (% GDP)": "health_expenditure_percentage_gdp",
    "Import (% GDP)": "import_percentage_gdp",
    "Industry (% GDP)": "industry_percentage_gdp",
    "Inflation Rate": "inflation_rate",
    "R&D": "research_and_development",
    "Service (% GDP)": "service_percentage_gdp",
    "Unemployment": "unemployment",
    "Population": "population",
    "Land": "land",
    "Continent Name": "continent_land",
    "Export": "export",
    "Import": "import",
    "Education Expenditure": "education_expenditure",
    "Health Expenditure": "health_expenditure",
    "Net Trade": "net_trade",
    "GDP Per Capita": "gdp_per_capita",
    "Population Density": "population_density"
    
})
df_eu_countries = df_countries[df_countries['country_name'].isin(df_list_of_eu)]

app = Dash(external_stylesheets=[dbc.themes.SOLAR])

min_year = df_eu_countries['year'].min()
max_year = df_eu_countries['year'].max()



##########################################################################################

app.layout = html.Div([
    html.H1('EU GDP Dashboard'),
    dcc.RangeSlider(id='year-slider',
                    min=df_eu_countries['year'].min(),
                    max=df_eu_countries['year'].max(),
                    value=[min_year, max_year],
                    marks={i:str(i) for i in range (min_year, max_year+1)}
    ),
    dcc.Checklist(id='country-stats',
                options = df_eu_countries.columns.unique(),
                          value = 'gdp',
                inline=True
    ),
    dcc.Graph(id='map-graph'),
    dcc.Graph(id='line-graph'),
    dash_table.DataTable(id='gdp-info')
])

##########################################################################################

@app.callback(
    Output(component_id='line-graph', component_property='figure'),
    Input(component_id='country-stats', component_property='value')
)
def update_linegraph(checked_stats):
    dff = df_eu_countries.columns.isin(checked_stats)
    dff.drop(['year'])
    print('dff: ' + dff)
    line_fig = px.line(
        dff,
        x='year',
        y=dff.columns,
        title='Country Stats',
        color='country_name'
    )
    return line_fig
    
##########################################################################################


@app.callback(
    Output(component_id='map-graph', component_property='figure'),
    Input(component_id='year-slider', component_property='value')
)
def update_map_graph(selected_years):
    filtered_gdp = df_eu_countries[
        (df_eu_countries['year']>=selected_years[0]) &
        (df_eu_countries['year']<=selected_years[1])
    ]
    average_gdp_df = df_eu_countries.groupby('country_name')['gdp'].mean().reset_index()
    map_fig = px.choropleth(
        average_gdp_df,
        locations='country_name',
        locationmode='country names',
        color = 'gdp',
        scope = 'europe',
        height=900,
        width=2100
    )
    return map_fig


##########################################################################################


# When the user hasn't clicked a country on the map yet, the data table's property will be None. When they do click a country, we update the data table to be specific to the selected country.
@app.callback(
    Output('gdp-info', 'data'),
    Input('map-graph', 'clickData'),
    Input('year-slider', 'value')
)
def update_datatable(clicked_data, selected_years):
    if clicked_data is None:
        return []
    eu_country = clicked_data['points'][0]['location']
    filtered_eu_countries = df_eu_countries[
        (df_eu_countries['year']>=selected_years[0]) &
        (df_eu_countries['year']<=selected_years[1]) &
        (df_eu_countries['country_name']==eu_country)
        ]
    return filtered_eu_countries.to_dict('records')

##########################################################################################

lines = px.line(df_eu_countries,
                x = 'year',
                y = ['gdp', 'ease_of_doing_business'],
                color = 'country_name'
)

lines.show()




In [18]:
app.run(jupyter_mode="external")

Dash app running on http://127.0.0.1:8050/


In [33]:
df_eu_countries.isnull

<bound method DataFrame.isnull of         country_name country_code  year  agriculture_percentage_gdp  \
23           Albania          ALB  2000                   24.515412   
24           Albania          ALB  2001                   22.716164   
25           Albania          ALB  2002                   22.025114   
26           Albania          ALB  2003                   21.978257   
27           Albania          ALB  2004                   20.537486   
...              ...          ...   ...                         ...   
4848  United Kingdom          GBR  2018                    0.566930   
4849  United Kingdom          GBR  2019                    0.604508   
4850  United Kingdom          GBR  2020                    0.643773   
4851  United Kingdom          GBR  2021                    0.674773   
4852  United Kingdom          GBR  2022                    0.709487   

      ease_of_doing_business  education_expenditure_percentage_gdp  \
23                 64.763660               