Visualization baby steps.
We start with the code from Chapter 3 of 'Data Science from Scratch' by Joel Grus,
but we'll work with real GDP data.

In [4]:
# all imports at the top
from matplotlib import pyplot as plt
import ipywidgets as widgets
import pandas as pd
import requests
import zipfile
from io import BytesIO
from pathlib import Path
from IPython.display import clear_output, display


In [5]:
# helper functions
def download_extract_gdp_data(
    url: str = 'https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.PP.CD?downloadformat=csv',
    destination_file: Path = Path('../data/gdp_ppp.csv'),
) -> None:
    """
    Download a zipped World Bank indicator export and save its data CSV.

    The response body is opened as a zip archive. The first member whose name
    starts with 'API_' and ends with '.csv' is written to destination_file.

    Parameters:
    url (str): The URL to download the zip archive from.
    destination_file (Path): Where the extracted CSV is saved. Defaults to '../data/gdp_ppp.csv'.

    Returns:
    None

    Raises:
    requests.HTTPError: If the server answers with an error status.
    FileNotFoundError: If the archive contains no 'API_*.csv' member.
    """
    print(f"Downloading data from the World Bank API {url} to {destination_file} ...")
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail here with a clear HTTP error instead of later with a confusing
    # "not a zip file" error caused by an error page in the response body.
    response.raise_for_status()

    destination_file = Path(destination_file)
    with zipfile.ZipFile(BytesIO(response.content)) as z:
        # Only the first matching member is extracted.
        csv_name = next(
            (
                name
                for name in z.namelist()
                if name.startswith('API_') and name.endswith('.csv')
            ),
            None,
        )
        if csv_name is None:
            # Previously this case returned silently and left no file behind.
            raise FileNotFoundError(
                f"No 'API_*.csv' file found in the archive downloaded from {url}"
            )
        # Make sure the target folder exists before writing.
        destination_file.parent.mkdir(parents=True, exist_ok=True)
        destination_file.write_bytes(z.read(csv_name))
    print(f"Downloaded {csv_name} to {destination_file}")
    

In [7]:
# Make sure the GDP per capita (PPP) csv file is in the data folder;
# if it is missing, download it from the World Bank API.

data_folder = Path("../data")
data_folder.mkdir(exist_ok=True)
csv_file = data_folder.joinpath("gdp_pcap_ppp.csv")
# The charts below are labelled "GDP per capita (PPP)", so request the
# per-capita indicator explicitly: the download helper's default URL points
# at NY.GDP.MKTP.PP.CD, which is total GDP rather than GDP per capita.
gdp_pcap_ppp_url = 'https://api.worldbank.org/v2/en/indicator/NY.GDP.PCAP.PP.CD?downloadformat=csv'
#TODO: look at the latest data on the World Bank API and compare its metadata with the one in the csv file,
# if they are different, download the new csv file
if not csv_file.exists():
    download_extract_gdp_data(url=gdp_pcap_ppp_url, destination_file=csv_file)
# read the csv file into a pandas dataframe (the first 4 lines are skipped
# so that the header row is the first line parsed)
df = pd.read_csv(csv_file, skiprows=4)
# alphabetical list of the distinct country names offered in the dropdown;
# missing names are dropped because NaN cannot be sorted together with strings
countries = sorted(df['Country Name'].dropna().unique())
# create a dropdown widget for the country name
country_dropdown = widgets.Dropdown(
    options=countries,
    description='Country:',
    disabled=False,
)

# output area that holds the chart of the selected country
output = widgets.Output()

# plot a line chart of all available GDP data for the selected country
def display_country_data(country_name):
    """
    Plot the available GDP data of the selected country as a line chart.

    Reads the module-level dataframe `df`. Every column after the first four
    is treated as a year column; years without a value are left out.

    Parameters:
    country_name (str): The name of the country to display data for.

    Returns:
    None
    """
    # rows of the dataframe that belong to the selected country
    matches = df[df['Country Name'] == country_name]
    if matches.empty:
        # nothing to plot; avoids the IndexError iloc[0] would raise
        print(f"No data found for country {country_name!r}")
        return

    year_columns = df.columns[4:]
    # Taking one row yields an object-dtype Series (the row also holds text
    # columns), so convert the year values to numbers before plotting.
    # Non-numeric entries become NaN and are dropped with the missing values.
    country_data = pd.to_numeric(matches.iloc[0][year_columns], errors='coerce').dropna()
    if country_data.empty:
        print(f"No GDP values available for {country_name!r}")
        return

    # plot the data as a line chart
    plt.figure(figsize=(12, 8))
    plt.plot(country_data.index, country_data.values, marker='o', linestyle='-', color='violet')
    plt.title(f"GDP per capita (PPP) in {country_name}")
    plt.xlabel('Year')
    plt.ylabel('GDP per capita (current international $)')
    plt.xticks(rotation=45)
    plt.grid()
    plt.show()
    
    ##create a bar chart of the GDP data
    #plt.figure(figsize=(12, 8))
    #plt.bar(country_data.index, country_data.values, color='violet')
    #plt.title(f"GDP per capita in {country_name}")
    #plt.xlabel('Year')
    #plt.ylabel('GDP per capita (PPP), (current international $)')
    #plt.xticks(rotation=45)
    #plt.grid()
    #plt.show()
    
    
    
# a function to update the chart when the selected country changes
def update_country_data(change):
    """
    Redraw the country chart after the dropdown selection has changed.

    Parameters:
    change (dict): The change event from the dropdown widget; its 'new'
        entry is passed on as the country name.

    Returns:
    None
    """
    with output:
        # remove the previous chart, then draw the one for the new selection
        clear_output(wait=True)
        selected_country = change['new']
        display_country_data(selected_country)
# call update_country_data whenever the dropdown's 'value' changes
country_dropdown.observe(
    update_country_data,
    names='value',
)
# show the country selector together with the area that holds the chart
display(
    country_dropdown,
    output,
)

Dropdown(description='Country:', options=('Afghanistan', 'Africa Eastern and Southern', 'Africa Western and Ce…

Output()

In [8]:
# add widgets to select two years and display a bar chart of the countries' GDP change (%) between the selected years
#TODO: clean up the country labels, make the plot readable
def display_gdp_change(year1:int, year2:int)->None:
    """
    Display the GDP change between two selected years for all the countries.

    Reads the module-level dataframe `df`, computes for every row the
    percentage change from year1 to year2, and plots the result as a bar
    chart sorted from the largest to the smallest change. Rows without a
    value in either year, or with a zero value in year1, are left out.

    Parameters:
    year1 (int): The first (baseline) year to compare. A string such as '1990' also works.
    year2 (int): The second year to compare. A string such as '2020' also works.

    Returns:
    None

    Raises:
    KeyError: If `df` has no column for one of the years.
    """
    # df is indexed with str(year), so normalise whatever the caller passed.
    first_year, second_year = str(year1), str(year2)
    # Build the working frame with fixed column names. This also works when
    # both years are the same, where selecting and renaming the columns would
    # produce duplicate labels.
    gdp_change = pd.DataFrame({
        'Country Name': df['Country Name'],
        'Year1': pd.to_numeric(df[first_year], errors='coerce'),
        'Year2': pd.to_numeric(df[second_year], errors='coerce'),
    }).dropna()
    # A zero baseline has no meaningful percentage change (division by zero).
    gdp_change = gdp_change[gdp_change['Year1'] != 0].copy()
    if gdp_change.empty:
        print(f"No countries have GDP data for both {year1} and {year2}")
        return
    # calculate the percentage change between the two years
    gdp_change['Change (%)'] = ((gdp_change['Year2'] - gdp_change['Year1']) / gdp_change['Year1']) * 100
    # sort the data frame by the percentage change
    gdp_change = gdp_change.sort_values(by='Change (%)', ascending=False)
    # plot the data as a bar chart
    plt.figure(figsize=(20, 8))
    plt.bar(gdp_change['Country Name'], gdp_change['Change (%)'], color='violet')
    plt.title(f"GDP Change from {year1} to {year2}")
    plt.xlabel('Country')
    plt.ylabel('GDP Change (%)')
    plt.xticks(rotation=45)
    plt.grid()
    plt.show()
# create the dropdown widgets for the years
# only columns after the first four that hold at least one value are offered
years = sorted(y for y in df.columns[4:] if df[y].notna().any())
year1_dropdown = widgets.Dropdown(
    options=years,
    description='Year 1:',
    disabled=False,
)
year2_dropdown = widgets.Dropdown(
    options=years,
    # start on the latest year so that the default selection does not
    # compare the first year with itself
    value=years[-1] if years else None,
    description='Year 2:',
    disabled=False,
)
# create a button to display the GDP change
gdp_change_button = widgets.Button(
    description='Display GDP Change',
    button_style='success',
    tooltip='Click to display GDP change',
    icon='check'
)
# Dedicated output area for the comparison chart. Reusing `output` would
# clear the country chart on every click, because that widget is also
# displayed next to the country dropdown and is cleared by its handler.
gdp_change_output = widgets.Output()

# bind the button to the display_gdp_change function
def on_button_click(b):
    """
    Handle the button click event to display GDP change.

    Clears the comparison output area and draws the bar chart for the two
    years currently selected in the year dropdowns.

    Parameters:
    b (Button): The button that was clicked.

    Returns:
    None
    """
    with gdp_change_output:
        clear_output(wait=True)
        display_gdp_change(str(year1_dropdown.value), str(year2_dropdown.value))
gdp_change_button.on_click(on_button_click)
# display the dropdown widgets, the button and the chart area
display(year1_dropdown, year2_dropdown, gdp_change_button, gdp_change_output)

Dropdown(description='Year 1:', options=('1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998…

Dropdown(description='Year 2:', options=('1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998…

Button(button_style='success', description='Display GDP Change', icon='check', style=ButtonStyle(), tooltip='C…

Output()