# Plotting drastically different time series

Plotting multiple time series on a single graph is always a challenge. It becomes especially difficult when the time series differ greatly from one another.
In this exercise, we will plot the per-capita Gross Domestic Product (GDPPC) of OECD countries. The OECD (Organisation for Economic Co-operation and Development) is an intergovernmental economic organization whose 35 member countries have very different economy types and sizes.

In [None]:
import pandas as pd
import os
from matplotlib import pylab as plt
import seaborn as sns
import numpy as np
import ipywidgets
import matplotlib as mpl
from statsmodels.nonparametric.kde import KDEUnivariate
%matplotlib inline

In [None]:
# Relative path of the data directory; the Excel and CSV inputs used by this
# notebook are read from this same '../data/' location.
dir_data = '../data/'

In [None]:
# Load the per-capita GDP table from the 'Data' sheet of the workbook.
# The path is built from dir_data so the data location is defined in one place
# instead of being repeated as a hard-coded literal.
df_gdppp = pd.read_excel(os.path.join(dir_data, 'GDP_per_capita.xlsx'), sheet_name='Data')
df_gdppp.head()

In [None]:
# Load the country-to-group mapping (path built from dir_data, like the other
# input file) and collect the country codes of the rows whose GroupCode is
# 'OED' -- the group used below as the OECD membership list.
df_groups = pd.read_csv(os.path.join(dir_data, 'country_groups.csv'))
oecd_countries = df_groups.loc[df_groups.GroupCode == 'OED'].CountryCode.values

In [None]:
# Keep only the rows of the GDP table whose country code is in oecd_countries.
is_oecd_member = df_gdppp['Country Code'].isin(oecd_countries)
df_oecd_wide = df_gdppp.loc[is_oecd_member]
df_oecd_wide.head()

In [None]:
# Reshape the wide table to long form (one row per country and year),
# cast the year labels to integers and keep 1990 onwards.
df_oecd = (
    df_oecd_wide
    .melt(id_vars=['Country Name', 'Country Code'], var_name='year', value_name='GDPPC')
    .assign(year=lambda long_df: long_df.year.astype(int))
    .loc[lambda long_df: long_df.year >= 1990]
)
df_oecd.sample(10)

In [None]:
# First attempt: one line per country, each with its own legend entry.
fig, ax = plt.subplots(figsize=(10, 4))
country_groups = df_oecd.groupby(['Country Name', 'Country Code'])
for (country, _code), country_df in country_groups:
    ax.plot(country_df.year, country_df.GDPPC, '-', label=country)
ax.legend()
ax.set_ylabel('GDP per capita')
ax.set_title('Not really!')

In [None]:
def plot_gdppc():
    """Plot the GDPPC series of every country in ``df_oecd`` as a gray line.

    Returns the matplotlib Axes that was drawn on.
    """
    _, axes = plt.subplots(figsize=(10, 4))
    per_country = df_oecd.groupby(['Country Name', 'Country Code'])
    for (country, _code), country_df in per_country:
        axes.plot(country_df.year, country_df.GDPPC, '-', color='gray', label=country)
    axes.set_ylabel('GDP per capita')
    return axes


plot_gdppc()

In [None]:
#! Look at what's important
def plot_gdppc(country_name):
    """Plot every country's GDPPC series from ``df_oecd``, highlighting one.

    Parameters
    ----------
    country_name : str
        Value of the 'Country Name' column to highlight. That country is
        drawn as a thick 'C1' line on top of the thin gray lines of all
        other countries and is the only entry in the legend.

    Returns
    -------
    The matplotlib Axes that was drawn on.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    for (name, code), df in df_oecd.groupby(['Country Name', 'Country Code']):
        if country_name == name:
            clr = 'C1'
            lw = 2.5
            zorder = 9  # show this line above the others
            lbl = name
        else:
            clr = 'gray'
            lw = 0.75
            zorder = 0
            lbl = ''  # empty label keeps the line out of the legend
        # zorder must be passed explicitly; otherwise the highlighted line
        # can be covered by countries plotted after it.
        ax.plot(df.year, df.GDPPC, '-', color=clr, lw=lw, zorder=zorder, label=lbl)
    ax.set_ylabel('GDP per capita')
    ax.legend()
    return ax
plot_gdppc('Israel')

In [None]:
# Normalize the values
def normalize_to_2000(df):
    """Express GDPPC as the percentage difference from its year-2000 value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least ``year`` and ``GDPPC`` columns. The reference is
        the GDPPC of the first row whose ``year`` equals 2000.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``GDPPC`` column holds
        ``(GDPPC / GDPPC_2000) * 100 - 100``. The input frame is left
        unmodified.

    Raises
    ------
    IndexError
        If ``df`` contains no row for the year 2000.
    """
    ref = df.loc[df.year == 2000, 'GDPPC'].iloc[0]
    # assign() builds a new frame, so the caller's data is never mutated.
    return df.assign(GDPPC=(df.GDPPC / ref) * 100 - 100)
# Normalize each country's series separately, then keep 2000 onwards.
# The groups are iterated and concatenated explicitly instead of going through
# groupby(...).apply, whose handling of group keys and grouping columns differs
# between pandas releases. This way the result always keeps the original row
# index and a plain 'Country Name' column, which the later cells group by.
df_oecd_normalized = pd.concat(
    [normalize_to_2000(country_df) for _, country_df in df_oecd.groupby('Country Name')]
).sort_index()  # restore the original row order
df_oecd_normalized = df_oecd_normalized.loc[df_oecd_normalized.year >= 2000]

In [None]:
# Spot-check the normalized rows whose country code is 'ISR'.
is_isr = df_oecd_normalized['Country Code'] == 'ISR'
df_oecd_normalized.loc[is_isr]

In [None]:
#! Plot the normalized data to reduce the clutter
def plot_normalized(country_name):
    """Plot the normalized GDPPC of every country, highlighting one.

    Reads the module-level ``df_oecd_normalized`` frame.

    Parameters
    ----------
    country_name : str
        Value of the 'Country Name' column to highlight. That country is
        drawn as a thick 'C1' line on top of the thin gray lines of all
        other countries and is the only entry in the legend.

    Returns
    -------
    The matplotlib Axes that was drawn on.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    for (name, code), df in df_oecd_normalized.groupby(['Country Name', 'Country Code']):
        if country_name == name:
            clr = 'C1'
            lw = 2.5
            zorder = 9  # show this line above the others
            lbl = name
        else:
            clr = 'gray'
            lw = 0.5
            zorder = 0
            lbl = ''  # empty label keeps the line out of the legend
        # zorder must be passed explicitly; otherwise the highlighted line
        # can be covered by countries plotted after it.
        ax.plot(df.year, df.GDPPC, '-', color=clr, lw=lw, zorder=zorder, label=lbl)
    ax.set_ylabel('GDP per capita\nrelative to 2000')
    ax.legend()
    return ax
plot_normalized('Israel')

In [None]:
def _gdppc_in_final_year(country_df):
    """Return the GDPPC of the first row that falls in the frame's latest year."""
    final_year = country_df.year.max()
    in_final_year = country_df.loc[country_df.year == final_year]
    return in_final_year['GDPPC'].iloc[0]


# Normalized GDPPC in each country's latest year, sorted ascending, so the
# first and last index entries are the countries with the extreme values.
last_GDPPC = (
    df_oecd_normalized
    .groupby('Country Name')
    .apply(_gdppc_in_final_year)
    .sort_values()
)
last_GDPPC.head()

In [None]:
#! Let's make this prettier (*)
def plot_normalized(country_name):
    """Plot normalized GDPPC with in-plot text labels instead of a legend.

    Reads the module-level ``df_oecd_normalized`` and ``last_GDPPC``.

    Parameters
    ----------
    country_name : str
        Value of the 'Country Name' column to highlight. That country is
        drawn as a thick 'C1' line on top of the thin gray lines of the
        others and labelled in a large font at the end of its line. The
        countries at the first and last positions of ``last_GDPPC`` are
        labelled in a medium font.

    Returns
    -------
    The matplotlib Axes that was drawn on.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    for (name, code), df in df_oecd_normalized.groupby(['Country Name', 'Country Code']):
        highlighted = name == country_name
        if highlighted:
            clr = 'C1'
            lw = 2.5
            zorder = 9  # show this line above the others
            lbl = name
        else:
            clr = 'gray'
            lw = 0.5
            zorder = 0
            lbl = ''
        # zorder must be passed explicitly; otherwise the highlighted line
        # can be covered by countries plotted after it.
        ax.plot(df.year, df.GDPPC, '-', color=clr, lw=lw, zorder=zorder, label=lbl)

        # Label only the highlighted country and the two extreme countries.
        if highlighted:
            fontsize = 'x-large'
        elif name in (last_GDPPC.index[0], last_GDPPC.index[-1]):
            fontsize = 'medium'
        else:
            continue
        last_value = df.GDPPC.iloc[-1]
        ax.text(
            df.year.max(), last_value, f'{name}: {last_value:+.1f}%',
            fontsize=fontsize, color=clr, va='center'
        )
    ax.set_ylabel('GDP per capita\n% difference, relative to 2000')
    return ax
plot_normalized('Israel')

In [None]:
#! Let's make this EVEN MORE pretty (*)
def plot_normalized(country_name):
    """Draw the polished normalized-GDPPC chart with one country highlighted.

    Every country in ``df_oecd_normalized`` is drawn as a thin black line,
    except ``country_name``, which is drawn thick, in 'C1', and on top.
    The highlighted country and the countries at the two ends of
    ``last_GDPPC`` get a text label just right of their last data point.
    Axis decoration (ticks, spines, label) is rendered in gray.

    Returns the matplotlib Axes that was drawn on.
    """
    _, axes = plt.subplots(figsize=(10, 10))
    grouped = df_oecd_normalized.groupby(['Country Name', 'Country Code'])
    for (country, _code), series in grouped:
        highlighted = country == country_name
        if highlighted:
            # zorder=9 keeps the highlighted line above the others
            line_style = dict(color='C1', zorder=9, lw=3.5, label=country)
        else:
            line_style = dict(color='black', zorder=0, lw=0.5, label='')
        axes.plot(series.year, series.GDPPC, '-', **line_style)

        # Only the highlighted country and the two extremes are annotated.
        if not (highlighted or country in (last_GDPPC.index[0], last_GDPPC.index[-1])):
            continue
        final_value = series.GDPPC.iloc[-1]
        axes.text(
            series.year.max() + 0.5,
            final_value,
            f'{country}: {final_value:+.1f}%',
            fontsize='x-large' if highlighted else 'medium',
            color=line_style['color'],
            va='center',
        )

    axes.set_ylabel(
        'GDP per capita',
        rotation=0, y=1.05,
        va='top', ha='right', ma='left', color='gray',
    )

    first_year = df_oecd_normalized.year.min()
    final_year = df_oecd_normalized.year.max()
    axes.set_xticks(np.linspace(first_year, final_year, 5).astype(int))

    y_ticks = [-20, 0, 100, 200, 300]
    axes.set_yticks(y_ticks)
    axes.set_yticklabels([f'{tick:+.0f}%' for tick in y_ticks])

    # Put the x-axis spine at y == 0 and tone the remaining decoration down.
    axes.spines['bottom'].set_position('zero')
    for side in ('bottom', 'left'):
        axes.spines[side].set_color('gray')
    axes.tick_params(colors='gray')
    sns.despine(ax=axes)
    return axes


plot_normalized('Mexico')

In [None]:
#! Add some interactivity
# Offer every country name found in the normalized data as a choice.
country_options = df_oecd_normalized['Country Name'].unique()
ipywidgets.interactive(plot_normalized, country_name=country_options)

## Another approach

When we think of a time series plot, we usually think of an "evolution" line -- a line that plots the time on the X-axis and the value on the Y-axis. This is not always the best approach. We can use stacked Kernel Density Estimation (KDE) plots to produce information-rich and readable graphs.

Sometimes, these graphs are called "[joyplots](https://seaborn.pydata.org/examples/kde_joyplot.html)"

In [None]:
def plot_filled_kde(data, clr='C0', baseline=0, x_from=None, x_to=None, log_transform=False, label=None, country=None, *args, **kwargs):
    """Draw one filled kernel-density curve of ``data``, raised by ``baseline``.

    Parameters
    ----------
    data : pandas.Series
        Values whose density is estimated; NaNs are dropped first. When
        ``country`` is given it is looked up in this series' index.
    clr : color
        Color of the line, the fill, the text label and the country marker.
    baseline : number
        Vertical offset added to the density, used to stack several curves.
    x_from, x_to : number, optional
        Range over which the density is evaluated (100 evenly spaced
        points). They default to the minimum / maximum of ``data`` and are
        log10-transformed together with the data when ``log_transform`` is set.
    log_transform : bool
        If true, the density is estimated and drawn on log10 of the values.
    label : optional
        Text written at the right-hand end of the curve.
    country : optional
        Index label in ``data`` to mark with a dot on the curve. The marker
        is skipped when that label is absent after dropping NaNs.
    *args, **kwargs
        Forwarded to ``ax.plot`` for the density line. The keyword ``ax``
        is consumed here and selects the Axes (default: ``plt.gca()``).

    Returns
    -------
    The matplotlib Axes that was drawn on.
    """
    data = data.dropna()
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()
    if data.empty:
        # No observations left: there is no density to estimate or draw.
        return ax
    if x_from is None:
        x_from = data.min()
    if x_to is None:
        x_to = data.max()
    if log_transform:
        data = np.log10(data)
        x_from = np.log10(x_from)
        x_to = np.log10(x_to)
    kde = KDEUnivariate(
        data
    )
    kde.fit(bw=0.05)  # fixed bandwidth
    x = np.linspace(x_from, x_to, 100)
    y = kde.evaluate(x)
    y += baseline
    ax.plot(x, y, '-', lw=0.8, color=clr, *args, **kwargs)
    ax.fill_between(x, y1=baseline, y2=y, color=clr, alpha=0.1)
    if label is not None:
        ax.text(x[-1], y[-1], label, color=clr, va='center')
    # A country whose value was NaN has been dropped above; skip its marker
    # instead of failing with a KeyError.
    if country is not None and country in data.index:
        country_x = [data[country]]
        country_y = kde.evaluate(country_x) + baseline
        ax.plot(country_x, country_y, 'o', color=clr)
    return ax

In [None]:
#! 
# One KDE curve of GDPPC across countries for every fourth year from 1995,
# stacked vertically (baseline = position of the year in the list).
fig, ax = plt.subplots(figsize=(10, 5))
years = np.arange(1995, 2016, 4)
x_from = df_oecd.GDPPC.min()
x_to = df_oecd.GDPPC.max()
for row, year in enumerate(years):
    gdppc_by_country = df_oecd.loc[df_oecd.year == year].set_index('Country Name').GDPPC
    plot_filled_kde(
        gdppc_by_country,
        baseline=row,
        x_from=x_from,
        x_to=x_to,
        log_transform=True,
        label=year,
    )
sns.despine(ax=ax, left=True)
ax.set_yticks([])
# Ticks sit at integer powers of ten on the log10 axis and are labelled in dollars.
xticks = np.linspace(np.log10(x_from), np.log10(x_to), 3).astype(int)
ax.set_xticks(xticks)
ax.set_xticklabels([f'${10**t:,d}' for t in xticks])
ax.set_xlabel('GDP per capita (USD)')

In [None]:
#!
def follow_a_country(country):
    """Draw the stacked yearly GDPPC densities and mark ``country`` on each.

    For every fourth year from 1995, one KDE curve of ``df_oecd`` GDPPC
    (log10 scale) is drawn through ``plot_filled_kde``, which also places a
    dot at the given country's value. Returns nothing.
    """
    _, axes = plt.subplots(figsize=(10, 5))
    gdppc_min, gdppc_max = df_oecd.GDPPC.min(), df_oecd.GDPPC.max()
    for offset, year in enumerate(np.arange(1995, 2016, 4)):
        yearly = df_oecd.loc[df_oecd.year == year].set_index('Country Name')
        plot_filled_kde(
            yearly.GDPPC,
            baseline=offset,
            x_from=gdppc_min,
            x_to=gdppc_max,
            log_transform=True,
            label=year,
            country=country,
            ax=axes,
        )
    sns.despine(ax=axes, left=True)
    axes.set_yticks([])
    # Ticks sit at integer powers of ten on the log10 axis and are labelled in dollars.
    log_ticks = np.linspace(np.log10(gdppc_min), np.log10(gdppc_max), 3).astype(int)
    axes.set_xticks(log_ticks)
    axes.set_xticklabels([f'${10**t:,d}' for t in log_ticks])
    axes.set_xlabel('GDP per capita (USD)')
    axes.set_title(f"{country}'s GDP per  capita, compared to OECD", ma='left', x=0)


follow_a_country('Estonia')

In [None]:
# Let the reader pick any country present in df_oecd.
all_countries = df_oecd['Country Name'].unique()
ipywidgets.interact(follow_a_country, country=all_countries)