In [1]:
# Import from the public IPython.display module: the IPython.core.display path
# for `display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule so elements with the `.container` class use 90% of the
# browser width.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
from function import data_loader
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [None]:
# Load the London datasets from the shared data directory.
# NOTE(review): `dfs` is consumed below via `.items()`, so it is presumably a
# mapping of dataset name -> DataFrame — confirm against `function.data_loader`.
dfs = data_loader(
    path='../data/',
    city='London',
)

In [None]:
# Pick out every dataset whose key mentions 'sport', preview it, and keep the
# year-indexed frame as `df` for the rest of the notebook. If several keys
# match, each one is previewed and the last one wins (same as before).
sport_frames = {key: frame for key, frame in dfs.items() if 'sport' in key}

# Fail fast with a clear message: otherwise `df` would stay unbound (or stale
# from an earlier kernel session) and the next cells would break confusingly.
if not sport_frames:
    raise ValueError("data_loader returned no dataset whose key contains 'sport'")

for key, frame in sport_frames.items():
    print(key)
    df = frame.set_index('year')
    display(HTML(df.head(5).to_html()))

In [None]:
# Distinct areas and distinct index values (years) present in the sport dataset.
areas = df['area'].unique()
years = df.index.unique()

# One sub-frame per participation level found in the `sports_participation` column.
participation = df['sports_participation']
df_zero, df_one, df_three = (
    df[participation == level] for level in ('zero', 'one+', 'three+')
)

### Plot % of zero activity per borough/region

In [None]:
# Set to True to draw one chart per area; when False only Southwark is drawn.
do_all = False

with sns.axes_style('whitegrid'):
    for area in areas:
        if not do_all and area != 'Southwark':
            continue

        df_area = df_zero[df_zero['area'] == area]
        if len(df_area) == 0:
            # Nothing to draw: skip *before* allocating a figure so no empty
            # figure is left behind for this area.
            continue

        fig, ax = plt.subplots(figsize=(11, 7), nrows=1, ncols=1)
        # Rows containing any missing value are dropped before plotting.
        df_area.dropna()['percentage'].plot(ax=ax)
        # Render the y ticks as percentages with two decimals.
        # NOTE(review): assumes `percentage` is stored as a fraction (0-1) — confirm.
        ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2%}'.format(y)))
        ax.legend(loc='lower left')
        ax.set_title(area)
        plt.tight_layout()
        plt.show()
        # Release the figure so a full `do_all` run does not accumulate open figures.
        plt.close(fig)

### Curvature index

**Goal**:
Compare boroughs by the impact of the Olympics on sport participation; ideally, produce a single number that quantifies both the impact and its durability.

**Methodology**:
For many boroughs, the year 2011-2012 appears to show a decrease in the 'zero' population, i.e. a larger fraction of people exercised at least once in the week.
This surge in sport activity seems to have reverted in the following years.

We compute a curvature index:
* ((mean of 'zero' % after - 'zero' % @2011-2012) - ('zero' % @2011-2012 - mean of 'zero' % before)) / 2

In the case of a transient impact, the curvature at 2011-2012 should be high (strongly positive), indicating a peak in sport participation that quickly reverted.

**Results**:
The year 2012 does indeed correspond to a peak in sport participation, most likely driven by renewed interest in sports at the time of the Olympics and possibly by local policies promoting sport and sport infrastructure.
The curvature index is positive for many boroughs, indicating a transient impact (for instance, the pro-sport policies may have lasted only for the summer of 2012).

In [None]:
# Row position of the pivot observation ('zero' % @2011-2012) within each area.
# NOTE(review): assumes every area's rows are in chronological order with
# 2011-2012 at position 5 — confirm against the loaded data.
OLYMPIC_POS = 5

sport_min_year = dict()   # area -> index label (year) with the lowest 'zero' share
sport_curvature = dict()  # area -> curvature index around the pivot observation

for area in areas:
    # Work on the numeric column only: reducing the whole frame with .mean()
    # fails on its string columns under pandas >= 2.0.
    pct = df_zero.loc[df_zero['area'] == area, 'percentage']
    if len(pct) == 0:
        continue

    sport_min_year[area] = pct.idxmin()

    if len(pct) <= OLYMPIC_POS:
        # Too few rows to reach the pivot; positional indexing would raise IndexError.
        continue

    at_pivot = pct.iloc[OLYMPIC_POS]
    mean_before = pct.iloc[:OLYMPIC_POS].mean()
    mean_after = pct.iloc[OLYMPIC_POS + 1:].mean()
    # ((after - pivot) - (pivot - before)) / 2: positive when the pivot value
    # sits below the averages on both sides.
    sport_curvature[area] = ((mean_after - at_pivot) - (at_pivot - mean_before)) / 2

sport_min_year = pd.Series(sport_min_year).sort_values()
sport_curvature = pd.Series(sport_curvature).sort_values()

# Two stacked panels: a histogram of each area's lowest-'zero' year on top,
# and a bar chart of the sorted curvature index below.
with sns.axes_style('whitegrid'):
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(11, 14))
    best_year_ax, curvature_ax = axes

    sport_min_year.hist(bins=len(years), ax=best_year_ax)
    best_year_ax.set_title('Sport Best Year')

    sport_curvature.plot.bar(ax=curvature_ax)
    curvature_ax.set_title('Sport Curvature Index')

    plt.tight_layout()
    plt.show()

### Total plot

In [None]:
# Yearly mean of `percentage` over all rows, one line per participation level.
with sns.axes_style('whitegrid'):
    fig, ax = plt.subplots(figsize=(11, 7), nrows=1, ncols=1)

    # (frame, line colour, legend label); labels match the category values.
    levels = (
        (df_zero, 'r', 'zero'),
        (df_one, 'b', 'one+'),
        (df_three, 'g', 'three+'),
    )
    for frame, colour, label in levels:
        # Select the numeric column before averaging: a frame-wide groupby mean
        # fails on the string columns under pandas >= 2.0.
        yearly_mean = frame.groupby('year')['percentage'].mean().sort_index()
        yearly_mean.plot(ax=ax, color=colour, label=label)

    # Render the y ticks as percentages with two decimals.
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2%}'.format(y)))
    ax.set_title('Mean sport participation across all areas')
    ax.legend(loc='lower left')
    plt.tight_layout()
    plt.show()