## Generate data for website

This notebook exports aggregated data from the cleaned Lausanne Marathon 2016 DataFrame so that the information can be used directly by the project's website.

In [None]:
import pandas as pd

# Pickled results DataFrame (the "cleaned" one, per its file name), located
# relative to the current working directory.
PATH_TO_DATA = './Data/Lausanne_Marathon_2016_cleaned.pickle'
# NOTE(review): unpickling can execute arbitrary code - only load a pickle
# file that comes from a trusted source.
lausanne_marathon_2016_cleaned = pd.read_pickle(PATH_TO_DATA)
# Preview the first rows of the loaded data.
lausanne_marathon_2016_cleaned.head()

### Age (x axis)

#### Speed (y axis)

In [None]:
# Mean speed of the runners of each age, exposed as a single 'speed' column.
# Only the speed column is aggregated: averaging the whole frame raises on
# pandas >= 2.0 as soon as the frame contains a non-numeric column.
df_speed_over_age = (
    lausanne_marathon_2016_cleaned
    .groupby(['age'])[['Speed (m/s)']]
    .mean()
    .rename(columns={'Speed (m/s)': 'speed'})
)
df_speed_over_age.head()

In [None]:
# Mean speed for every (distance, age) pair. Only the speed column is
# aggregated: averaging the whole frame raises on pandas >= 2.0 as soon as the
# frame contains a non-numeric column.
mean_speed_by_distance_age = (
    lausanne_marathon_2016_cleaned
    .groupby(['distance (km)', 'age'])[['Speed (m/s)']]
    .mean()
)

# One age-indexed column per distance, named '<distance>km speed'.
speed_columns = [
    mean_speed_by_distance_age.loc[distance].rename(
        columns={'Speed (m/s)': str(distance) + 'km speed'}
    )
    for distance in mean_speed_by_distance_age.index.levels[0]
]

# Sort by age once the columns are aligned: concat does not guarantee a sorted
# index, and the gap filling below must borrow from the neighbouring ages.
df_speed_over_age_distance = pd.concat(speed_columns, axis=1).sort_index()
# Ages without value for a distance take the value of the next known age, then
# of the previous one for trailing gaps. `fillna(method=...)` is deprecated,
# hence the explicit bfill()/ffill().
df_speed_over_age_distance = df_speed_over_age_distance.bfill().ffill()

df_speed_over_age_distance.head()

In [None]:
# Mean speed for every (sex, age) pair. Only the speed column is aggregated:
# averaging the whole frame raises on pandas >= 2.0 as soon as the frame
# contains a non-numeric column.
mean_speed_by_sex_age = (
    lausanne_marathon_2016_cleaned
    .groupby(['sex', 'age'])[['Speed (m/s)']]
    .mean()
)

# One age-indexed column per sex, named '<sex> speed'.
speed_columns = [
    mean_speed_by_sex_age.loc[sex].rename(
        columns={'Speed (m/s)': str(sex) + ' speed'}
    )
    for sex in mean_speed_by_sex_age.index.levels[0]
]

# Sort by age once the columns are aligned: concat does not guarantee a sorted
# index, and the gap filling below must borrow from the neighbouring ages.
df_speed_over_age_sex = pd.concat(speed_columns, axis=1).sort_index()
# Ages without value for a sex take the value of the next known age, then of
# the previous one for trailing gaps. `fillna(method=...)` is deprecated, hence
# the explicit bfill()/ffill().
df_speed_over_age_sex = df_speed_over_age_sex.bfill().ffill()

df_speed_over_age_sex.head()

#### Number of runners (y axis)

In [None]:
# Number of runners of each age, obtained by counting the non-null 'name'
# values of every age group and exposing them as a 'count' column.
runners_per_age = lausanne_marathon_2016_cleaned.groupby(['age'])[['name']].count()
df_nb_runners_over_age = runners_per_age.rename(columns={'name': 'count'})
df_nb_runners_over_age.head()

In [None]:
# Number of runners for every (distance, age) pair, counted through the
# non-null 'name' values of each group.
runner_counts = (
    lausanne_marathon_2016_cleaned
    .groupby(['distance (km)', 'age'])[['name']]
    .count()
)

# One age-indexed column per distance, labelled '<distance>km count'.
count_columns = [
    runner_counts.loc[distance].rename(columns={'name': str(distance) + 'km count'})
    for distance in runner_counts.index.levels[0]
]

# Align the columns on age; a missing (distance, age) pair means no runner.
df_nb_runners_over_age_distance = pd.concat(count_columns, axis=1).fillna(0)
df_nb_runners_over_age_distance.head()

In [None]:
# Number of runners for every (sex, age) pair, counted through the non-null
# 'name' values of each group.
runner_counts = (
    lausanne_marathon_2016_cleaned
    .groupby(['sex', 'age'])[['name']]
    .count()
)

# One age-indexed column per sex, labelled '<sex> count'.
count_columns = [
    runner_counts.loc[sex].rename(columns={'name': str(sex) + ' count'})
    for sex in runner_counts.index.levels[0]
]

# Align the columns on age; a missing (sex, age) pair means no runner.
df_nb_runners_over_age_sex = pd.concat(count_columns, axis=1).fillna(0)
df_nb_runners_over_age_sex.head()

#### Time (y axis)

In [None]:
# Mean 'time' of the runners of each age. Only the 'time' column is
# aggregated: averaging the whole frame raises on pandas >= 2.0 as soon as the
# frame contains a non-numeric column.
df_time_over_age = lausanne_marathon_2016_cleaned.groupby(['age'])[['time']].mean()
df_time_over_age.head()

In [None]:
# Mean 'time' for every (distance, age) pair. Only the 'time' column is
# aggregated: averaging the whole frame raises on pandas >= 2.0 as soon as the
# frame contains a non-numeric column.
mean_time_by_distance_age = (
    lausanne_marathon_2016_cleaned
    .groupby(['distance (km)', 'age'])[['time']]
    .mean()
)

# One age-indexed column per distance, named '<distance>km time'.
time_columns = [
    mean_time_by_distance_age.loc[distance].rename(
        columns={'time': str(distance) + 'km time'}
    )
    for distance in mean_time_by_distance_age.index.levels[0]
]

# Sort by age once the columns are aligned: concat does not guarantee a sorted
# index, and the gap filling below must borrow from the neighbouring ages.
df_time_over_age_distance = pd.concat(time_columns, axis=1).sort_index()
# Ages without value for a distance take the value of the next known age, then
# of the previous one for trailing gaps. `fillna(method=...)` is deprecated,
# hence the explicit bfill()/ffill().
df_time_over_age_distance = df_time_over_age_distance.bfill().ffill()

df_time_over_age_distance.head()

In [None]:
# Mean 'time' for every (sex, age) pair. Only the 'time' column is aggregated:
# averaging the whole frame raises on pandas >= 2.0 as soon as the frame
# contains a non-numeric column.
mean_time_by_sex_age = (
    lausanne_marathon_2016_cleaned
    .groupby(['sex', 'age'])[['time']]
    .mean()
)

# One age-indexed column per sex, named '<sex> time'.
time_columns = [
    mean_time_by_sex_age.loc[sex].rename(columns={'time': str(sex) + ' time'})
    for sex in mean_time_by_sex_age.index.levels[0]
]

# Sort by age once the columns are aligned: concat does not guarantee a sorted
# index, and the gap filling below must borrow from the neighbouring ages.
df_time_over_age_sex = pd.concat(time_columns, axis=1).sort_index()
# Ages without value for a sex take the value of the next known age, then of
# the previous one for trailing gaps. `fillna(method=...)` is deprecated, hence
# the explicit bfill()/ffill().
df_time_over_age_sex = df_time_over_age_sex.bfill().ffill()

df_time_over_age_sex.head()

#### Merge all

In [None]:
# Every age-indexed table of this notebook, listed in the column order of the
# exported file: speed, then number of runners, then time.
age_tables = [
    df_speed_over_age,
    df_speed_over_age_distance,
    df_speed_over_age_sex,
    df_nb_runners_over_age,
    df_nb_runners_over_age_distance,
    df_nb_runners_over_age_sex,
    df_time_over_age,
    df_time_over_age_distance,
    df_time_over_age_sex,
]

# Put the tables side by side (aligned on their index) and write the CSV file.
df_by_age = pd.concat(age_tables, axis=1)
df_by_age.to_csv('marathon-lausanne-2016-by-age.csv')
df_by_age.head()

### Speed (x axis)

#### Number of runners (y axis)

In [None]:
# Bucket the speeds by rounding them to one decimal; the result is stored as a
# new column so it can serve as a grouping key.
rounded_speed = lausanne_marathon_2016_cleaned['Speed (m/s)'].round(1)
lausanne_marathon_2016_cleaned['Speed (m/s) Rounded'] = rounded_speed

In [None]:
# Number of runners in each rounded-speed bucket, obtained by counting the
# non-null 'name' values of every group and exposing them as a 'count' column.
runners_per_speed = (
    lausanne_marathon_2016_cleaned
    .groupby(['Speed (m/s) Rounded'])[['name']]
    .count()
)
df_nb_runners_over_speed = runners_per_speed.rename(columns={'name': 'count'})
df_nb_runners_over_speed.head()

In [None]:
# Number of runners for every (distance, rounded speed) pair, counted through
# the non-null 'name' values of each group.
runner_counts = (
    lausanne_marathon_2016_cleaned
    .groupby(['distance (km)', 'Speed (m/s) Rounded'])[['name']]
    .count()
)

# One speed-indexed column per distance, labelled '<distance>km count'.
count_columns = [
    runner_counts.loc[distance].rename(columns={'name': str(distance) + 'km count'})
    for distance in runner_counts.index.levels[0]
]

# Align the columns on speed; a missing (distance, speed) pair means no runner.
df_nb_runners_over_speed_distance = pd.concat(count_columns, axis=1).fillna(0)
df_nb_runners_over_speed_distance.head()

In [None]:
# Number of runners for every (sex, rounded speed) pair, counted through the
# non-null 'name' values of each group.
runner_counts = (
    lausanne_marathon_2016_cleaned
    .groupby(['sex', 'Speed (m/s) Rounded'])[['name']]
    .count()
)

# One speed-indexed column per sex, labelled '<sex> count'.
count_columns = [
    runner_counts.loc[sex].rename(columns={'name': str(sex) + ' count'})
    for sex in runner_counts.index.levels[0]
]

# Align the columns on speed; a missing (sex, speed) pair means no runner.
df_nb_runners_over_speed_sex = pd.concat(count_columns, axis=1).fillna(0)
df_nb_runners_over_speed_sex.head()

In [None]:
# Mean 'time' of the runners of each rounded-speed bucket. Only the 'time'
# column is aggregated: averaging the whole frame raises on pandas >= 2.0 as
# soon as the frame contains a non-numeric column.
df_time_over_speed = (
    lausanne_marathon_2016_cleaned
    .groupby(['Speed (m/s) Rounded'])[['time']]
    .mean()
)
df_time_over_speed.head()

In [None]:
# Mean 'time' for every (distance, rounded speed) pair. Only the 'time' column
# is aggregated: averaging the whole frame raises on pandas >= 2.0 as soon as
# the frame contains a non-numeric column.
mean_time_by_distance_speed = (
    lausanne_marathon_2016_cleaned
    .groupby(['distance (km)', 'Speed (m/s) Rounded'])[['time']]
    .mean()
)

# One speed-indexed column per distance, named '<distance>km time'.
time_columns = [
    mean_time_by_distance_speed.loc[distance].rename(
        columns={'time': str(distance) + 'km time'}
    )
    for distance in mean_time_by_distance_speed.index.levels[0]
]

# Sort by speed once the columns are aligned: concat does not guarantee a
# sorted index, and the gap filling below must borrow from neighbouring speeds.
df_time_over_speed_distance = pd.concat(time_columns, axis=1).sort_index()
# Speeds without value for a distance take the value of the next known speed,
# then of the previous one for trailing gaps. `fillna(method=...)` is
# deprecated, hence the explicit bfill()/ffill().
df_time_over_speed_distance = df_time_over_speed_distance.bfill().ffill()
df_time_over_speed_distance.head()

In [None]:
# Mean 'time' for every (sex, rounded speed) pair. Only the 'time' column is
# aggregated: averaging the whole frame raises on pandas >= 2.0 as soon as the
# frame contains a non-numeric column.
mean_time_by_sex_speed = (
    lausanne_marathon_2016_cleaned
    .groupby(['sex', 'Speed (m/s) Rounded'])[['time']]
    .mean()
)

# One speed-indexed column per sex, named '<sex> time'.
time_columns = [
    mean_time_by_sex_speed.loc[sex].rename(columns={'time': str(sex) + ' time'})
    for sex in mean_time_by_sex_speed.index.levels[0]
]

# Sort by speed once the columns are aligned: concat does not guarantee a
# sorted index, and the gap filling below must borrow from neighbouring speeds.
df_time_over_speed_sex = pd.concat(time_columns, axis=1).sort_index()
# Speeds without value for a sex take the value of the next known speed, then
# of the previous one for trailing gaps. `fillna(method=...)` is deprecated,
# hence the explicit bfill()/ffill().
df_time_over_speed_sex = df_time_over_speed_sex.bfill().ffill()
df_time_over_speed_sex.head()

In [None]:
# Every speed-indexed table of this notebook, listed in the column order of
# the exported file: number of runners first, then time.
speed_tables = [
    df_nb_runners_over_speed,
    df_nb_runners_over_speed_distance,
    df_nb_runners_over_speed_sex,
    df_time_over_speed,
    df_time_over_speed_distance,
    df_time_over_speed_sex,
]

# Put the tables side by side (aligned on their index) and write the CSV file.
df_by_speed = pd.concat(speed_tables, axis=1)
df_by_speed.to_csv('marathon-lausanne-2016-by-speed.csv')
df_by_speed.head()