In [None]:
import pandas as pd
import numpy as np
from copy import deepcopy
import random

import seaborn as sns
import plotly # https://plotly.com/python/
import plotly.graph_objects as go
import plotly.express as px
from webcolors import name_to_rgb

# https://community.plotly.com/t/plotly-colours-list/11730/3
# https://community.plotly.com/t/different-colors-for-bars-in-barchart-by-their-value/6527/7
# https://plotly.com/python/continuous-error-bars/
# https://community.plotly.com/t/plotly-colours-list/11730/5

## Colors

In [None]:
# Comma-separated CSS colour names kept as ONE multi-line string (not a list).
# color_name_to_rgba() checks its `name` argument against this value.
# NOTE(review): presumably copied from the Plotly colour-list threads linked in the import cell — confirm.
plotly_colors = '''
        aliceblue, antiquewhite, aqua, aquamarine, azure,
        beige, bisque, black, blanchedalmond, blue,
        blueviolet, brown, burlywood, cadetblue,
        chartreuse, chocolate, coral, cornflowerblue,
        cornsilk, crimson, cyan, darkblue, darkcyan,
        darkgoldenrod, darkgray, darkgrey, darkgreen,
        darkkhaki, darkmagenta, darkolivegreen, darkorange,
        darkorchid, darkred, darksalmon, darkseagreen,
        darkslateblue, darkslategray, darkslategrey,
        darkturquoise, darkviolet, deeppink, deepskyblue,
        dimgray, dimgrey, dodgerblue, firebrick,
        floralwhite, forestgreen, fuchsia, gainsboro,
        ghostwhite, gold, goldenrod, gray, grey, green,
        greenyellow, honeydew, hotpink, indianred, indigo,
        ivory, khaki, lavender, lavenderblush, lawngreen,
        lemonchiffon, lightblue, lightcoral, lightcyan,
        lightgoldenrodyellow, lightgray, lightgrey,
        lightgreen, lightpink, lightsalmon, lightseagreen,
        lightskyblue, lightslategray, lightslategrey,
        lightsteelblue, lightyellow, lime, limegreen,
        linen, magenta, maroon, mediumaquamarine,
        mediumblue, mediumorchid, mediumpurple,
        mediumseagreen, mediumslateblue, mediumspringgreen,
        mediumturquoise, mediumvioletred, midnightblue,
        mintcream, mistyrose, moccasin, navajowhite, navy,
        oldlace, olive, olivedrab, orange, orangered,
        orchid, palegoldenrod, palegreen, paleturquoise,
        palevioletred, papayawhip, peachpuff, peru, pink,
        plum, powderblue, purple, red, rosybrown,
        royalblue, saddlebrown, salmon, sandybrown,
        seagreen, seashell, sienna, silver, skyblue,
        slateblue, slategray, slategrey, snow, springgreen,
        steelblue, tan, teal, thistle, tomato, turquoise,
        violet, wheat, white, whitesmoke, yellow,
        yellowgreen
        '''

In [None]:
# Maps a 1-based position to the CSS colour name of its trace (despite the name, the
# lookup direction is position -> colour). The plot functions index it with a final rank,
# and compare_pts_evol_time() indexes it with a running season counter.
# Only keys 1..20 exist, so any other position raises KeyError at the lookup site.
color_2_position = {1: "royalblue", 
                    2: "cornflowerblue",
                    3: "aqua",
                    4: "chartreuse",
                    5: "yellowgreen",
                    6: "green",
                    7: "teal",
                    8: "purple",
                    9: "mediumturquoise",
                    10: "plum",
                    11: "khaki",
                    12: "goldenrod",
                    13: "yellow",
                    14: "chocolate",
                    15: "lightpink",
                    16: "hotpink",
                    17: "lightsalmon",
                    18: "orange",
                    19: "orangered",
                    20: "red"
                   }

In [None]:
def color_name_to_rgba(name, fill=0.3):
    """Convert a CSS colour name into a Plotly ``rgba(r, g, b, a)`` string.

    :param name: str: CSS colour name; must be one of the entries of ``plotly_colors``
    :param fill: float: alpha (opacity) component appended after the three RGB channels
    :return: str: e.g. ``'rgba(255, 0, 0, 0.3)'``
    :raises ValueError: if ``name`` is not an exact entry of ``plotly_colors``
    """
    # ``plotly_colors`` is a single comma-separated string, so a plain ``in`` test is a
    # substring search and would accept "dark", "blue, brown" or even "". Compare against
    # the individual names instead.
    known_names = {entry.strip() for entry in plotly_colors.split(',')}
    if name not in known_names:
        raise ValueError(f"{name!r} is not one of the CSS colour names listed in plotly_colors")

    channels = [str(channel) for channel in name_to_rgb(name)]
    values = ", ".join(channels + [str(fill)])

    return f'rgba({values})'
    

## Data Preparation

In [None]:
# functions to retrieve data
def rolling_mean_n_performance(df, window=5, performance_col='goals_scored'):
    """
    Add a per-(season, team) rolling mean of `performance_col` to `df`.

    :param df: pd.DataFrame: needs the columns 'season', 'team', 'leg' and `performance_col`
    :param window: int: number of consecutive legs averaged (fewer at the start, min_periods=1)
    :param performance_col: str: column being averaged
    :return: pd.DataFrame: the SAME frame, with `rolling_<window>_games_avg_<performance_col>` added
    """
    target_col = f'rolling_{window}_games_avg_{performance_col}'

    # Legs must be in chronological order before the window slides over them.
    chronological = df.sort_values(by=['leg'])
    per_team_season = chronological[['season', 'team', performance_col]].groupby(by=['season', 'team'])
    rolled = per_team_season[performance_col].rolling(window=window, min_periods=1).mean()

    # The third index level is the original row label (exposed as 'level_2' by reset_index);
    # indexing on it again lets the assignment align each value with its source row.
    rolled_by_row = rolled.reset_index().set_index('level_2')
    df[target_col] = rolled_by_row[performance_col]
    return df

def get_past_feature(df, feat_col, team=True):
    """
    Attach to every row the value `feat_col` had one leg earlier, for the team or for its opponent.

    :param df: pd.DataFrame: one row per (season, leg, team); needs 'season', 'leg', 'team',
    `feat_col`, plus 'opponent' when `team` is False
    :param feat_col: str: feature to look up at the previous leg
    :param team: bool: True -> previous value of the row's own team (new column
    `previous_team_<feat_col>`); False -> previous value of the row's opponent (new column
    `previous_opponent_<feat_col>`)
    :return: pd.DataFrame: new frame (the input is not modified); the new column is NaN when no
    row exists for the previous leg
    """
    merge_col = 'team' if team else 'opponent'
    past_col = f'previous_{merge_col}_{feat_col}'

    # The lookup table is always keyed by the team that OWNS the feature. The previous
    # implementation keyed it by the 'opponent' column when team=False, which returned the
    # value of whoever faced the opponent in the previous leg rather than the opponent's own.
    past = df[['season', 'leg', 'team', feat_col]].copy()
    past['leg'] = past['leg'] + 1  # the value observed at leg n is the "previous" value at leg n + 1
    past = past.rename(columns={'team': merge_col, feat_col: past_col})

    return df.merge(past, how='left', on=['leg', 'season', merge_col])

def prepare_data(csv_path, rolling=5):
    """
    Load one championship CSV and derive the cumulative, ranking, rolling and lagged features.

    :param csv_path: str: path of a CSV holding at least 'season', 'leg', 'team', 'opponent',
    'goals_scored', 'goals_conceded' and 'nb_points'
    :param rolling: int: window (in legs) of the rolling averages; it is also part of the
    generated column names (`rolling_<rolling>_games_avg_...`)
    :return: pd.DataFrame: seasons after '2003-2004' only, enriched with cumulative stats, 'rank',
    'final_rank', 'final_cum_pts', rolling means and `previous_team_*` / `previous_opponent_*` columns
    """
    # 'Unnamed: 0' is the index column left by a previous to_csv(); tolerate files without it.
    df = pd.read_csv(csv_path).drop(columns='Unnamed: 0', errors='ignore')
    df['goal_diff'] = df['goals_scored'] - df['goals_conceded']

    # cumulative
    # NOTE(review): cumsum follows the row order of the CSV; this assumes rows are already
    # ordered by leg within each (season, team) — confirm against the data files.
    cumulative = df.groupby(by=['season', 'team'])[['nb_points', 'goal_diff', 'goals_scored']].cumsum()
    df['cum_pts'] = cumulative['nb_points']
    df['cum_goal_diff'] = cumulative['goal_diff']
    df['cum_goals_scored'] = cumulative['goals_scored']
    df['cum_goals_conceded'] = df['cum_goals_scored'] - df['cum_goal_diff']

    # Rank inside each (season, leg): points, then goal difference, then goals scored.
    df['rank'] = df[['season', 'leg', 'cum_pts', 'cum_goal_diff', 'cum_goals_scored']].sort_values(
        by=['cum_pts', 'cum_goal_diff', 'cum_goals_scored'], ascending=False).groupby(
        by=['season', 'leg']).cumcount() + 1

    df['avg_goals_scored_since_season_start'] = df['cum_goals_scored'].div(df['leg'])
    df['avg_goals_conceded_since_season_start'] = df['cum_goals_conceded'].div(df['leg'])
    df['avg_cum_pts_since_season_start'] = df['cum_pts'].div(df['leg'])

    # Keep only the seasons after 2003-2004 (string comparison on the 'YYYY-YYYY' labels).
    data = df[df.season > '2003-2004'].copy().reset_index(drop=True)

    # Final standing = the row of the highest leg number. The inner merge drops any
    # (season, team) that has no row at that leg.
    leg_max = data.leg.max()
    end_season = data[data.leg == leg_max].rename(columns={'rank': 'final_rank', 'cum_pts': 'final_cum_pts'})
    data = data.merge(end_season[['season', 'team', 'final_rank', 'final_cum_pts']], on=['season', 'team'])

    # rolling mean
    for performance_col in ['goals_conceded', 'goals_scored', 'nb_points']:
        data = rolling_mean_n_performance(df=data, window=rolling, performance_col=performance_col)

    # past features: feature -> perspectives to lag (True = the team itself, False = its opponent).
    # The rolling column names follow `rolling` instead of being hard-coded to 5, which used to
    # raise KeyError for any other window.
    rolling_prefix = f'rolling_{rolling}_games_avg'
    past_features = {'rank': [True, False],
                     f'{rolling_prefix}_goals_scored': [True],
                     f'{rolling_prefix}_goals_conceded': [False],
                     'avg_goals_scored_since_season_start': [True],
                     'avg_goals_conceded_since_season_start': [False],
                     'goals_scored': [True],
                     'goals_conceded': [False],
                     f'{rolling_prefix}_nb_points': [True, False],
                     'nb_points': [True, False]
                    }
    for col, is_team_ll in past_features.items():
        for is_team in is_team_ll:
            data = get_past_feature(df=data, feat_col=col, team=is_team)

    return data


In [None]:
# Maps a championship key to the base name of its data file.
# NOTE(review): the values carry neither a directory nor a '.csv' extension, and nothing in
# the visible part of the notebook reads this dict — confirm how the paths are built.
championship_csv = {'ligue-1': 'ligue-1_data_2002_2019',
                   'ligue-2': 'ligue-2_data_2002_2019',
                   'serie-A': 'serie-a_data_2004_2019',
                   'bundesliga': 'bundesliga_data_2004_2019',
                   'premier-league': 'premier-league_data_2004_2019',
                   'liga':'liga_data_2004_2019'}

## Plot functions

In [None]:
# plot_plotly_kpi
def plot_kpi_evolution(df, kpi='cum_pts', show_standard_deviation=False):
    """
    Show how the per-leg average of a KPI evolves for each final ranking.

    :param df: pd.DataFrame: one row per (final_rank, leg) with 'leg', 'final_rank', `avg_<kpi>`
    and, when the standard deviation is shown, `std_<kpi>`
    :param kpi: str: one of the admissible KPIs listed below
    :param show_standard_deviation: bool: False -> plain plotly express lines; True -> one line per
    final rank wrapped in a +/- one standard deviation band
    :raises Exception: when `kpi` is not admissible
    """
    admissible_kpis = {'rank',
                       'cum_pts',
                       'cum_goal_diff',
                       'cum_goals_scored',
                       'goals_conceded',
                       'goals_scored'
                      }
    if kpi not in admissible_kpis:
        raise Exception(f"""
        kpi {kpi} is not admissible. It must be part of the following set : {admissible_kpis}
        """)

    yaxis_title_dict = {'cum_pts': 'Number of points',
                        'rank': 'Rank',
                        'cum_goal_diff': 'Goal difference',
                        'cum_goals_scored': 'Goal scored',
                        'goals_conceded': 'Goal conceded',
                        'goals_scored': 'Goal scored'
                       }
    avg_col, std_col = f'avg_{kpi}', f'std_{kpi}'

    # Simple variant: let plotly express draw one line per final rank, then stop.
    if not show_standard_deviation:
        simple_fig = px.line(data_frame=df, x="leg", y=avg_col, color="final_rank",
                             title=f"Average {kpi} Evolution based on final ranking")
        simple_fig.update_layout(autosize=False, width=800, height=800)
        simple_fig.show()
        return

    # Detailed variant: three traces per final rank, last rank first.
    go_layers = []
    for ranking in df['final_rank'].unique()[::-1]:
        rank_rows = df[df['final_rank'] == ranking]
        rank_color = color_2_position[ranking]
        legs = rank_rows['leg']
        mean_values = rank_rows[avg_col]
        spread = rank_rows[std_col]

        # Ranks 3 and 18 are drawn thicker than the others.
        go_layers.append(go.Scatter(
            name=str(ranking),
            x=legs,
            y=mean_values,
            mode='lines',
            line=dict(color=rank_color, width=5 if ranking in (3, 18) else 2),
        ))
        # Invisible upper edge of the band; the lower edge below fills up to it.
        go_layers.append(go.Scatter(
            name=f'Upper Bound {ranking}',
            x=legs,
            y=mean_values + spread,
            mode='lines',
            marker=dict(color="#444"),
            line=dict(width=0),
            showlegend=False,
        ))
        go_layers.append(go.Scatter(
            name=f'Lower Bound {ranking}',
            x=legs,
            y=mean_values - spread,
            marker=dict(color="#444"),
            line=dict(width=0),
            mode='lines',
            fillcolor=color_name_to_rgba(name=rank_color, fill=0.1),
            fill='tonexty',
            showlegend=False,
        ))

    framed_axis = dict(linecolor='black', linewidth=1, mirror=True)
    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,
        xaxis=go.layout.XAxis(**framed_axis),
        yaxis=go.layout.YAxis(**framed_axis),
        margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4),
    )

    fig = go.Figure(data=go_layers, layout=layout)
    fig.update_layout(
        yaxis_title=yaxis_title_dict[kpi],
        xaxis_title='leg',
        title=f"{kpi} evolution according to final ranking",
        hovermode="x",
    )
    fig.show()

In [None]:
def compare_pts_evol_with_avg_evolution(data, team, season=None, until_leg=38, compare_with=None):
    """
    Plot a team's cumulative points against another team's leg-by-leg average over all its seasons.

    :param data: pd.DataFrame: data containing the league performance ('team', 'season', 'leg', 'cum_pts')
    :param team: str: name of the team we want to analyze
    :param season: str: season we're interested in; None draws one line per season played by `team`
    :param until_leg: int: plot team's pts evolution from legs 1 to until leg included
    :param compare_with: str: name of the team whose average pts evolution is computed and which is used for
    comparison. It must have played at least 4 seasons.
    NOTE(review): the previous docstring said 5 seasons while the check has always been `< 4` — confirm
    the intended minimum.
    :raises ValueError: when `team` has no matching row or `compare_with` has played fewer than 4 seasons
    """
    team_mask = (data.team == team) & (data.leg <= until_leg)
    if season is not None:
        team_mask = team_mask & (data.season == season)
    team_data = data[team_mask]
    comparator_data = data[data.team == compare_with]

    season_label = "all seasons" if season is None else f"season {season}"
    if len(team_data) == 0:
        raise ValueError(f"{team} has no game for {season_label} up to leg {until_leg}. "
                         "Please review your inputs")

    # The message used to interpolate the whole comparator DataFrame and spoke of "games".
    nb_season = comparator_data.season.nunique()
    if nb_season < 4:
        raise ValueError(f"{compare_with} has played {nb_season} season(s); at least 4 are needed "
                         "to compute an average. Please review your inputs")

    avg_comparator_data = comparator_data[['leg', 'cum_pts']].groupby(
        by=['leg']).mean().reset_index().rename(columns={'cum_pts': 'avg_cum_pts'})

    go_layers = [
        go.Scatter(name=f"{compare_with} averaged",
                   x=avg_comparator_data['leg'],
                   y=avg_comparator_data['avg_cum_pts'],
                   mode='lines',
                   line=dict(color="red", width=5))
    ]

    # With season=None the rows span several seasons: one trace per season keeps the line
    # from jumping back from the last leg of one season to the first leg of the next.
    if season is None:
        team_traces = [(f"{team} {label}", rows) for label, rows in team_data.groupby('season')]
    else:
        team_traces = [(team, team_data)]
    for trace_name, rows in team_traces:
        go_layers.append(
            go.Scatter(name=trace_name,
                       x=rows['leg'],
                       y=rows['cum_pts'],
                       mode='lines',
                       line=dict(color="royalblue", width=2)))

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,
        xaxis=go.layout.XAxis(linecolor='black', linewidth=1, mirror=True),
        yaxis=go.layout.YAxis(linecolor='black', linewidth=1, mirror=True),
        margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4)
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        yaxis_title="number of points",
        title=f"{team}'s point evolution during {season_label} wrt to {compare_with} average pts evolution",
        hovermode="x"
    )

    fig.show()
    

In [None]:
def compare_pts_evol_time(data, team, until_leg=38):
    """
    Plot each season of a team's cumulative points together with its leg-by-leg average over seasons.

    :param data: pd.DataFrame: data containing the league performance ('team', 'season', 'leg', 'cum_pts')
    :param team: str: name of the team we want to analyze; it must have played at least 4 seasons.
    NOTE(review): the previous messages said 5 seasons while the check has always been `< 4` — confirm
    the intended minimum.
    :param until_leg: int: plot team's pts evolution from legs 1 to until leg included (the average
    line always covers every leg)
    :raises ValueError: when `team` has played fewer than 4 seasons or has no leg <= `until_leg`
    """
    all_team_rows = data[data.team == team]

    # These messages used to reference undefined names (`compare_with`, `season`), so the
    # checks failed with NameError instead of the intended ValueError.
    nb_season = all_team_rows.season.nunique()
    if nb_season < 4:
        raise ValueError(f"{team} has played {nb_season} season(s). "
                         "Please pick a team having played at least 4 seasons.")
    team_data = all_team_rows[all_team_rows.leg <= until_leg]
    if len(team_data) == 0:
        raise ValueError(f"{team} has no game with leg <= {until_leg}. Please review your inputs.")

    avg_comparator_data = all_team_rows[['leg', 'cum_pts']].groupby(
        by=['leg']).mean().reset_index().rename(columns={'cum_pts': 'avg_cum_pts'})

    go_layers = [
        go.Scatter(name="averaged point evolution",
                   x=avg_comparator_data['leg'],
                   y=avg_comparator_data['avg_cum_pts'],
                   mode='lines',
                   line=dict(color="red", width=5))
    ]

    # One trace per season actually present for the team (previously a hard-coded 2004..2018
    # range, which added empty legend entries and ignored any season outside it).
    # Colours cycle through color_2_position, whose keys run from 1 to len(color_2_position).
    nb_colors = len(color_2_position)
    for position, season in enumerate(sorted(team_data.season.unique())):
        season_rows = team_data[team_data.season == season]
        go_layers.append(
            go.Scatter(name=season,
                       x=season_rows['leg'],
                       y=season_rows['cum_pts'],
                       mode='lines',
                       line=dict(color=color_2_position[position % nb_colors + 1], width=2)))

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,
        xaxis=go.layout.XAxis(linecolor='black', linewidth=1, mirror=True),
        yaxis=go.layout.YAxis(linecolor='black', linewidth=1, mirror=True),
        margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4)
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        yaxis_title="number of points",
        title=f"{team}'s point evolution over its {nb_season} seasons wrt to its average pts evolution",
        hovermode="x"
    )

    fig.show()
    

In [None]:
def plot_compare_team_pts_evolution_vs_final_rank(df, team, season=None, show_standard_deviation=True):
    """
    Overlay one team's cumulative points on the average trajectory of every final ranking.

    :param df: pd.DataFrame: match-level data with 'team', 'season', 'leg', 'cum_pts', 'final_rank'
    :param team: str: team drawn as the thick highlighted line
    :param season: str: season to draw for `team`; None draws its per-leg average over all seasons
    :param show_standard_deviation: bool: multiplies the opacity of the +/- one standard deviation
    bands (False makes them fully transparent; the band traces are still added)
    """
    kpi = 'cum_pts'
    avg_col, std_col = f'avg_{kpi}', f'std_{kpi}'

    # Highlighted trace of the team: silver average over seasons, or gold single season.
    if season is None:
        every_season = deepcopy(df[df.team == team])
        per_leg_mean = every_season[['leg', 'cum_pts']].groupby(by=['leg']).mean()
        team_df = per_leg_mean.reset_index().rename(columns={'cum_pts': 'avg_cum_pts'})
        team_trace = go.Scatter(name=f"{team} averaged point evolution",
                                x=team_df['leg'],
                                y=team_df['avg_cum_pts'],
                                mode='lines',
                                line=dict(color="silver", width=6))
    else:
        team_df = deepcopy(df[(df.team == team) & (df.season == season)])
        team_trace = go.Scatter(name=f"{team} point evolution",
                                x=team_df['leg'],
                                y=team_df['cum_pts'],
                                mode='lines',
                                line=dict(color="gold", width=6))

    # Mean and standard deviation of the cumulative points per (final rank, leg).
    comparator = df.groupby(by=['final_rank', 'leg'])['cum_pts'].agg(['mean', 'std'])
    comparator.columns = [avg_col, std_col]
    comp_final = comparator.reset_index()

    band_opacity = 0.1*show_standard_deviation

    go_layers = [team_trace]
    for ranking in comp_final['final_rank'].unique()[::-1]:
        rank_rows = comp_final[comp_final['final_rank'] == ranking]
        rank_color = color_2_position[ranking]
        legs = rank_rows['leg']
        mean_values = rank_rows[avg_col]
        spread = rank_rows[std_col]

        go_layers.append(go.Scatter(
            name=str(ranking),
            x=legs,
            y=mean_values,
            mode='lines',
            line=dict(color=rank_color, width=2),
        ))
        # Invisible upper edge; the lower edge below fills up to it ('tonexty').
        go_layers.append(go.Scatter(
            name=f'Upper Bound {ranking}',
            x=legs,
            y=mean_values + spread,
            mode='lines',
            marker=dict(color="#444"),
            line=dict(width=0),
            showlegend=False,
        ))
        go_layers.append(go.Scatter(
            name=f'Lower Bound {ranking}',
            x=legs,
            y=mean_values - spread,
            marker=dict(color="#444"),
            line=dict(width=0),
            mode='lines',
            fillcolor=color_name_to_rgba(name=rank_color, fill=band_opacity),
            fill='tonexty',
            showlegend=False,
        ))

    framed_axis = dict(linecolor='black', linewidth=1, mirror=True)
    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,
        xaxis=go.layout.XAxis(**framed_axis),
        yaxis=go.layout.YAxis(**framed_axis),
        margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4),
    )

    fig = go.Figure(data=go_layers, layout=layout)
    fig.update_layout(
        yaxis_title='Number of points',
        title=f"{kpi} Evolution according to final ranking",
        hovermode="x",
    )
    fig.show()

##### Goal scored

In [None]:
# scatterplot on hist : change to hist bar
def draw_scatterplot(df, x, y, size_col, title):
    """Show a plotly express scatter of `y` against `x`, with marker size taken from `size_col`."""
    px.scatter(df, x=x, y=y, size=size_col, title=title).show()


# line on mean
def draw_line(df, x, y, title):
    """Show a plotly express line chart of `y` against `x`."""
    px.line(df, x=x, y=y, title=title).show()

In [None]:
def draw_pie_chart(df, values, names, hover_data, title):
    """
    Show a pie chart of `values`, labelled by one column or by several columns joined with '_'.

    :param df: pd.DataFrame: source data (left untouched)
    :param values: str: column giving the size of each slice
    :param names: str or list of str: label column; a list is joined row by row with '_'
    :param hover_data: currently unused, kept so existing calls keep working
    :param title: str: title shown on the pie trace
    """
    if isinstance(names, list):
        # Build the composite label as a standalone Series: the previous version wrote it
        # into the caller's frame as a new '<a>_<b>' column on every call.
        labels = df[names].apply(lambda r: '_'.join([str(_) for _ in r]), axis=1)
    else:
        labels = df[names]

    fig = go.Figure(data=[go.Pie(labels=labels, values=df[values])])
    fig.update_traces(title_text=title, textposition='inside', textinfo='percent+label')
    fig.show()

### Aggregator

In [None]:
def hist_aggregator(df, column_to_describe, aggreg_column=None, bin_step=None):
    """
    Count the rows of every (aggreg_column, column_to_describe) combination.

    :param df: pd.DataFrame: source data (left untouched)
    :param column_to_describe: str: column whose distribution is described
    :param aggreg_column: str: column to cross it with; None counts `column_to_describe` alone
    :param bin_step: float: when given, `aggreg_column` is floored to a multiple of `bin_step`
    and the grouping column becomes `<aggreg_column>_binned`
    :return: pd.DataFrame: grouping column(s), aggregation column first, followed by 'cnt'
    """
    aggreg_column = column_to_describe if aggreg_column is None else aggreg_column

    # dict.fromkeys removes a duplicate name while keeping a stable order; the set used
    # before made the output column order vary from one run to the next.
    source_cols = list(dict.fromkeys([aggreg_column, column_to_describe]))
    # Work on a private copy: binning used to add a column to the caller's frame, which
    # also triggers SettingWithCopyWarning when a filtered slice is passed in.
    work = df[source_cols].copy()

    group_cols = source_cols
    if bin_step is not None:
        binned_col = f'{aggreg_column}_binned'
        work[binned_col] = (work[aggreg_column] // bin_step) * bin_step
        group_cols = list(dict.fromkeys([binned_col, column_to_describe]))

    # size() counts rows directly, so the frame no longer needs a 'country' column to count on.
    return work.groupby(by=group_cols).size().reset_index(name='cnt')

In [None]:
def mean_aggregator(df, column_to_describe, aggreg_column='play', bin_step=None):
    """
    Average `column_to_describe` for every value (or bin) of `aggreg_column`.

    :param df: pd.DataFrame: source data (left untouched)
    :param column_to_describe: str: column being averaged
    :param aggreg_column: str: grouping column
    :param bin_step: float: when given, `aggreg_column` is floored to a multiple of `bin_step`
    and the grouping column becomes `<aggreg_column>_binned`
    :return: pd.DataFrame: the grouping column and `avg_<column_to_describe>`
    """
    # Private copy: binning used to add a column to the caller's frame.
    work = df[list(dict.fromkeys([aggreg_column, column_to_describe]))].copy()

    group_col = aggreg_column
    if bin_step is not None:
        group_col = f'{aggreg_column}_binned'
        work[group_col] = (work[aggreg_column] // bin_step) * bin_step

    df_agg = work[[group_col, column_to_describe]].groupby(by=[group_col]).mean().reset_index()
    return df_agg.rename(columns={column_to_describe: f'avg_{column_to_describe}'})

## Research directions

### Basics

In [None]:
def get_team_participation(df, championship):
    """
    Print participation statistics and return the number of seasons played by each team.

    :param df: pd.DataFrame: championship data with 'team', 'season', 'leg' and 'rank'
    :param championship: str: championship name, only used in the printed summary
    :return: pd.DataFrame: indexed by team, one column 'nb_participation', most frequent first
    """
    nb_teams = df.team.nunique()
    nb_seasons = df.season.nunique()
    first_season = df.season.min()
    last_season = df.season.max()
    print(f"""{nb_teams} teams have played in {championship} from season {first_season} to season {last_season}, 
    i.e over {nb_seasons} seasons """)

    season_length = df.leg.max()

    # One row per (season, team) at the highest leg number = one participation.
    # Filter the frame that was passed in: the previous version indexed the global
    # `ligue1_data` with a mask built from `df`.
    end_season_df = df[df.leg == season_length].rename(columns={'rank': 'final_rank'})
    participation_df = end_season_df[['team', 'final_rank']].groupby(by='team').agg('count').rename(
        columns={"final_rank": "nb_participation"})

    print("{nb_all_seasons} teams played all {nb_seasons} seasons".format(
        nb_all_seasons=len(participation_df[participation_df.nb_participation == nb_seasons]),
        nb_seasons=nb_seasons)
         )

    return participation_df.sort_values(by="nb_participation", ascending=False)

In [None]:
def get_goal_scored_repartition(data_df):
    """
    Compute, per championship, how often each number of goals scored occurs.

    :param data_df: pd.DataFrame: needs 'championship', 'goals_scored' and 'play'
    ('play' is only used as the column being counted)
    :return: pd.DataFrame: 'championship', 'goals_scored', 'quantity' (rows with that score),
    'total' (rows of the championship) and 'percent' (100 * quantity / total)
    """
    # Use the frame that was passed in: the previous version ignored `data_df` and read
    # the global `data_exploitable_df`.
    dg = data_df[['championship', 'goals_scored', 'play']].groupby(
        by=['championship', 'goals_scored']).count().reset_index()
    dg = dg.rename(columns={'play': 'quantity'})

    total = data_df[['championship', 'play']].groupby(by=['championship']).count().reset_index()
    total = total.rename(columns={'play': 'total'})

    class_recap = dg.merge(total, how='left', on='championship')
    class_recap['percent'] = 100*class_recap['quantity'].div(class_recap['total'])
    return class_recap

### Rank

In [None]:
# Mean and standard deviation of each KPI per (final rank, leg).
kpi_columns = ['rank', 'cum_pts', 'cum_goal_diff', 'cum_goals_scored', 'goals_scored', 'goals_conceded']
final_rank_perf_evolution = ligue1_data.groupby(by=['final_rank', 'leg'])[kpi_columns].agg(['mean', 'std'])

# Flatten the (column, statistic) header into 'avg_<column>' / 'std_<column>'. Deriving the
# names avoids hand-typed slips such as the former 'std_cum_goal_scored', which did not match
# the 'std_cum_goals_scored' column plot_kpi_evolution looks up.
final_rank_perf_evolution.columns = [
    f"{'avg' if stat == 'mean' else stat}_{column}"
    for column, stat in final_rank_perf_evolution.columns
]

df=final_rank_perf_evolution.reset_index()

In [None]:
# The plotting helper is defined as plot_kpi_evolution(..., show_standard_deviation=...);
# the old call used a name and a negated keyword that no longer exist.
plot_kpi_evolution(df=df, kpi='cum_pts', show_standard_deviation=True)

In [None]:
# Paris-SG's 2008-2009 points against Lyon's leg-by-leg average over all its seasons.
compare_pts_evol_with_avg_evolution(
    data=ligue1_data,
    team='Paris-SG',  # 'Lyon',
    season='2008-2009',
    until_leg=38,
    compare_with='Lyon',
)

In [None]:
# Every season of Nantes against its own average.
# The keyword was garbled into 'untilrolling_5_games_avg_nb_points_leg', which raises TypeError;
# the parameter is `until_leg`.
compare_pts_evol_time(data=ligue1_data,
                     team='Nantes', #'Paris-SG',
                     until_leg=38)


In [None]:
# Marseille's average trajectory over all seasons against each final rank, bands hidden.
plot_compare_team_pts_evolution_vs_final_rank(
    df=ligue1_data,
    team='Marseille',
    season=None,
    show_standard_deviation=False,
)

### Goals

In [None]:
# cumulative goal scored
# plot_plotly_kpi / not_show_standard_deviation do not exist; the helper is plot_kpi_evolution
# and its flag has the opposite polarity.
plot_kpi_evolution(df=df, kpi='cum_goals_scored', show_standard_deviation=False)

In [None]:
# cumulative goal difference
# plot_plotly_kpi / not_show_standard_deviation do not exist; the helper is plot_kpi_evolution
# and its flag has the opposite polarity.
plot_kpi_evolution(df=df, kpi='cum_goal_diff', show_standard_deviation=False)

In [None]:
# goals scored
# plot_plotly_kpi / not_show_standard_deviation do not exist; the helper is plot_kpi_evolution
# and its flag has the opposite polarity.
plot_kpi_evolution(df=df, kpi='goals_scored', show_standard_deviation=False)

In [None]:
# goals conceded
# plot_plotly_kpi / not_show_standard_deviation do not exist; the helper is plot_kpi_evolution
# and its flag has the opposite polarity.
plot_kpi_evolution(df=df, kpi='goals_conceded', show_standard_deviation=False)

In [None]:
# home/away effect
# `df` was rebound in the Rank section to the per-(final_rank, leg) aggregate, which has no
# 'play', 'goals_scored' or 'nb_points' column; aggregate the match-level frame instead, as
# the later cells do.
home_away_goals_scored = hist_aggregator(df=ligue1_data,
                                         column_to_describe='goals_scored',
                                         aggreg_column='play')

home_pts = hist_aggregator(df=ligue1_data[ligue1_data.play=='Home'],
                           column_to_describe='nb_points',
                           aggreg_column='play')

In [None]:
# leg effect

# `df` is the per-(final_rank, leg) aggregate at this point and lacks 'goals_scored', 'play'
# and 'nb_points'; use the match-level frame, as the later cells do.
leg_goals = hist_aggregator(df=ligue1_data, column_to_describe='goals_scored', aggreg_column='leg')

leg_on_perf_at_home = hist_aggregator(df=ligue1_data[ligue1_data.play=='Home'],
                                      column_to_describe='nb_points',
                                      aggreg_column='leg')

In [None]:
# Average all ongoing season goals scored / opponent avg goals conceded on number goals scored 
# --> Scatterplot with jitter + bins the avg by steps of 0.1 goals
# `df` is the per-(final_rank, leg) aggregate at this point and has no 'previous_*' column;
# use the match-level frame, as the later cells do.
opponent_season_perf_on_goals_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_avg_goals_conceded_since_season_start',
    bin_step=.1)

opponent_season_perf_on_goals_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_avg_goals_conceded_since_season_start',
    bin_step=.1)

draw_scatterplot(df=opponent_season_perf_on_goals_hist,
                 x='previous_opponent_avg_goals_conceded_since_season_start_binned',
                 y='goals_scored',
                 size_col='cnt',
                 title='Opponent avg goals conceded since season start vs goals to be scored')
draw_line(df=opponent_season_perf_on_goals_mean,
          x='previous_opponent_avg_goals_conceded_since_season_start_binned',
          y='avg_goals_scored',
          title='Opponent avg goals conceded since season start vs avg goals to be scored')

In [None]:
# `df` is the per-(final_rank, leg) aggregate at this point and has no 'previous_*' column;
# use the match-level frame, as the later cells do.
team_season_perf_on_goals_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_avg_goals_scored_since_season_start',
    bin_step=.1)
team_season_perf_on_goals_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_avg_goals_scored_since_season_start',
    bin_step=.1)

draw_scatterplot(df=team_season_perf_on_goals_hist,
                 x='previous_team_avg_goals_scored_since_season_start_binned',
                 y='goals_scored',
                 size_col='cnt',
                 title='Team avg goals scored since season start vs goals to be scored')

draw_line(df=team_season_perf_on_goals_mean,
          x='previous_team_avg_goals_scored_since_season_start_binned',
          y='avg_goals_scored',
          title='Team avg goals scored since season start vs avg goals to be scored')


In [None]:
# Average 5 last game goals scored / opponent avg goals conceded on number goals scored 
# --> Scatterplot with jitter
# Opponent
# Counts, then means, of goals scored per 0.1-wide bin of the opponent's 5-leg conceded average.
opponent_last5_perf_on_goals_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_rolling_5_games_avg_goals_conceded',
    bin_step=0.1,
)
opponent_last5_perf_on_goals_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_rolling_5_games_avg_goals_conceded',
    bin_step=0.1,
)

draw_scatterplot(
    df=opponent_last5_perf_on_goals_hist,
    x='previous_opponent_rolling_5_games_avg_goals_conceded_binned',
    y='goals_scored',
    size_col='cnt',
    title='5 leg Avg on opponents goals conceded vs goals to be scored',
)
draw_line(
    df=opponent_last5_perf_on_goals_mean,
    x='previous_opponent_rolling_5_games_avg_goals_conceded_binned',
    y='avg_goals_scored',
    title='5 leg Avg on opponents goals conceded vs avg goals to be scored',
)

In [None]:
# Team
# Counts, then means, of goals scored per 0.1-wide bin of the team's own 5-leg scoring average.
team_last5_perf_on_goals_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_rolling_5_games_avg_goals_scored',
    bin_step=0.1,
)
team_last5_perf_on_goals_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_rolling_5_games_avg_goals_scored',
    bin_step=0.1,
)

draw_scatterplot(
    df=team_last5_perf_on_goals_hist,
    x='previous_team_rolling_5_games_avg_goals_scored_binned',
    y='goals_scored',
    size_col='cnt',
    title='5 leg Avg on Team goals scored vs goals to be scored',
)
draw_line(
    df=team_last5_perf_on_goals_mean,
    x='previous_team_rolling_5_games_avg_goals_scored_binned',
    y='avg_goals_scored',
    title='5 leg Avg on Team goals scored vs avg goals to be scored',
)

In [None]:
# LAST GAME PERFORMANCE
# Goals

# Team
# Goals scored against the team's goals in its previous game (no binning: values are used as is).
last_game_team_goals_scored_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_goals_scored',
    bin_step=None,
)
last_game_team_goals_scored_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_team_goals_scored',
    bin_step=None,
)

draw_scatterplot(
    df=last_game_team_goals_scored_hist,
    x='previous_team_goals_scored',
    y='goals_scored',
    size_col='cnt',
    title='Team previous game goals scored vs goals to be scored',
)
draw_line(
    df=last_game_team_goals_scored_mean,
    x='previous_team_goals_scored',
    y='avg_goals_scored',
    title='Team previous game goals scored vs avg goals to be scored',
)

In [None]:
# LAST GAME PERFORMANCE
# Goals

# Opponent

# Goals scored against the `previous_opponent_goals_conceded` column (no binning).
last_game_opponent_goals_conceded_hist = hist_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_goals_conceded',
    bin_step=None,
)
last_game_opponent_goals_conceded_mean = mean_aggregator(
    df=ligue1_data,
    column_to_describe='goals_scored',
    aggreg_column='previous_opponent_goals_conceded',
    bin_step=None,
)

draw_scatterplot(
    df=last_game_opponent_goals_conceded_hist,
    x='previous_opponent_goals_conceded',
    y='goals_scored',
    size_col='cnt',
    title='Opponent previous game goals conceded vs goals to be scored',
)
draw_line(
    df=last_game_opponent_goals_conceded_mean,
    x='previous_opponent_goals_conceded',
    y='avg_goals_scored',
    title='Opponent previous game goals conceded vs avg goals to be scored',
)

In [None]:
# Outcome (win, draw, loss) over the 5 previous games -- team side.
# Feature: 'previous_team_rolling_5_games_avg_nb_points', used unbinned
# (bin_step=None) and compared with the goals scored in the current match.
# NOTE(review): presumably the average points per game over the team's last
# 5 games -- confirm against the cell that builds this column.
last_5games_team_outcome_hist = hist_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_team_rolling_5_games_avg_nb_points',
                bin_step=None)
last_5games_team_outcome_mean = mean_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_team_rolling_5_games_avg_nb_points',
                bin_step=None)

# Scatter of the histogram frame; marker size is driven by the `cnt` column.
draw_scatterplot(df=last_5games_team_outcome_hist,
                 x='previous_team_rolling_5_games_avg_nb_points',
                 y='goals_scored',
                 size_col='cnt',
                 title='Team 5 last games outcome vs goals to be scored')
# Line of `avg_goals_scored` for each value of the feature.
# Title fix: the x axis is an outcome (points) feature, not goals conceded,
# so the stray "conceded" copied from the goals-conceded cell is removed.
draw_line(df=last_5games_team_outcome_mean,
          x='previous_team_rolling_5_games_avg_nb_points',
          y='avg_goals_scored',
          title='Team last 5 games outcome vs avg goals to be scored')

In [None]:
# Outcome (win, draw, loss) over the 5 previous games -- opponent side.
# Feature: 'previous_opponent_rolling_5_games_avg_nb_points', used unbinned
# (bin_step=None) and compared with the goals scored in the current match.
# NOTE(review): presumably the average points per game over the opponent's
# last 5 games -- confirm against the cell that builds this column.

last_5games_opponent_outcome_hist = hist_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_opponent_rolling_5_games_avg_nb_points',
                bin_step=None)

last_5games_opponent_outcome_mean = mean_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_opponent_rolling_5_games_avg_nb_points',
                bin_step=None)


# Scatter of the histogram frame; marker size is driven by the `cnt` column.
draw_scatterplot(df=last_5games_opponent_outcome_hist,
                 x='previous_opponent_rolling_5_games_avg_nb_points',
                 y='goals_scored',
                 size_col='cnt',
                 title='Opponent 5 last games outcome vs goals to be scored')

# Line of `avg_goals_scored` for each value of the feature.
# Title fix: the x axis is an outcome (points) feature, not goals conceded,
# so the stray "conceded" copied from the goals-conceded cell is removed.
draw_line(df=last_5games_opponent_outcome_mean,
          x='previous_opponent_rolling_5_games_avg_nb_points',
          y='avg_goals_scored',
          title='Opponent last 5 games outcome vs avg goals to be scored')

In [None]:
# Outcome (win, draw, loss) of the previous game -- team side.
# Feature: 'previous_team_nb_points', used unbinned (bin_step=None) and
# compared with the goals scored in the current match.
# NOTE(review): presumably the points the team earned in its previous game
# -- confirm against the cell that builds this column.

last_game_team_outcome_hist = hist_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_team_nb_points',
                bin_step=None)
last_game_team_outcome_mean = mean_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_team_nb_points',
                bin_step=None)

# Scatter of the histogram frame; marker size is driven by the `cnt` column.
draw_scatterplot(df=last_game_team_outcome_hist,
                 x='previous_team_nb_points',
                 y='goals_scored',
                 size_col='cnt',
                 title='Team last game outcome vs goals to be scored')
# Line of `avg_goals_scored` for each value of the feature.
# Title fix: the x axis is an outcome (points) feature, not goals conceded;
# the stray "conceded" is removed and "last" restored so the title matches
# the scatter plot title above.
draw_line(df=last_game_team_outcome_mean,
          x='previous_team_nb_points',
          y='avg_goals_scored',
          title='Team last game outcome vs avg goals to be scored')

In [None]:
# Outcome (win, draw, loss) of the previous game -- opponent side.
# Feature: 'previous_opponent_nb_points', used unbinned (bin_step=None) and
# compared with the goals scored in the current match.
# NOTE(review): presumably the points the opponent earned in its previous
# game -- confirm against the cell that builds this column.
last_game_opponent_outcome_hist = hist_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_opponent_nb_points',
                bin_step=None)
last_game_opponent_outcome_mean = mean_aggregator(df=ligue1_data,
                column_to_describe='goals_scored',
                aggreg_column='previous_opponent_nb_points',
                bin_step=None)

# Scatter of the histogram frame; marker size is driven by the `cnt` column.
draw_scatterplot(df=last_game_opponent_outcome_hist,
                 x='previous_opponent_nb_points',
                 y='goals_scored',
                 size_col='cnt',
                 title='Opponent last game outcome vs goals to be scored')
# Line of `avg_goals_scored` for each value of the feature.
# Title fix: the x axis is an outcome (points) feature, not goals conceded;
# the stray "conceded" is removed and "last" restored so the title matches
# the scatter plot title above.
draw_line(df=last_game_opponent_outcome_mean,
          x='previous_opponent_nb_points',
          y='avg_goals_scored',
          title='Opponent last game outcome vs avg goals to be scored')