In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly # https://plotly.com/python/
import random
import plotly.graph_objects as go
import plotly.express as px
from copy import deepcopy
from webcolors import name_to_rgb

# https://community.plotly.com/t/plotly-colours-list/11730/3
# https://community.plotly.com/t/different-colors-for-bars-in-barchart-by-their-value/6527/7
# https://plotly.com/python/continuous-error-bars/
# https://community.plotly.com/t/plotly-colours-list/11730/5

In [None]:
# 1. plot the evolution in terms of points, goal difference and goals scored w.r.t. the leg, according to final rank
#      --> average path + standard deviation
# 2. Home/Away advantage : pts won; goal scored; goal conceded
# 2.b Rank Performance : pts won; goal scored; goal conceded (scatterplot)
# 3. Winter champion vs Final Champion // Winter situation vs Final situation
# 4. Team Performance wrt seasons : ranks, total goal scored and conceded, Home/Away results (min 10 seasons) 
# 5. Average/Std Team Performance wrt to legs (min 10 seasons) 

# 0. Data Preparation

In [None]:
def prepare_data():
    """Placeholder for the data-preparation step; it currently does nothing.

    :return: None
    """
    # TO BE DONE IN REFACTO
    return None

In [None]:
# Load the raw results and drop the 'Unnamed: 0' column
# (presumably the index written by an earlier to_csv call -- TODO confirm).
liga_df = pd.read_csv('liga_data_2004_2019').drop(columns='Unnamed: 0')
# the Ligue 1 data for seasons 2002-2003 and 2003-2004 is unreliable

In [None]:
liga_df['goal_diff'] = liga_df['goals_scored'] - liga_df['goals_conceded']

In [None]:
# ligue1_df

In [None]:
# Running total of points per (season, team), accumulated in the frame's current row order.
# NOTE(review): assumes the rows of each team are already ordered by leg -- confirm.
liga_df['cum_pts'] = liga_df.groupby(by=['season', 'team'])['nb_points'].cumsum()

In [None]:
# Running total of goal difference per (season, team), accumulated in the frame's current row order.
# NOTE(review): assumes the rows of each team are already ordered by leg -- confirm.
liga_df['cum_goal_diff'] = liga_df.groupby(by=['season', 'team'])['goal_diff'].cumsum()

In [None]:
# Running total of goals scored per (season, team), accumulated in the frame's current row order.
# NOTE(review): assumes the rows of each team are already ordered by leg -- confirm.
liga_df['cum_goals_scored'] = liga_df.groupby(by=['season', 'team'])['goals_scored'].cumsum()

In [None]:
liga_df['cum_goals_conceded'] = liga_df['cum_goals_scored']-liga_df['cum_goal_diff']


In [None]:
# Rank within each (season, leg): order rows by cumulative points, then cumulative goal
# difference, then cumulative goals scored (all descending) and number them from 1.
# The result is aligned back onto liga_df through the row index.
ranking_keys = ['cum_pts', 'cum_goal_diff', 'cum_goals_scored']
liga_df['rank'] = (
    liga_df[['season', 'leg'] + ranking_keys]
    .sort_values(by=ranking_keys, ascending=False)
    .groupby(by=['season', 'leg'])
    .cumcount()
    + 1
)

In [None]:
liga_df['avg_goals_scored_since_season_start'] = liga_df['cum_goals_scored'].div(liga_df['leg'])

In [None]:
liga_df['avg_goals_conceded_since_season_start'] = liga_df['cum_goals_conceded'].div(liga_df['leg'])

In [None]:
# liga_df[['season', 'leg', 'country']].groupby(by=['season', 'leg']).count()

In [None]:
# removed 2 useless seasons
liga_data = deepcopy(liga_df)
# ligue1_data.reset_index(drop=True, inplace=True)

In [None]:
def rolling_mean_n_performance(df, window=5, performance_col='goals_scored'):
    """Add the per-team rolling mean of a performance column over the last ``window`` legs.

    Rows are ordered by ``leg`` and grouped by (``season``, ``team``); the rolling mean is
    computed inside each group, so the first ``window - 1`` legs of every team/season are NaN.

    :param df: pd.DataFrame: must contain ``season``, ``team``, ``leg`` and ``performance_col``
    :param window: int: number of consecutive legs averaged
    :param performance_col: str: name of the column to average
    :return: pd.DataFrame: the SAME frame (modified in place) with the extra column
        ``rolling_<window>_games_avg_<performance_col>``
    """
    new_col_name = f'rolling_{window}_games_avg_{performance_col}'

    rolled = (
        df.sort_values(by=['leg'])
        .groupby(by=['season', 'team'])[performance_col]
        .rolling(window=window)
        .mean()
    )

    # The rolled series is indexed by (season, team, original row label). Dropping the two
    # group levels by position leaves the original row labels, whatever the index is called,
    # so the assignment below aligns each value with its source row.
    df[new_col_name] = rolled.droplevel([0, 1])
    return df

In [None]:
liga_data = rolling_mean_n_performance(df=liga_data, window=5, performance_col='goals_conceded')

In [None]:
liga_data = rolling_mean_n_performance(df=liga_data, window=5, performance_col='goals_scored')

In [None]:
# ligue1_data.head()

In [None]:
# Keep only the rows of leg 38 and expose the rank reached at that leg as `final_rank`.
# NOTE(review): assumes every season in the data has exactly 38 legs -- confirm.
end_season = liga_data[liga_data.leg==38].rename(columns={'rank':'final_rank'})
# end_season_L1

In [None]:
liga_data = liga_data.merge(end_season[['season', 'team', 'final_rank']], on=['season', 'team'])

In [None]:
# ligue1_data.drop(columns=['previous_leg', 'previous_team_rank'], inplace=True)

In [None]:
# ligue1_data

In [None]:
def get_past_feature(df, feat_col, team=True):
    """Attach to each row the value ``feat_col`` had at the previous leg.

    The lagged value is looked up for the row's ``team`` (``team=True``) or for its
    ``opponent`` (``team=False``) within the same season, and stored in a new column
    ``previous_team_<feat_col>`` / ``previous_opponent_<feat_col>``. Rows with no
    previous leg get NaN (left join).

    :param df: pd.DataFrame: must contain ``season``, ``leg``, ``feat_col`` and the key column
    :param feat_col: str: column whose previous-leg value is wanted
    :param team: bool: use ``team`` as the join key when True, ``opponent`` otherwise
    :return: pd.DataFrame: a new frame; ``df`` itself is not modified
    """
    key_col = 'opponent' if not team else 'team'
    lagged_col = f'previous_{key_col}_{feat_col}'

    # Shift every observation one leg forward so that it lines up with the following leg.
    shifted = df[['season', 'leg', key_col, feat_col]].copy(deep=True)
    shifted['next_leg'] = shifted['leg'] + 1
    shifted = shifted.rename(columns={'leg': 'previous_leg',
                                      'next_leg': 'leg',
                                      feat_col: lagged_col})

    enriched = df.merge(shifted, how='left', on=['leg', 'season', key_col])
    return enriched.drop(columns=['previous_leg'])

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='rank', team=True)
#ligue1_data

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='rank', team=False)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='rolling_5_games_avg_goals_scored', team=True)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='rolling_5_games_avg_goals_conceded', team=False)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='avg_goals_scored_since_season_start', team=True)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='avg_goals_conceded_since_season_start', team=False)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='goals_scored', team=True)

In [None]:
liga_data = get_past_feature(df=liga_data, feat_col='goals_conceded', team=False)

In [None]:
# ligue1_data.head(10)

In [None]:
# add 5 last perf as features

# 1. Performance (rank, pts, goals scored, goal difference) evolution during the season based on the Final Ranking 

In [None]:
#import plotly.graph_objects as go
#import plotly.express as px

In [None]:
nb_teams = liga_data.team.nunique()
nb_seasons = liga_data.season.nunique()

In [None]:
# The data loaded above is La Liga (not Ligue 1); the season span is read from the
# data instead of being hard-coded (season labels look like '2004-2005', so the
# lexicographic min/max are the first and last seasons).
print(f"""{nb_teams} teams have played in La Liga from season {liga_data.season.min()} to season {liga_data.season.max()},
i.e. over {nb_seasons} seasons """)

In [None]:
participation_df = end_season[['team', 'final_rank']].groupby(by='team').agg('count').rename(
    columns={"final_rank":"nb_participation"})
participation_df.sort_values(by="nb_participation", ascending=False)

In [None]:
print("{nb_all_seasons} teams played all {nb_seasons} seasons".format(
    nb_all_seasons=len(participation_df[participation_df.nb_participation==nb_seasons]),
    nb_seasons=nb_seasons))

In [None]:
final_rank_perf_evolution = liga_data[
    ['final_rank', 'leg', 'cum_pts', 'goals_scored', 'goals_conceded', 'cum_goal_diff', 'cum_goals_scored', 'rank']
].groupby(by=['final_rank', 'leg']).aggregate({'rank': ['mean', 'std'],
                                               'cum_pts': ['mean', 'std'],
                                               'cum_goal_diff': ['mean', 'std'],
                                               'cum_goals_scored': ['mean', 'std'],
                                               'goals_scored': ['mean', 'std'],
                                               'goals_conceded': ['mean', 'std'],
                                              })

In [None]:
# Flatten the (kpi, statistic) column MultiIndex into 'avg_<kpi>' / 'std_<kpi>' names.
# The names must follow that exact pattern: plot_plotly_kpi looks columns up as
# f'avg_{kpi}' and f'std_{kpi}' (hence 'std_cum_goals_scored', with an "s").
final_rank_perf_evolution.columns = ['avg_rank', 'std_rank',
                                     'avg_cum_pts', 'std_cum_pts',
                                     'avg_cum_goal_diff', 'std_cum_goal_diff',
                                     'avg_cum_goals_scored', 'std_cum_goals_scored',
                                     'avg_goals_scored', 'std_goals_scored',
                                     'avg_goals_conceded', 'std_goals_conceded']

In [None]:
df=final_rank_perf_evolution.reset_index()

In [None]:
plotly_colors = '''
        aliceblue, antiquewhite, aqua, aquamarine, azure,
        beige, bisque, black, blanchedalmond, blue,
        blueviolet, brown, burlywood, cadetblue,
        chartreuse, chocolate, coral, cornflowerblue,
        cornsilk, crimson, cyan, darkblue, darkcyan,
        darkgoldenrod, darkgray, darkgrey, darkgreen,
        darkkhaki, darkmagenta, darkolivegreen, darkorange,
        darkorchid, darkred, darksalmon, darkseagreen,
        darkslateblue, darkslategray, darkslategrey,
        darkturquoise, darkviolet, deeppink, deepskyblue,
        dimgray, dimgrey, dodgerblue, firebrick,
        floralwhite, forestgreen, fuchsia, gainsboro,
        ghostwhite, gold, goldenrod, gray, grey, green,
        greenyellow, honeydew, hotpink, indianred, indigo,
        ivory, khaki, lavender, lavenderblush, lawngreen,
        lemonchiffon, lightblue, lightcoral, lightcyan,
        lightgoldenrodyellow, lightgray, lightgrey,
        lightgreen, lightpink, lightsalmon, lightseagreen,
        lightskyblue, lightslategray, lightslategrey,
        lightsteelblue, lightyellow, lime, limegreen,
        linen, magenta, maroon, mediumaquamarine,
        mediumblue, mediumorchid, mediumpurple,
        mediumseagreen, mediumslateblue, mediumspringgreen,
        mediumturquoise, mediumvioletred, midnightblue,
        mintcream, mistyrose, moccasin, navajowhite, navy,
        oldlace, olive, olivedrab, orange, orangered,
        orchid, palegoldenrod, palegreen, paleturquoise,
        palevioletred, papayawhip, peachpuff, peru, pink,
        plum, powderblue, purple, red, rosybrown,
        royalblue, saddlebrown, salmon, sandybrown,
        seagreen, seashell, sienna, silver, skyblue,
        slateblue, slategray, slategrey, snow, springgreen,
        steelblue, tan, teal, thistle, tomato, turquoise,
        violet, wheat, white, whitesmoke, yellow,
        yellowgreen
        '''

In [None]:
color_2_position = {1: "royalblue", 
                    2: "cornflowerblue",
                    3: "aqua",
                    4: "chartreuse",
                    5: "yellowgreen",
                    6: "green",
                    7: "teal",
                    8: "purple",
                    9: "mediumturquoise",
                    10: "plum",
                    11: "khaki",
                    12: "goldenrod",
                    13: "yellow",
                    14: "chocolate",
                    15: "lightpink",
                    16: "hotpink",
                    17: "lightsalmon",
                    18: "orange",
                    19: "orangered",
                    20: "red"
                   }

In [None]:
# Remove line breaks and spaces from the raw colour block, then split it on commas.
plotly_colors_list = plotly_colors.replace('\n', '').replace(' ', '').split(',')
# plotly_colors_list

In [None]:
def color_name_to_rgba(name, fill=0.3):
    """Convert a CSS colour name into an ``rgba(r, g, b, a)`` string.

    :param name: str: CSS colour name; must be listed in ``plotly_colors_list``
    :param fill: alpha (opacity) component appended after the three colour channels
    :return: str: e.g. ``'rgba(255, 0, 0, 0.3)'``
    """
    assert name in plotly_colors_list

    components = [str(channel) for channel in name_to_rgb(name)]
    components.append(str(fill))

    return 'rgba(' + ", ".join(components) + ')'
    

In [None]:
def plot_seaborn_kpi(data, kpi='cum_pts'):
    """Draw a seaborn line plot of ``kpi`` against ``leg``, one hue per ``final_rank``.

    :param data: pd.DataFrame: must contain ``leg``, ``final_rank`` and the ``kpi`` column
    :param kpi: str: one of the admissible KPI names listed below
    :raises Exception: if ``kpi`` is not admissible
    """
    admissible_kpis = {'rank', 
                       'cum_pts', 
                       'cum_goal_diff', 
                       'cum_goals_scored',
                       'goals_conceded', 
                       'goals_scored'}
    if kpi not in admissible_kpis:
        raise Exception(f"kpi {kpi} is not admissible. It must be part of the following set : {admissible_kpis}")

    relplot_options = dict(
        x="leg",
        y=kpi,
        sort=True,
        markers=True,
        kind="line",
        data=data,
        hue='final_rank',
        height=7,
    )
    sns.relplot(**relplot_options)

In [None]:
def plot_plotly_kpi(df, kpi='cum_pts', not_show_standard_deviation=True):
    """Plot the average evolution of a KPI per leg, one line per final rank.

    :param df: pd.DataFrame: aggregated data with columns ``leg``, ``final_rank``,
        ``avg_<kpi>`` and, when the standard deviation is drawn, ``std_<kpi>``
    :param kpi: str: one of the admissible KPI names listed below
    :param not_show_standard_deviation: bool: when True, draw only the average lines with
        plotly express; when False, also shade a band of +/- one standard deviation
        around each average line
    :raises Exception: if ``kpi`` is not admissible
    """
    admissible_kpis = {'rank', 
                       'cum_pts', 
                       'cum_goal_diff', 
                       'cum_goals_scored',
                       'goals_conceded', 
                       'goals_scored'
                      }

    if kpi not in admissible_kpis:
        raise Exception(f"""
        kpi {kpi} is not admissible. It must be part of the following set : {admissible_kpis}
        """)

    avg_col = f'avg_{kpi}'
    std_col = f'std_{kpi}'

    if not_show_standard_deviation:
        fig = px.line(data_frame=df, x="leg", y=avg_col, color="final_rank",
                      title=f"Average {kpi} Evolution based on final ranking",
                      )

        fig.update_layout(
            autosize=False,
            width=800,
            height=800)

        fig.show()
        return

    go_layers = []
    # Reverse order so that the first final ranks are added (and drawn) last.
    for ranking in df.final_rank.unique()[::-1]:
        dg = df[df.final_rank == ranking]
        rank_color = color_2_position[ranking]
        go_layers.extend([
            # Average line; ranks 3 and 18 are drawn thicker than the others.
            go.Scatter(
                name=str(ranking),
                x=dg['leg'],
                y=dg[avg_col],
                mode='lines',
                line=dict(color=rank_color,
                          width=2 if ranking not in [3, 18] else 5)
            ),
            # Invisible upper bound (average + std); it only anchors the fill below.
            go.Scatter(
                name=f'Upper Bound {ranking}',
                x=dg['leg'],
                y=dg[avg_col] + dg[std_col],
                mode='lines',
                marker=dict(color="#444"),
                line=dict(width=0),
                showlegend=False
            ),
            # Invisible lower bound (average - std); 'tonexty' fills up to the previous trace.
            go.Scatter(
                name=f'Lower Bound {ranking}',
                x=dg['leg'],
                y=dg[avg_col] - dg[std_col],
                marker=dict(color="#444"),
                line=dict(width=0),
                mode='lines',
                fillcolor=color_name_to_rgba(name=rank_color, fill=0.1),
                fill='tonexty',
                showlegend=False,
            ),
        ])

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,

        xaxis=go.layout.XAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        yaxis=go.layout.YAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        margin=go.layout.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        )
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        # Label the axis with the KPI actually plotted (it was always 'Number of points').
        yaxis_title=f"Average {kpi}",
        title=f"{kpi} Evolution according to final ranking",
        hovermode="x"
    )

    fig.show()

In [None]:
# https://community.plotly.com/t/different-colors-for-bars-in-barchart-by-their-value/6527/7
# https://plotly.com/python/continuous-error-bars/


## Points

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='cum_pts')

In [None]:
# plot_plotly_kpi(df=df, kpi='cum_pts', not_show_standard_deviation=True)

## Rank

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='rank')

In [None]:
# plot_plotly_kpi(df=df, kpi='rank', not_show_standard_deviation=True)

In [None]:
# https://plotly.com/python/sankey-diagram/

#### Sankey Approach

In [None]:
sankey_df = liga_data[['season', 'team', 'leg', 'rank']].rename(columns={"leg": "source_leg", "rank": "source_rank"})
sankey_df_2 = liga_data[['season', 'team', 'leg', 'rank']].rename(columns={"leg": "target_leg", "rank": "target_rank"})

In [None]:
sankey_df_2['source_leg'] = sankey_df_2['target_leg'] - 1

In [None]:
# sankey_df_2

In [None]:
sankey_df_3 = sankey_df.merge(sankey_df_2, on=['season', 'source_leg', 'team'], how='inner')

In [None]:
# sankey_df_3.head()

In [None]:
cnt_df = sankey_df_3.groupby(
    by=['source_leg','source_rank', 'target_leg', 'target_rank'])['team'].count().reset_index()

cnt_df.rename(columns={"team" : 'cnt'}, inplace=True) 

In [None]:
# Build the 'leg_<leg>_rank_<rank>' label of the source node with vectorised string
# concatenation; the previous row-wise apply indexed each row positionally (r[0], r[1]),
# which pandas deprecates for label-indexed Series.
cnt_df['source_node'] = (
    'leg_' + cnt_df['source_leg'].astype(str) + '_rank_' + cnt_df['source_rank'].astype(str)
)

In [None]:
# Build the 'leg_<leg>_rank_<rank>' label of the target node with vectorised string
# concatenation; the previous row-wise apply indexed each row positionally (r[0], r[1]),
# which pandas deprecates for label-indexed Series.
cnt_df['target_node'] = (
    'leg_' + cnt_df['target_leg'].astype(str) + '_rank_' + cnt_df['target_rank'].astype(str)
)

In [None]:
# cnt_df

In [None]:
# Map every 'leg_<leg>_rank_<rank>' label (legs 1-38, ranks 1-20) to a consecutive integer,
# in rank-major order: all legs of rank 1 first, then all legs of rank 2, and so on.
# NOTE(review): show_sankey builds its own local mapping under the same name, so this
# module-level dict does not look used by the visible code -- confirm before removing.
node_2_key = {node :i for i, node in enumerate(
    ['leg_{}_rank_{}'.format(leg, rnk) for rnk in range(1,21) for leg in range(1,39)])}

In [None]:
# node_2_key

In [None]:
# data = json.loads(response.read())

# override gray link colors with 'source' colors
# opacity = 0.4
# change 'magenta' to its 'rgba' value to add opacity
#data['data'][0]['node']['color'] = ['rgba(255,0,255, 0.8)' if color == "magenta" else color for color in data['data'][0]['node']['color']]
#data['data'][0]['link']['color'] = [data['data'][0]['node']['color'][src].replace("0.8", str(opacity))
#                                    for src in data['data'][0]['link']['source']]

def show_sankey(sankey_data, start_leg=1, end_leg=38):
    """Display a Sankey diagram of rank transitions between consecutive legs.

    :param sankey_data: pd.DataFrame: one row per transition, with columns ``source_leg``,
        ``source_rank``, ``source_node``, ``target_node`` and ``cnt``
    :param start_leg: int: first source leg kept (inclusive)
    :param end_leg: int: last leg shown; transitions whose source leg is ``end_leg`` are dropped
    """
    keep = (sankey_data.source_leg >= start_leg) & (sankey_data.source_leg < end_leg)
    links = sankey_data[keep]

    ranks = range(1, 21)
    legs = range(start_leg, end_leg + 1)

    # Nodes are laid out rank-major: every leg of rank 1, then every leg of rank 2, ...
    node_labels = ['leg_{}_rank_{}'.format(leg, rnk) for rnk in ranks for leg in legs]
    node_index = {label: position for position, label in enumerate(node_labels)}
    node_colors = [color_name_to_rgba(name=color_2_position[rnk], fill=0.8)
                   for rnk in ranks for _ in legs]

    # Each link takes the colour of the rank it leaves, at a lower opacity than the nodes.
    link_sources = [node_index[label] for label in links['source_node'].values]
    link_targets = [node_index[label] for label in links['target_node'].values]
    link_colors = [color_name_to_rgba(name=color_2_position[rnk], fill=0.4)
                   for rnk in links.source_rank.values]

    sankey_trace = go.Sankey(
        valueformat=".0f",
        node=dict(
            pad=7,
            thickness=5,
            line=dict(color="black", width=0.5),
            label=node_labels,
            color=node_colors,
        ),
        link=dict(
            source=link_sources,
            target=link_targets,
            value=links['cnt'].values,
            color=link_colors,
        ),
    )

    layout = go.Layout(
        autosize=True,
        width=1200,
        height=1200,
        xaxis=go.layout.XAxis(linecolor='black', linewidth=1, mirror=True),
        yaxis=go.layout.YAxis(linecolor='black', linewidth=1, mirror=True),
        margin=go.layout.Margin(l=50, r=50, b=100, t=100, pad=4),
    )

    sankey_fig = go.Figure(data=[sankey_trace], layout=layout)
    sankey_fig.update_layout(title_text="Rank evolution", font_size=10)
    sankey_fig.show()

In [None]:
show_sankey(sankey_data=cnt_df, start_leg=26, end_leg=38)

## Comparison with average point evolution 

In [None]:
liga_data.team.unique()

In [None]:
def compare_pts_evol_with_avg_evolution(data, team, season='2018-2019', until_leg=38, compare_with=None):
    """Plot a team's cumulative points over one season against a reference team's average.

    :param data: pd.DataFrame: data containing the league performance; must contain the
        columns ``team``, ``season``, ``leg`` and ``cum_pts``
    :param team: str: name of the team we want to analyze
    :param season: str: season we're interested in
    :param until_leg: int: plot the team's points evolution from leg 1 to ``until_leg`` included
    :param compare_with: str: name of the team whose points evolution, averaged over all its
        seasons, is used for comparison. That team MUST have played at least 5 seasons.
        Defaults to ``team`` itself when None.
    :raises ValueError: if ``team`` has no data for ``season`` or if the reference team has
        played fewer than 5 seasons
    """
    min_seasons = 5  # minimum number of seasons for the average to be meaningful

    if compare_with is None:
        # Without an explicit reference, compare the team with its own average.
        compare_with = team

    team_data = data[(data.team == team) & (data.season == season) & (data.leg <= until_leg)]
    if len(team_data) == 0:
        raise ValueError(f"{team} has no data for season {season} up to leg {until_leg}. "
                         "Please review your inputs")

    comparator_data = data[data.team == compare_with]
    nb_season = comparator_data.season.nunique()
    if nb_season < min_seasons:
        raise ValueError(f"{compare_with} has played {nb_season} season(s); at least "
                         f"{min_seasons} are required. Please review your inputs")

    avg_comparator_data = comparator_data[['leg', 'cum_pts']].groupby(
        by=['leg']).mean().reset_index().rename(columns={'cum_pts': 'avg_cum_pts'})

    go_layers = [
        go.Scatter(name=f"{compare_with} averaged",
                   x=avg_comparator_data['leg'],
                   y=avg_comparator_data['avg_cum_pts'],
                   mode='lines',
                   line=dict(color="red",
                             width=5)
                   ),
        go.Scatter(name=team,
                   x=team_data['leg'],
                   y=team_data['cum_pts'],
                   mode='lines',
                   line=dict(color="royalblue",
                             width=2)
                   )
    ]

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,

        xaxis=go.layout.XAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        yaxis=go.layout.YAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        margin=go.layout.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        )
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        yaxis_title="number of points",
        title=f"{team}'s point evolution during season {season} wrt to {compare_with} average pts evolution",
        hovermode="x"
    )

    fig.show()
    

In [None]:
compare_pts_evol_with_avg_evolution(data=liga_data, 
                                    team= 'Real Madrid', # 'FC Barcelone', #'Lyon', 
                                    season='2008-2009', 
                                    until_leg=38, 
                                    compare_with='Real Madrid')

In [None]:
def compare_pts_evol_time(data, team, until_leg=38):
    """Plot a team's cumulative points for each of its seasons against its own average.

    :param data: pd.DataFrame: data containing the league performance; must contain the
        columns ``team``, ``season``, ``leg`` and ``cum_pts``
    :param team: str: name of the team we want to analyze. That team MUST have played
        at least 5 seasons
    :param until_leg: int: plot the team's points evolution from leg 1 to ``until_leg`` included
        (the average line is computed over all legs)
    :raises ValueError: if the team has no data up to ``until_leg`` or has played fewer
        than 5 seasons
    """
    min_seasons = 5  # minimum number of seasons for the average to be meaningful

    comparator_data = data[data.team == team]
    team_data = comparator_data[comparator_data.leg <= until_leg]

    nb_season = comparator_data.season.nunique()
    if nb_season < min_seasons or len(team_data) == 0:
        raise ValueError(f"{team} has played {nb_season} season(s) in the data; at least "
                         f"{min_seasons} are required. Please change team")

    avg_comparator_data = comparator_data[['leg', 'cum_pts']].groupby(
        by=['leg']).mean().reset_index().rename(columns={'cum_pts': 'avg_cum_pts'})

    go_layers = [
        go.Scatter(name="averaged point evolution",
                   x=avg_comparator_data['leg'],
                   y=avg_comparator_data['avg_cum_pts'],
                   mode='lines',
                   line=dict(color="red",
                             width=5)
                   )]

    # One line per season actually present for this team (instead of a hard-coded
    # 2004-2018 range); the palette is cycled so any number of seasons can be drawn.
    palette = list(color_2_position.values())
    for position, season in enumerate(sorted(team_data['season'].unique())):
        season_data = team_data[team_data.season == season]
        go_layers.append(
            go.Scatter(name=season,
                       x=season_data['leg'],
                       y=season_data['cum_pts'],
                       mode='lines',
                       line=dict(color=palette[position % len(palette)],
                                 width=2)
                       )
        )

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,

        xaxis=go.layout.XAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        yaxis=go.layout.YAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        margin=go.layout.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        )
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        yaxis_title="number of points",
        title=f"{team}'s point evolution over its {nb_season} seasons wrt to its average pts evolution",
        hovermode="x"
    )

    fig.show()
    
    

In [None]:
compare_pts_evol_time(data=liga_data,
                     team='Real Madrid', #'Paris-SG',
                     until_leg=38)


## Cumulative goals scored

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='cum_goals_scored')

In [None]:
plot_plotly_kpi(df=df, kpi='cum_goals_scored', not_show_standard_deviation=True)

## Cumulative goal difference 

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='cum_goal_diff')

In [None]:
plot_plotly_kpi(df=df, kpi='cum_goal_diff', not_show_standard_deviation=True)

## Goals scored

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='goals_scored')

In [None]:
plot_plotly_kpi(df=df, kpi='goals_scored', not_show_standard_deviation=True)

## Goals conceded

In [None]:
plot_seaborn_kpi(data=liga_data, kpi='goals_conceded')

In [None]:
plot_plotly_kpi(df=df, kpi='goals_conceded', not_show_standard_deviation=True)

## Scatter plot
#### x: number of goals scored
#### y: number of goals conceded
#### color: final rank or current rank

In [None]:
def plot_plotly_scatter(df, color, season=None):
    """Scatter goals scored against goals conceded, one trace per value of ``color``.

    A random jitter in [0, 0.7] is added to both coordinates so that matches with the
    same integer score do not overlap; the jitter is not seeded, so the picture changes
    from one call to the next.

    :param df: pd.DataFrame: must contain ``goals_scored``, ``goals_conceded``, the
        ``color`` column and, when ``season`` is given, ``season``
    :param color: str: ``'rank'`` or ``'final_rank'``; the column used to group and colour points
    :param season: str or None: restrict the plot to that season when given
    :raises ValueError: if ``color`` is not one of the two admissible columns
    """
    if color not in ['rank', 'final_rank']:
        raise ValueError(f"color MUST be rank or final_rank. Got {color}")

    if season:
        df = df[df.season == season]

    go_layers = []
    # Group on the values of the column actually used for colouring (the loop used to
    # run over final_rank whatever `color` was).
    for ranking in sorted(df[color].dropna().unique()):
        dg = df[df[color] == ranking]

        # Jittered coordinates are kept in separate Series: writing floats back into the
        # integer goal columns would force an in-place dtype change.
        jittered_scored = dg['goals_scored'].apply(lambda g: g + random.uniform(0, .7))
        jittered_conceded = dg['goals_conceded'].apply(lambda g: g + random.uniform(0, .7))

        go_layers.append(
            go.Scatter(
                name=str(ranking),
                x=jittered_scored,
                y=jittered_conceded,
                mode='markers',
                line=dict(color=color_2_position[ranking],
                          width=2 if ranking not in [3, 18] else 5)
            )
        )

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,

        xaxis=go.layout.XAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        yaxis=go.layout.YAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        margin=go.layout.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        )
    )

    fig = go.Figure(data=go_layers, layout=layout)

    fig.update_layout(
        yaxis_title='Goals conceded',
        xaxis_title='Goals scored',
        title=f"Scatterplot goals scored/conceded. Color is related to {color} column",
        hovermode="x"
    )

    fig.show()

In [None]:
plot_plotly_scatter(df=liga_data, color='rank', season='2018-2019')

## Histogram

In [None]:
def plot_plotly_histogram(df, x, color, season=None):
    """Overlay one histogram of ``x`` per value of the ``color`` column.

    :param df: pd.DataFrame: must contain the ``x`` and ``color`` columns and, when
        ``season`` is given, ``season``
    :param x: str: ``'goals_scored'`` or ``'goals_conceded'``; the histogrammed column
    :param color: str: ``'rank'``, ``'final_rank'``, ``'previous_team_rank'`` or
        ``'previous_opponent_rank'``; the column used to split the histograms
    :param season: str or None: restrict the plot to that season when given
    :raises ValueError: if ``color`` or ``x`` is not admissible
    """
    admissible_colors = ['rank', 'final_rank', 'previous_team_rank', 'previous_opponent_rank']
    if color not in admissible_colors:
        raise ValueError(f"color MUST be one of {admissible_colors}. Got {color}")

    if x not in ['goals_scored', 'goals_conceded']:
        raise ValueError(f"x MUST be goals_scored or goals_conceded. Got {x}")

    if season:
        df = df[df.season == season]

    go_layers = []
    # Split on the values of the column actually used for colouring. The previous_* columns
    # hold NaN where there is no previous leg (and are therefore floats), hence dropna + int.
    for ranking in sorted(df[color].dropna().unique()):
        ranking = int(ranking)
        dg = df[df[color] == ranking]

        go_layers.append(
            go.Histogram(
                name=str(ranking),
                x=dg[x].values,
                marker=dict(line=dict(color=color_2_position[ranking],
                                      width=2 if ranking not in [3, 18] else 5)
                            ))
        )

    layout = go.Layout(
        autosize=True,
        width=800,
        height=800,

        xaxis=go.layout.XAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        yaxis=go.layout.YAxis(linecolor='black',
                              linewidth=1,
                              mirror=True),

        margin=go.layout.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        )
    )

    fig = go.Figure(data=go_layers, layout=layout)

    disp_season = season if season else 'all'

    fig.update_layout(
        yaxis_title='nb. times',
        # 'goals_scored' -> 'Goals scored', 'goals_conceded' -> 'Goals conceded'
        xaxis_title=x.replace('_', ' ').capitalize(),
        title=f"Histogram of {x} for {disp_season}. Color is related to {color} column",
        hovermode="x"
    )

    # Overlay the histograms and lower their opacity so that all of them stay visible.
    fig.update_layout(barmode='overlay')
    fig.update_traces(opacity=0.5)

    fig.show()

In [None]:
#tmp_df = deepcopy(ligue1_data[['season', 'leg', 'team', 'rank']])
#tmp_df.loc[:,'next_leg'] = tmp_df['leg'] +1

In [None]:
#tmp_df.rename(columns={'leg': 'previous_leg', 'next_leg':'leg', 'rank':'previous_rank'}, inplace=True)

In [None]:
#tmp_df

In [None]:
#ligue1_data_dev = ligue1_data.merge(tmp_df, how='left', on=['leg', 'season', 'team'])

In [None]:
# ligue1_data_dev[ligue1_data_dev.leg==1]

In [None]:
# ligue1_data

In [None]:
plot_plotly_histogram(df=liga_data, x='goals_scored', color='previous_team_rank', season='2018-2019')

In [None]:
def hist_aggregator(df, column_to_describe, aggreg_column='play', bin_step=None):
    """Count the rows for every (``aggreg_column``, ``column_to_describe``) combination.

    The input frame is left untouched, and the count is a plain row count, so the frame
    does not need a ``country`` column (rows used to be counted through that column,
    which also meant rows with a missing ``country`` were not counted).

    :param df: pd.DataFrame: must contain ``aggreg_column`` and ``column_to_describe``
    :param column_to_describe: str: column whose values are counted
    :param aggreg_column: str: column the counts are split by
    :param bin_step: number or None: when given, ``aggreg_column`` is first floored to a
        multiple of ``bin_step`` and the key column is named ``<aggreg_column>_binned``
    :return: pd.DataFrame: columns [key column, ``column_to_describe``, ``cnt``]
    """
    group_key = df[aggreg_column]
    if bin_step is not None:
        # Floor each value to the lower edge of its bin, without writing into `df`.
        group_key = ((group_key // bin_step) * bin_step).rename(f'{aggreg_column}_binned')

    return (
        df.groupby(by=[group_key, df[column_to_describe]])
        .size()
        .reset_index(name='cnt')
    )

In [None]:
def mean_aggregator(df, column_to_describe, aggreg_column='play', bin_step=None):
    """Average ``column_to_describe`` for every value of ``aggreg_column``.

    The input frame is left untouched (the binned key is no longer written into it).

    :param df: pd.DataFrame: must contain ``aggreg_column`` and ``column_to_describe``
    :param column_to_describe: str: column that is averaged
    :param aggreg_column: str: column the averages are split by
    :param bin_step: number or None: when given, ``aggreg_column`` is first floored to a
        multiple of ``bin_step`` and the key column is named ``<aggreg_column>_binned``
    :return: pd.DataFrame: columns [key column, ``avg_<column_to_describe>``]
    """
    group_key = df[aggreg_column]
    if bin_step is not None:
        # Floor each value to the lower edge of its bin, without writing into `df`.
        group_key = ((group_key // bin_step) * bin_step).rename(f'{aggreg_column}_binned')

    df_agg = df[column_to_describe].groupby(group_key).mean().reset_index()
    return df_agg.rename(columns={column_to_describe: f'avg_{column_to_describe}'})

In [None]:
hist_aggregator(df=liga_data, column_to_describe='goals_scored', aggreg_column='previous_team_rank')

In [None]:
def draw_pie_chart(df, values, names, hover_data, title):
    """Display a pie chart of ``values`` labelled by one or several columns.

    The input frame is left untouched: when several label columns are given, the joined
    label is built in a separate Series instead of being added to ``df``.

    :param df: pd.DataFrame: must contain the ``values`` and ``names`` column(s)
    :param values: str: column holding the size of each slice
    :param names: str or list of str: label column; a list is joined row-wise with ``'_'``
    :param hover_data: currently unused; kept so that existing calls keep working
    :param title: str: title displayed with the pie
    """
    if isinstance(names, list):
        labels = df[names].apply(lambda row: '_'.join([str(cell) for cell in row]), axis=1)
    else:
        labels = df[names]

    fig = go.Figure(data=[go.Pie(labels=labels, values=df[values])])
    fig.update_traces(title_text=title, textposition='inside', textinfo='percent+label')
    fig.show()

In [None]:
def draw_sunburst(df, path, values, color=None):
    """
    Show a Plotly Express sunburst chart.

    Args:
        df: DataFrame passed to ``px.sunburst``.
        path: Columns forwarded as the ``path`` argument.
        values: Column forwarded as the ``values`` argument.
        color: Optional column forwarded as the ``color`` argument.

    Returns:
        None. The figure is displayed with ``show()``.

    """
    px.sunburst(df, path=path, values=values, color=color).show()

# Home/Away advantage: points won, goals scored, goals conceded

# TODO: Pie charts

In [None]:
# Row counts per (play, goals_scored) combination.
home_away_goals_scored = hist_aggregator(
    df=liga_data,
    aggreg_column='play',
    column_to_describe='goals_scored',
)
home_away_goals_scored

In [None]:
# One slice per (play, goals_scored) combination, sized by its row count.
draw_pie_chart(
    df=home_away_goals_scored,
    values='cnt',
    names=['play', 'goals_scored'],
    hover_data=['play', 'goals_scored'],
    title='nb. of goals when playing home/away',
)

In [None]:
# Same counts as a two-level hierarchy: play first, then goals_scored.
draw_sunburst(
    df=home_away_goals_scored,
    path=['play', 'goals_scored'],
    values='cnt',
)

In [None]:
# Slices labelled by goals_scored only.
draw_pie_chart(
    df=home_away_goals_scored,
    values='cnt',
    names='goals_scored',
    hover_data=['play', 'goals_scored'],
    title='nb. of goals',
)

In [None]:
# Row counts per nb_points value, restricted to rows where play == 'Home'.
home_pts = hist_aggregator(
    df=liga_data[liga_data['play'] == 'Home'],
    aggreg_column='play',
    column_to_describe='nb_points',
)
home_pts

In [None]:
# Share of each nb_points outcome among the 'Home' rows.
draw_pie_chart(
    df=home_pts,
    values='cnt',
    names='nb_points',
    hover_data='nb_points',
    title='Home performance',
)

## Effect of the leg on points and goals

In [None]:
# Row counts per (leg, nb_points) combination, restricted to rows where play == 'Home'.
leg_on_perf_at_home = hist_aggregator(
    df=liga_data[liga_data['play'] == 'Home'],
    aggreg_column='leg',
    column_to_describe='nb_points',
)
leg_on_perf_at_home

In [None]:
# Pass a copy so the chart helper cannot alter leg_on_perf_at_home, which is reused below.
draw_pie_chart(
    df=deepcopy(leg_on_perf_at_home),
    values='cnt',
    names=['leg'],
    hover_data=['leg'],
    title='Legs on ties when playing home',
)

In [None]:
# Two-level hierarchy: leg first, then nb_points.
draw_sunburst(
    df=leg_on_perf_at_home,
    path=['leg', 'nb_points'],
    values='cnt',
)

In [None]:
# Row counts per (leg, goals_scored) combination.
leg_goals = hist_aggregator(
    df=liga_data,
    aggreg_column='leg',
    column_to_describe='goals_scored',
)
leg_goals

In [None]:
# Two-level hierarchy: leg first, then goals_scored.
draw_sunburst(
    df=leg_goals,
    path=['leg', 'goals_scored'],
    values='cnt',
)

## Team's season-to-date average goals scored / opponent's season-to-date average goals conceded vs. number of goals scored --> scatterplot with jitter, with the averages binned in steps of 0.1 goals

In [None]:
# Scatterplot of histogram counts (TODO from the original note: change to a bar histogram).
def draw_scatterplot(df, x, y, size_col, title):
    """
    Show a Plotly Express scatter plot whose marker size follows ``size_col``.

    Args:
        df: DataFrame passed to ``px.scatter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        size_col: Column forwarded as the ``size`` argument.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``show()``.

    """
    px.scatter(df, x=x, y=y, size=size_col, title=title).show()


# Line plot, used below on the per-group means.
def draw_line(df, x, y, title):
    """
    Show a Plotly Express line plot of ``y`` against ``x``.

    Args:
        df: DataFrame passed to ``px.line``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``show()``.

    """
    px.line(df, x=x, y=y, title=title).show()

In [None]:
#'previous_opponent_avg_goals_conceded_since_season_start',
#'previous_team_avg_goals_score_since_season_start'

In [None]:
# Row counts per (0.1-step bin of the opponent's season-start average, goals_scored).
opponent_season_perf_on_goals_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_avg_goals_conceded_since_season_start',
    column_to_describe='goals_scored',
    bin_step=.1,
)
opponent_season_perf_on_goals_hist


In [None]:
# Marker size is the row count of each (bin, goals_scored) combination.
draw_scatterplot(
    df=opponent_season_perf_on_goals_hist,
    x='previous_opponent_avg_goals_conceded_since_season_start_binned',
    y='goals_scored',
    size_col='cnt',
    title='Opponent avg goals conceded since season start vs goals to be scored',
)

In [None]:
# Mean goals_scored per 0.1-step bin of the opponent's season-start average.
opponent_season_perf_on_goals_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_avg_goals_conceded_since_season_start',
    column_to_describe='goals_scored',
    bin_step=.1,
)
opponent_season_perf_on_goals_mean

In [None]:
# Mean goals scored as a function of the binned opponent average.
draw_line(
    df=opponent_season_perf_on_goals_mean,
    x='previous_opponent_avg_goals_conceded_since_season_start_binned',
    y='avg_goals_scored',
    title='Opponent avg goals conceded since season start vs avg goals to be scored',
)

In [None]:
# Row counts per (0.1-step bin of the team's season-start average, goals_scored).
team_season_perf_on_goals_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_team_avg_goals_scored_since_season_start',
    column_to_describe='goals_scored',
    bin_step=.1,
)
team_season_perf_on_goals_hist.head()

In [None]:
# Marker size is the row count of each (bin, goals_scored) combination.
draw_scatterplot(
    df=team_season_perf_on_goals_hist,
    x='previous_team_avg_goals_scored_since_season_start_binned',
    y='goals_scored',
    size_col='cnt',
    title='Team avg goals scored since season start vs goals to be scored',
)

In [None]:
# Mean goals_scored per 0.1-step bin of the team's season-start average.
team_season_perf_on_goals_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_team_avg_goals_scored_since_season_start',
    column_to_describe='goals_scored',
    bin_step=.1,
)
team_season_perf_on_goals_mean.head()

In [None]:
# Mean goals scored as a function of the binned team average.
draw_line(
    df=team_season_perf_on_goals_mean,
    x='previous_team_avg_goals_scored_since_season_start_binned',
    y='avg_goals_scored',
    title='Team avg goals scored since season start vs avg goals to be scored',
)

## Team's average goals scored over the last 5 games / opponent's average goals conceded over the last 5 games vs. number of goals scored --> scatterplot with jitter

In [None]:
#'previous_opponent_rolling_5_games_avg_goals_conceded',
#'previous_team_rolling_5_games_avg_goals_scored'

In [None]:
# Row counts per (0.1-step bin of the opponent's rolling 5-game average, goals_scored).
opponent_last5_perf_on_goals_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_rolling_5_games_avg_goals_conceded',
    column_to_describe='goals_scored',
    bin_step=.1,
)
opponent_last5_perf_on_goals_hist

In [None]:
# Marker size is the row count of each (bin, goals_scored) combination.
draw_scatterplot(
    df=opponent_last5_perf_on_goals_hist,
    x='previous_opponent_rolling_5_games_avg_goals_conceded_binned',
    y='goals_scored',
    size_col='cnt',
    title='5 leg Avg on opponents goals conceded vs goals to be scored',
)

In [None]:
# Mean goals_scored per 0.1-step bin of the opponent's rolling 5-game average.
opponent_last5_perf_on_goals_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_rolling_5_games_avg_goals_conceded',
    column_to_describe='goals_scored',
    bin_step=.1,
)
opponent_last5_perf_on_goals_mean

In [None]:
# Mean goals scored as a function of the binned opponent 5-game average.
draw_line(
    df=opponent_last5_perf_on_goals_mean,
    x='previous_opponent_rolling_5_games_avg_goals_conceded_binned',
    y='avg_goals_scored',
    title='5 leg Avg on opponents goals conceded vs avg goals to be scored',
)

In [None]:
# Row counts per (0.1-step bin of the team's rolling 5-game average, goals_scored).
team_last5_perf_on_goals_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_team_rolling_5_games_avg_goals_scored',
    column_to_describe='goals_scored',
    bin_step=.1,
)
team_last5_perf_on_goals_hist

In [None]:
# Marker size is the row count of each (bin, goals_scored) combination.
draw_scatterplot(
    df=team_last5_perf_on_goals_hist,
    x='previous_team_rolling_5_games_avg_goals_scored_binned',
    y='goals_scored',
    size_col='cnt',
    title='5 leg Avg on Team goals scored vs goals to be scored',
)

In [None]:
# Mean goals_scored per 0.1-step bin of the team's rolling 5-game average.
team_last5_perf_on_goals_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_team_rolling_5_games_avg_goals_scored',
    column_to_describe='goals_scored',
    bin_step=.1,
)
team_last5_perf_on_goals_mean

In [None]:
# Mean goals scored as a function of the binned team 5-game average.
draw_line(
    df=team_last5_perf_on_goals_mean,
    x='previous_team_rolling_5_games_avg_goals_scored_binned',
    y='avg_goals_scored',
    title='5 leg Avg on Team goals scored vs avg goals to be scored',
)

# LAST GAME PERFORMANCE

##### Team

In [None]:
# Row counts per (previous_team_goals_scored, goals_scored) combination; no binning.
last_game_team_goals_scored_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_team_goals_scored',
    column_to_describe='goals_scored',
    bin_step=None,
)
last_game_team_goals_scored_hist

In [None]:
# Marker size is the row count of each (previous goals, goals_scored) combination.
draw_scatterplot(
    df=last_game_team_goals_scored_hist,
    x='previous_team_goals_scored',
    y='goals_scored',
    size_col='cnt',
    title='Team previous game goals scored vs goals to be scored',
)

In [None]:
# Mean goals_scored per previous_team_goals_scored value; no binning.
last_game_team_goals_scored_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_team_goals_scored',
    column_to_describe='goals_scored',
    bin_step=None,
)
last_game_team_goals_scored_mean

In [None]:
# Mean goals scored as a function of the team's previous_team_goals_scored value.
draw_line(
    df=last_game_team_goals_scored_mean,
    x='previous_team_goals_scored',
    y='avg_goals_scored',
    title='Team previous game goals scored vs avg goals to be scored',
)

##### Opponent

In [None]:
# Row counts per (previous_opponent_goals_conceded, goals_scored) combination; no binning.
last_game_opponent_goals_conceded_hist = hist_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_goals_conceded',
    column_to_describe='goals_scored',
    bin_step=None,
)
last_game_opponent_goals_conceded_hist

In [None]:
# Marker size is the row count of each (previous goals conceded, goals_scored) combination.
draw_scatterplot(
    df=last_game_opponent_goals_conceded_hist,
    x='previous_opponent_goals_conceded',
    y='goals_scored',
    size_col='cnt',
    title='Opponent previous game goals conceded vs goals to be scored',
)

In [None]:
# Mean goals_scored per previous_opponent_goals_conceded value; no binning.
last_game_opponent_goals_conceded_mean = mean_aggregator(
    df=liga_data,
    aggreg_column='previous_opponent_goals_conceded',
    column_to_describe='goals_scored',
    bin_step=None,
)
last_game_opponent_goals_conceded_mean

In [None]:
# Mean goals scored as a function of the opponent's previous_opponent_goals_conceded value.
draw_line(
    df=last_game_opponent_goals_conceded_mean,
    x='previous_opponent_goals_conceded',
    y='avg_goals_scored',
    title='Opponent previous game goals conceded vs avg goals to be scored',
)

# APPENDIX

In [None]:
def show_named_plotly_colours():
    """
    Display a one-column Plotly table of the named CSS colours, each cell
    filled and outlined with the colour whose name it shows.

    Reference:
        #https://community.plotly.com/t/plotly-colours-list/11730/3

    Returns:
        None. The table figure is displayed with ``show()``.

    """
    s='''
        aliceblue, antiquewhite, aqua, aquamarine, azure,
        beige, bisque, black, blanchedalmond, blue,
        blueviolet, brown, burlywood, cadetblue,
        chartreuse, chocolate, coral, cornflowerblue,
        cornsilk, crimson, cyan, darkblue, darkcyan,
        darkgoldenrod, darkgray, darkgrey, darkgreen,
        darkkhaki, darkmagenta, darkolivegreen, darkorange,
        darkorchid, darkred, darksalmon, darkseagreen,
        darkslateblue, darkslategray, darkslategrey,
        darkturquoise, darkviolet, deeppink, deepskyblue,
        dimgray, dimgrey, dodgerblue, firebrick,
        floralwhite, forestgreen, fuchsia, gainsboro,
        ghostwhite, gold, goldenrod, gray, grey, green,
        greenyellow, honeydew, hotpink, indianred, indigo,
        ivory, khaki, lavender, lavenderblush, lawngreen,
        lemonchiffon, lightblue, lightcoral, lightcyan,
        lightgoldenrodyellow, lightgray, lightgrey,
        lightgreen, lightpink, lightsalmon, lightseagreen,
        lightskyblue, lightslategray, lightslategrey,
        lightsteelblue, lightyellow, lime, limegreen,
        linen, magenta, maroon, mediumaquamarine,
        mediumblue, mediumorchid, mediumpurple,
        mediumseagreen, mediumslateblue, mediumspringgreen,
        mediumturquoise, mediumvioletred, midnightblue,
        mintcream, mistyrose, moccasin, navajowhite, navy,
        oldlace, olive, olivedrab, orange, orangered,
        orchid, palegoldenrod, palegreen, paleturquoise,
        palevioletred, papayawhip, peachpuff, peru, pink,
        plum, powderblue, purple, red, rosybrown,
        royalblue, saddlebrown, salmon, sandybrown,
        seagreen, seashell, sienna, silver, skyblue,
        slateblue, slategray, slategrey, snow, springgreen,
        steelblue, tan, teal, thistle, tomato, turquoise,
        violet, wheat, white, whitesmoke, yellow,
        yellowgreen
        '''
    # Strip all whitespace (the literal only contains spaces and newlines),
    # then split the remaining text on commas to get the bare colour names.
    colour_names = ''.join(s.split()).split(',')

    # pd and go come from the notebook's top-level import cell.
    colours = pd.DataFrame({'colour': colour_names}).colour

    header_style = dict(
        values=["Plotly Named CSS colours"],
        line_color='black', fill_color='white',
        align='center', font=dict(color='black', size=14),
    )
    # Each cell is both outlined and filled with the colour it names.
    cell_style = dict(
        values=[colours],
        line_color=[colours], fill_color=[colours],
        align='center', font=dict(color='black', size=11),
    )

    table = go.Table(header=header_style, cells=cell_style)
    go.Figure(data=[table]).show()

In [None]:
# Display the reference table of named colours.
show_named_plotly_colours()

In [None]:
# Maps a position (1 to 20) to a named CSS colour. Despite the name, the keys
# are positions and the values are colours.
color_2_position = dict(enumerate(
    (
        "royalblue",        # 1
        "aqua",             # 2
        "cornflowerblue",   # 3
        "chartreuse",       # 4
        "yellowgreen",      # 5
        "green",            # 6
        "teal",             # 7
        "purple",           # 8
        "mediumturquoise",  # 9
        "plum",             # 10
        "khaki",            # 11
        "goldenrod",        # 12
        "yellow",           # 13
        "chocolate",        # 14
        "lightpink",        # 15
        "hotpink",          # 16
        "lightsalmon",      # 17
        "orange",           # 18
        "orangered",        # 19
        "red",              # 20
    ),
    start=1,
))