# Atalanta 2010-2020 from Serie B to Champions

This notebook uses match data from [football-data.co.uk](https://www.football-data.co.uk).
The data is downloaded as one CSV file per season for the selected league.

In [None]:
import pandas as pd
import numpy as np
import datetime
import collections

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define the LEAGUE, TEAM and the PERIOD of analysis (start/end year).
# Years are two-digit: start_year=11 means the season that starts in 2011.
league = 'I1'
selected_team = 'Atalanta'
start_year = 11
end_year = 18


def season_code(year):
    """Return the four-character code of the season starting in `year`.

    Both halves are zero-padded and wrapped at 100 so the code is always
    four characters long, e.g. 11 -> '1112', 9 -> '0910', 99 -> '9900'.
    Plain string concatenation would produce '910' and '99100' instead.
    """
    return f"{year % 100:02d}{(year + 1) % 100:02d}"


# One code per season in the analysed period, both ends included
seasons = [season_code(year) for year in range(start_year, end_year + 1)]

def geturl(season, league):
    """Build the download URL of the CSV file for one season of one league.

    `season` is the season code (e.g. '1112') and `league` the league code
    (e.g. 'I1'); both are inserted into the URL as-is.
    """
    root = "https://www.football-data.co.uk/mmz4281"
    return "/".join([root, f"{season}", f"{league}.csv"])


# --- Match identification -------------------------------------------------
#   Div      = League Division (not loaded)
#   Date     = Match Date (dd/mm/yy)
#   HomeTeam = Home Team
#   AwayTeam = Away Team
#   FTR/Res  = Full Time Result (H=Home Win, D=Draw, A=Away Win)
base_cols = [
    'Date',
    'HomeTeam',
    'AwayTeam',
    'FTR',
]

# --- Match statistics, home side (where available) --------------------------
#   FTHG/HG = Full Time Home Team Goals
#   HS      = Home Team Shots
#   HST     = Home Team Shots on Target
#   HF      = Home Team Fouls Committed
#   HC      = Home Team Corners
#   HY      = Home Team Yellow Cards
#   HR      = Home Team Red Cards
# Not available, therefore not loaded:
#   HHW (Hit Woodwork), HFKC (Free Kicks Conceded), HO (Offsides)
home_cols = [
    'FTHG',
    'HS',
    'HST',
    'HF',
    'HC',
    'HY',
    'HR',
]

# --- Match statistics, away side (same order as the home list) --------------
#   FTAG/AG = Full Time Away Team Goals
#   AS      = Away Team Shots
#   AST     = Away Team Shots on Target
#   AF      = Away Team Fouls Committed
#   AC      = Away Team Corners
#   AY      = Away Team Yellow Cards
#   AR      = Away Team Red Cards
# Not available, therefore not loaded:
#   AHW (Hit Woodwork), AFKC (Free Kicks Conceded), AO (Offsides)
away_cols = [
    'FTAG',
    'AS',
    'AST',
    'AF',
    'AC',
    'AY',
    'AR',
]

# Every column requested from the CSV files
use_cols = [*base_cols, *home_cols, *away_cols]

# Download one CSV per season and tag every match with its season label and
# the points earned by the home and the away side.
seasons_df = []
for season in seasons:
    # The source publishes dates as dd/mm/yy, so they must be parsed
    # day-first; with the default month-first parsing 03/04/12 would be read
    # as 4 March instead of 3 April.
    season_df = pd.read_csv(
        geturl(season, league),
        parse_dates=['Date'],
        dayfirst=True,
        usecols=use_cols,
    )
    # '1112' -> '11/12'
    season_df['Season'] = season[0:2] + '/' + season[2:]
    # Points from the full-time result: 3 for a win, 1 for a draw, 0 for a loss
    season_df['HomePoint'] = season_df['FTR'].map({'H': 3, 'D': 1, 'A': 0})
    season_df['AwayPoint'] = season_df['FTR'].map({'H': 0, 'D': 1, 'A': 3})
    # Placeholder column; the real goal difference is computed per team later
    season_df['GDiff'] = 0
    seasons_df.append(season_df)

# Single frame with all seasons, indexed by the season label
fulldf = pd.concat(seasons_df, sort=False)
fulldf.set_index('Season', inplace=True)

## Full dataset

In [None]:
# Notebook display: the concatenated match table of all seasons, indexed by Season
fulldf

In [None]:
# utility functions

# Bundle of the per-team frames produced by build_team_data:
# "my_*" frames hold the team's own numbers, "vs_*" those of its opponents.
TeamData = collections.namedtuple(
    'TeamData',
    [
        'my_normalized_df',
        'my_home_stats',
        'vs_home_stats',
        'my_away_stats',
        'vs_away_stats',
        'my_tot_stats',
        'vs_tot_stats',
    ],
)

# Side-independent column names applied to both the home and the away columns
norm_cols = [
    'Goal',
    'Shots',
    'ShotsTarget',
    'Corner',
    'Fouls',
    'YellowCard',
    'RedCard',
    'Point',
    'GDiff',
]

def normalize_df(adf, cols):
    """Return the `cols` columns of `adf`, relabelled with the standard names.

    The columns are renamed positionally to `norm_cols`, so `cols` must list
    as many columns as `norm_cols` has names. Rows and index are unchanged.
    """
    selected = adf[cols]
    selected.columns = norm_cols
    return selected

def grouped_df(adf, cols):
    """Sum the `cols` columns of `adf` per index label, using standard names.

    Rows sharing an index label are added together, and the result columns
    are renamed positionally to `norm_cols` (so `cols` must list as many
    columns as `norm_cols` has names).
    """
    # Select the columns *before* aggregating. Summing the whole frame also
    # tries to add up the non-numeric columns (dates, team names, results),
    # which can raise on recent pandas releases and is wasted work otherwise.
    stats_df = adf.groupby(adf.index)[cols].sum()
    stats_df.columns = norm_cols
    return stats_df

def build_team_data(team, df, home_cols, away_cols):
    """Collect the match-level and aggregated statistics of one team.

    Parameters:
        team: team name as it appears in the HomeTeam / AwayTeam columns.
        df: match table containing HomeTeam, AwayTeam and the given columns.
        home_cols: columns holding the home side's numbers.
        away_cols: columns holding the away side's numbers, in the same order.

    Returns:
        A TeamData tuple. "my_*" entries describe `team`, "vs_*" entries its
        opponents; "home"/"away" refer to where `team` played and "tot" is the
        sum of both. Only my_tot_stats gets a computed 'GDiff' (own goals minus
        opponents' goals); in the other frames it is whatever the input held.
    """
    # Matches hosted by the team, and matches where it was the visitor
    as_host = df[df["HomeTeam"] == team]
    as_guest = df[df["AwayTeam"] == team]

    # One row per match, always from the team's own point of view
    per_match = pd.concat(
        [normalize_df(as_host, home_cols), normalize_df(as_guest, away_cols)],
        sort=False,
    )

    # Home matches: the team owns the home columns, the opponent the away ones
    own_at_home = grouped_df(as_host, home_cols)
    rival_at_home = grouped_df(as_host, away_cols)

    # Away matches: the roles of the two column sets are swapped
    own_away = grouped_df(as_guest, away_cols)
    rival_away = grouped_df(as_guest, home_cols)

    # Totals over home and away matches
    own_total = own_at_home + own_away
    rival_total = rival_at_home + rival_away

    # Goal difference = goals scored minus goals conceded
    own_total['GDiff'] = own_total['Goal'] - rival_total['Goal']

    return TeamData(
        my_normalized_df=per_match,
        my_home_stats=own_at_home,
        vs_home_stats=rival_at_home,
        my_away_stats=own_away,
        vs_away_stats=rival_away,
        my_tot_stats=own_total,
        vs_tot_stats=rival_total,
    )

In [None]:
# Get the unique team names from the full data frame.
# Non-string entries are skipped (presumably NaN coming from blank CSV rows).
# The names are sorted so that the dictionary order -- and with it the order
# in which per-team traces are drawn -- is the same on every run; iterating
# a bare set of strings can change from one interpreter session to the next.
teams = sorted(
    name for name in fulldf['HomeTeam'].unique() if isinstance(name, str)
)

# Statistic columns plus the points and goal-difference columns for each side
hcols = home_cols + ['HomePoint', 'GDiff']
acols = away_cols + ['AwayPoint', 'GDiff']

# Team name -> TeamData with the per-team statistics
teams_dic = {}
for team in teams:
    teams_dic[team] = build_team_data(team, fulldf, hcols, acols)

## Selected Team Total Stats

In [None]:
# Notebook display: per-season totals (home + away) of the selected team
teams_dic[selected_team].my_tot_stats

In [None]:
# Notebook display: one row per match of the selected team, standard column names
teams_dic[selected_team].my_normalized_df

## Visualization

In [None]:
# utility functions

def compare_histograms(a, b, col, a_name, b_name):
    """Return a figure overlaying column `col` of two frames as bar charts.

    `a` and `b` are plotted against their own index and labelled `a_name`
    and `b_name`; the bars of `b` are drawn narrower (width 0.5).
    """
    first_bars = go.Bar(x=a.index, y=a[col], opacity=0.4, name=a_name)
    second_bars = go.Bar(x=b.index, y=b[col], opacity=0.4, width=0.5, name=b_name)
    overlay = go.Layout(barmode='overlay')
    return go.Figure([first_bars, second_bars], overlay)

def addTrace(fig, trace, title, row, col=1):
    """Add `trace` to one subplot of `fig` and set that subplot's y-axis title.

    `row` and `col` select the subplot; `col` defaults to the first column.
    """
    cell = dict(row=row, col=col)
    fig.add_trace(trace, **cell)
    fig.update_yaxes(title_text=title, **cell)


In [None]:
# specs=[[{'colspan': 2},None], [{'colspan': 2}, {}], [{}, {}], [{}, {}], [{}, {}], [{}, {}] ]
# Statistics of the team under analysis
t = teams_dic[selected_team]

# Bar colours of the goal-balance chart: red for a season with a negative goal
# difference, blue otherwise. They are derived from the data instead of being
# hard-coded per season, so they stay correct for any team and period.
negative_color = 'rgba(255, 0, 0, 0.4)'
positive_color = 'rgba(0, 0, 255, 0.4)'
colors = [
    negative_color if goal_diff < 0 else positive_color
    for goal_diff in t.my_tot_stats['GDiff']
]

# Per-match share of shots that were on target (adds a column to the
# team's match-level frame)
norm = t.my_normalized_df
norm['ShotsTarget%'] = norm['ShotsTarget'].values / norm['Shots'].values

# Ready-made traces, keyed by a short label, shared by the figure cells
data = {
    'Points': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Point'], name='Points', showlegend=False),
    'Goal balance': go.Bar(x = t.my_tot_stats.index, y = t.my_tot_stats['GDiff'], name='Goal Difference', marker_color=colors, showlegend=False),
    'Concedes': go.Bar(x = t.vs_tot_stats.index, y = t.vs_tot_stats['Goal'], opacity=0.5, name='Goal Concedes', marker_color=negative_color, showlegend=True),
    'Goal': go.Bar(x = t.my_tot_stats.index, y = t.my_tot_stats['Goal'], opacity=0.5, name='Goals', marker_color=positive_color, showlegend=True),
    'Home Goal': go.Bar(x = t.my_home_stats.index, y = t.my_home_stats['Goal'], name='Home Goal', opacity=0.5, width=0.2, showlegend=False),
    'Away Goal': go.Bar(x = t.my_away_stats.index, y = t.my_away_stats['Goal'], name='Away Goal', opacity=0.5, width=0.2, showlegend=False),
    'Home Concedes': go.Bar(x = t.vs_home_stats.index, y = t.vs_home_stats['Goal'], name='Home Concedes', opacity=0.5, width=0.05, showlegend=False),
    'Away Concedes': go.Bar(x = t.vs_away_stats.index, y = t.vs_away_stats['Goal'], name='Away Concedes', opacity=0.5, width=0.05, showlegend=False),
    'OnTarget%': go.Box(x = norm.index, y = norm['ShotsTarget%'], opacity=0.6, name='target %', showlegend=False),
    'Shots': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Shots'], name='Shots', showlegend=False),
    'On Target': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['ShotsTarget'], name='Shots on target', showlegend=False),
    'Fouls': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Fouls'], name='Fouls', showlegend=False),
    'Corners': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Corner'], name='Corner', showlegend=False),
    'Yellow Cards': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['YellowCard'], name='Yellow Card', showlegend=False),
    'Red Cards': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['RedCard'], name='Red Card', showlegend=False)
}

In [None]:
# Three stacked panels sharing the season axis:
# goals scored/conceded, goal balance, and points.
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.04)

panel_plan = [
    # (trace key, y-axis title, row)
    ('Goal', 'Goal', 1),
    ('Concedes', 'Goal', 1),
    ('Goal balance', 'Balance', 2),
    ('Points', 'Points', 3),
]
for trace_key, axis_title, panel_row in panel_plan:
    addTrace(fig, data[trace_key], axis_title, panel_row)

# Hand-placed labels on the points panel: (season, y position, label text).
# The texts look like final league placements -- TODO confirm against a table.
season_notes = [
    ('11/12', 52, "12th"),
    ('12/13', 42, "15th"),
    ('13/14', 50, "11th"),
    ('14/15', 37, "17th"),
    ('15/16', 45, "13th"),
    ('16/17', 72, "<b>4th</b>"),
    ('17/18', 60, "<b>7th</b>"),
    ('18/19', 69, "<b>3rd</b>"),
]
for note_season, note_y, note_text in season_notes:
    fig.add_annotation(x=note_season, y=note_y, text=note_text)

# Attach every annotation to the axes of the third panel and give them all
# the same short vertical arrow and small font
fig.update_annotations({
    'xref': 'x3',
    'yref': 'y3',
    'ax': 0,
    'ay': -20,
    'showarrow': True,
    'arrowsize': .5,
    'arrowwidth': .5,
    'arrowhead': 0,
    'font': {'size': 9},
})

fig.update_xaxes(title_text="Season", row=3, col=1)
fig.update_layout(title_text=f"Positive Goal balance in last 3 seasons ({selected_team})", height=600)
fig.show()


In [None]:
# Shooting overview on a shared season axis: on-target share per match,
# shots on target, shots and goals per season.
fig = make_subplots(shared_xaxes=True, rows=4, cols=1, x_title='Season', vertical_spacing=0.04, row_width=[0.2, 0.2, 0.2, 0.4])

#addTrace(fig, data['Home Goal'], 'Goal', 1)
#addTrace(fig, data['Away Goal'], 'Goal', 1)
addTrace(fig, data['OnTarget%'], 'On target %', 1)
addTrace(fig, data['On Target'], 'On Target', 2)
addTrace(fig, data['Shots'], 'Shots', 3)
addTrace(fig, data['Goal'], 'Goal', 4)


# Each panel is identified by its y-axis title, so no legend is needed
fig.update_traces(showlegend=False)
# Title spelling corrected ("seasons"); it is shown on the rendered chart
fig.update_layout(title_text=f"Improved shooting precision in last 3 seasons ({selected_team})", barmode='stack', height=600)
fig.show()

In [None]:
# Discipline overview on a shared season axis: fouls, yellow and red cards
fig = make_subplots(shared_xaxes=True, rows=3, cols=1, x_title='Season', vertical_spacing=0.04)

addTrace(fig, data['Fouls'], 'Fouls Committed', 1)
addTrace(fig, data['Yellow Cards'], 'Yellow Cards', 2)
addTrace(fig, data['Red Cards'], 'Red Cards', 3)
#addTrace(fig, data['Corners'], 'Corners', 2)

# Each panel is identified by its y-axis title, so no legend is needed
fig.update_traces(showlegend=False)
# Title spelling corrected ("committed"); it is shown on the rendered chart
fig.update_layout(title_text=f"Increased number of committed fouls ({selected_team})", barmode='stack', height=600)
fig.show()


In [None]:
# Check correlation between Goals and Points using all teams from the entire dataset

fig = make_subplots(shared_xaxes=False, rows=2, cols=1, vertical_spacing=0.04)

# The selected team is drawn in solid red, every other team in faint grey
highlight_marker = dict(color='rgba(255, 0, 0, 1)', size=7, line_width=1)
muted_marker = dict(color='rgba(80, 80, 70, .3)')

# One list of scatter traces per subplot row, keyed by the row's y-axis title.
# The names used here are deliberately different from `data` and `t`, which
# the earlier figure cells rely on; reusing them here would break those cells
# when they are re-run after this one.
scatter_rows = {'Goal Difference': [], 'Total Goal': []}
for team_name, team_data in teams_dic.items():
    totals = team_data.my_tot_stats
    labels = [f'{team_name} {season}' for season in totals.index]

    is_selected = team_name == selected_team
    marker = highlight_marker if is_selected else muted_marker

    # Only the selected team gets a legend entry (on the first row)
    scatter_rows['Goal Difference'].append(go.Scatter(
        y=totals['GDiff'], x=totals['Point'],
        name=selected_team if is_selected else "",
        mode='markers', text=labels, marker=marker, showlegend=is_selected))
    scatter_rows['Total Goal'].append(go.Scatter(
        y=totals['Goal'], x=totals['Point'],
        mode='markers', text=labels, marker=marker, showlegend=False))

for row_number, (axis_title, row_traces) in enumerate(scatter_rows.items(), start=1):
    for row_trace in row_traces:
        fig.add_trace(row_trace, row=row_number, col=1)
    # The axis title only needs to be set once per row, not once per trace
    fig.update_yaxes(title_text=axis_title, row=row_number, col=1)

# Hand-placed annotations: (x, y, text, arrow offset ax, arrow offset ay)
goal_difference_notes = [
    (52, -2, "11/12<br>(12th)", -40, -40),
    (42, -17, "12/13<br>(15th)", 40, 40),
    (50, -8, "13/14<br>(11th)", 40, 40),
    (37, -19, "14/15<br>(17th)", -40, -40),
    (45, -6, "15/16<br>(13th)", -40, -40),
    (72, 21, "16/17<br>(4th)", 40, 40),
    (60, 18, "17/18<br>(7th)", -40, -40),
    (69, 31, "18/19<br>(<b>3rd</b>)", -40, -40),
    (102, 57, "Juventus<br>13/14 (1st)", 0, 40),
]
total_goal_notes = [
    (37, 38, "14/15<br>(17th)", -60, -40),
    (52, 41, "11/12<br>(12th)<br><b>Back to Serie A</b>", 40, 40),
    (72, 62, "16/17<br>(4th)<br>New Coach<br><b>Mr Gasperini</b>", 40, 40),
    (45, 41, "15/16<br>(13th)", -60, -60),
    (60, 57, "17/18<br>(7th)", -40, -40),
    (69, 77, "18/19<br>(3rd)", -40, -40),
    (86, 94, "Napoli<br>16/17 (3rd)", 80, 0),
]
for note_x, note_y, note_text, note_ax, note_ay in goal_difference_notes:
    fig.add_annotation(x=note_x, y=note_y, text=note_text, ax=note_ax, ay=note_ay, xref='x1', yref='y1')
for note_x, note_y, note_text, note_ax, note_ay in total_goal_notes:
    fig.add_annotation(x=note_x, y=note_y, text=note_text, ax=note_ax, ay=note_ay, xref='x2', yref='y2')

# Same thin arrow and small font for every annotation
fig.update_annotations(dict(
            showarrow=True,
            arrowsize = .5,
            arrowwidth = .5,
            arrowhead = 0,
            font = dict(size = 9)
            ))

fig.update_layout(title_text="Improvement in last 3 seasons (2016 - 2019)", height=1000)
fig.show()


In [None]:
# Goals scored vs goals conceded, one marker per team and season.
# TODO: remove subplots
fig = make_subplots(shared_xaxes=False, rows=1, cols=1)

# The selected team is drawn in solid red, every other team in faint grey
highlight_marker = dict(color='rgba(255, 0, 0, 1)', size=7, line_width=1)
muted_marker = dict(color='rgba(80, 80, 70, .3)')

graphs = []

# for each team (the loop names avoid overwriting `t`, which holds the
# selected team's data for the earlier cells)
for team_name, team_data in teams_dic.items():
    labels = [f'{team_name} {season}' for season in team_data.my_tot_stats.index]
    marker = highlight_marker if team_name == selected_team else muted_marker

    graphs.append(go.Scatter(y = team_data.vs_tot_stats['Goal'], x = team_data.my_tot_stats['Goal'], name="",
                              mode='markers', text=labels, marker=marker, showlegend=False))

for g in graphs:
    fig.add_trace(g, row=1, col=1)

# Axis titles are set once, after all traces have been added
fig.update_yaxes(title_text='Concedes', row=1, col=1)
fig.update_xaxes(title_text='Goal', row=1, col=1)

fig.add_annotation(x=27, y=84, text="Pescara <br>2012/2013", ay = -50)
fig.add_annotation(x=77, y=46, text="Atalanta <br>2018/2019", ay = -50)
fig.add_annotation(x=94, y=39, text="Napoli <br>2016/2017", ay = -50)

# Common arrow style and axis references for the annotations above
fig.update_annotations(dict(
            xref="x",
            yref="y",
            showarrow=True,
            ax = 0,
            arrowsize = 1,
            arrowwidth = 1,
            arrowhead = 1,
            font = dict(size = 12)
            ))

fig.update_layout(title_text="Correlation Concedes/Goal")
fig.show()

In [None]:
#np.corrcoef(x,y)[0,1]:.2f

In [None]:
#teams_dic.items()