# Atalanta 2010-2020 from Serie B to Champions

This notebook uses data from the [https://www.football-data.co.uk]

In [4]:
import pandas as pd
import numpy as np
import datetime
import collections

import plotly.graph_objects as go
from plotly.subplots import make_subplots

start_year=11
end_year=18
seasons = []
for year in range(start_year, end_year+1):
    seasons.append(str(year)+str(year+1))

def geturl(var):
    return f"https://www.football-data.co.uk/mmz4281/{var}/I1.csv"


# Div = League Division
# Date = Match Date (dd/mm/yy)
# HomeTeam = Home Team
# AwayTeam = Away Team
# FTR and Res = Full Time Result (H=Home Win, D=Draw, A=Away Win)
base_cols=['Date', 'HomeTeam', 'AwayTeam', 'FTR']

# Match Statistics (where available)

# FTHG and HG = Full Time Home Team Goals
# HS = Home Team Shots
# HST = Home Team Shots on Target
# HHW = Home Team Hit Woodwork (NOT AVAILABLE)
# HC = Home Team Corners
# HF = Home Team Fouls Committed
# HFKC = Home Team Free Kicks Conceded (NOT AVAILABLE)
# HO = Home Team Offsides (NOT AVAILABLE)
# HY = Home Team Yellow Cards
# HR = Home Team Red Cards
home_cols=['FTHG','HS','HST','HF','HC','HY','HR']

# FTAG and AG = Full Time Away Team Goals
# AS = Away Team Shots
# AST = Away Team Shots on Target
# AHW = Away Team Hit Woodwork (NOT AVAILABLE)
# AC = Away Team Corners
# AF = Away Team Fouls Committed
# AFKC = Away Team Free Kicks Conceded (NOT AVAILABLE)
# AO = Away Team Offsides (NOT AVAILABLE)
# AY = Away Team Yellow Cards
# AR = Away Team Red Cards
away_cols =['FTAG','AS','AST','AF','AC','AY','AR']
use_cols = base_cols + home_cols + away_cols

#point_map_home = 
#point_map_away = {'H':0, 'D':1, 'A':3}

li = []
for season in seasons:
    #df = pd.read_csv(geturl(season), index_col='Date', parse_dates=['Date']) 
    df = pd.read_csv(geturl(season), parse_dates=['Date'], usecols=use_cols)     
    df['Season'] = season[0:2] + '/' + season[2:]
    df['HomePoint'] = df['FTR'].map({'H':3, 'D':1, 'A':0})
    df['AwayPoint'] = df['FTR'].map({'H':0, 'D':1, 'A':3})
    df['GoalBalance'] = 0
    li.append(df)

df = pd.concat(li, sort=False)
df.set_index('Season', inplace=True)
df

Unnamed: 0_level_0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HS,AS,HST,AST,...,AF,HC,AC,HY,AY,HR,AR,HomePoint,AwayPoint,GoalBalance
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11/12,2011-09-09,Milan,Lazio,2.0,2.0,D,18.0,12.0,8.0,5.0,...,15.0,3.0,1.0,2.0,2.0,0.0,0.0,1.0,1.0,0
11/12,2011-10-09,Cesena,Napoli,1.0,3.0,A,11.0,18.0,3.0,6.0,...,12.0,4.0,6.0,2.0,3.0,1.0,0.0,0.0,3.0,0
11/12,2011-11-09,Catania,Siena,0.0,0.0,D,9.0,4.0,1.0,2.0,...,25.0,4.0,4.0,2.0,4.0,0.0,0.0,1.0,1.0,0
11/12,2011-11-09,Chievo,Novara,2.0,2.0,D,11.0,14.0,4.0,4.0,...,17.0,0.0,6.0,1.0,1.0,1.0,0.0,1.0,1.0,0
11/12,2011-11-09,Fiorentina,Bologna,2.0,0.0,H,15.0,14.0,7.0,2.0,...,12.0,12.0,1.0,1.0,1.0,0.0,0.0,3.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18/19,2019-05-26,Inter,Empoli,2.0,1.0,H,20.0,9.0,15.0,5.0,...,9.0,8.0,2.0,4.0,2.0,1.0,1.0,3.0,0.0,0
18/19,2019-05-26,Roma,Parma,2.0,1.0,H,16.0,9.0,8.0,5.0,...,8.0,13.0,8.0,2.0,1.0,0.0,0.0,3.0,0.0,0
18/19,2019-05-26,Sampdoria,Juventus,2.0,0.0,H,10.0,6.0,3.0,1.0,...,12.0,7.0,6.0,0.0,2.0,0.0,0.0,3.0,0.0,0
18/19,2019-05-26,Spal,Milan,2.0,3.0,A,7.0,16.0,4.0,8.0,...,13.0,8.0,4.0,2.0,2.0,0.0,0.0,0.0,3.0,0


In [9]:
TeamData = collections.namedtuple('TeamData', 'my_home_stats, vs_home_stats, my_away_stats, vs_away_stats, my_tot_stats, vs_tot_stats')

def normalize_df(df, cols):
    norm_cols = ['Goal', 'Shots', 'ShotsTarget', 'Corner', 'Fouls', 'YellowCard', 'RedCard','Point', 'GoalBalance']
    stats_df = df.groupby(df.index).sum()[cols]
    stats_df.columns = norm_cols
    return stats_df

def build_team_data(team, df, home_cols, away_cols):
    homedf = df[(df.loc[:,"HomeTeam"] == team)]
    awaydf = df[(df.loc[:,"AwayTeam"] == team)]
    my_home_stats = normalize_df(homedf, home_cols);
    vs_home_stats = normalize_df(homedf, away_cols);
    my_away_stats = normalize_df(awaydf, away_cols);
    vs_away_stats = normalize_df(homedf, home_cols);
    my_tot_stats = my_home_stats + my_away_stats 
    vs_tot_stats = vs_home_stats + vs_away_stats 
    my_tot_stats['GoalBalance'] = my_tot_stats['Goal'] - vs_tot_stats['Goal']
    return TeamData(my_home_stats, vs_home_stats, my_away_stats, vs_away_stats, my_tot_stats, vs_tot_stats)

teams = set(df['HomeTeam'])
teams = list(filter(lambda i:(type(i) is str), teams))

teams_dic={}
hcols = home_cols + ['HomePoint', 'GoalBalance']
acols = home_cols + ['AwayPoint', 'GoalBalance']

for team in teams:
    teams_dic[team] = build_team_data(team, df, hcols, acols)

## Visualization

In [None]:
def compare_histograms(a, b, col, a_name, b_name):
    data = [go.Bar(x=a.index, y=a[col], opacity=0.4, name=a_name),
            go.Bar(x=b.index, y=b[col], opacity=0.4, width=0.5, name=b_name)]
    layout = go.Layout(barmode='overlay')
    fig = go.Figure(data, layout)
    return fig

In [18]:
# specs=[[{'colspan': 2},None], [{'colspan': 2}, {}], [{}, {}], [{}, {}], [{}, {}], [{}, {}] ]
t = teams_dic['Atalanta']

data = {
    'Points': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Point'], name='Points'),
    'Goal balance': go.Bar(x = t.my_tot_stats.index, y = t.my_tot_stats['GoalBalance'], name='Goal Balance', opacity=0.5),
    'Goal': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Goal'], name='Goals'),
    'Home Goal': go.Bar(x = t.my_home_stats.index, y = t.my_home_stats['Goal'], name='Home Goal', opacity=0.5, width=0.05),
    'Away Goal': go.Bar(x = t.my_away_stats.index, y = t.my_away_stats['Goal'], name='Away Goal', opacity=0.5, width=0.05),
    'Concedes': go.Scatter(x = t.vs_tot_stats.index, y = t.vs_tot_stats['Goal'], name='Goal Concedes'),
    'Home Concedes': go.Bar(x = t.vs_home_stats.index, y = t.vs_home_stats['Goal'], name='Home Concedes', opacity=0.5, width=0.05),
    'Away Concedes': go.Bar(x = t.vs_away_stats.index, y = t.vs_away_stats['Goal'], name='Away Concedes', opacity=0.5, width=0.05),
    'Shots': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Shots'], name='Shots'),
    'On Target': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['ShotsTarget'], name='Shots on target'), 
    'Fouls': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Fouls'], name='Fouls'),
    'Corners': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['Corner'], name='Corner'), 
    'Yellow Cards': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['YellowCard'], name='Yellow Card'), 
    'Red Cards': go.Scatter(x = t.my_tot_stats.index, y = t.my_tot_stats['RedCard'], name='Red Card') #, showlegend=False), 
}

def addTrace(fig, trace, title, row, col=1):
    fig.add_trace(trace, row=row, col=col)
    fig.update_yaxes(title_text=title, row=row, col=col)
    
#fig = make_subplots(shared_xaxes=True, rows=10, cols=1, subplot_titles=list(traces.keys()))
fig = make_subplots(shared_xaxes=True, rows=10, cols=1)

addTrace(fig, data['Points'], 'Points', 1)
addTrace(fig, data['Goal balance'], 'Goal balance', 2)
addTrace(fig, data['Goal'], 'Goal', 3)
addTrace(fig, data['Home Goal'], 'Goal', 3)
addTrace(fig, data['Away Goal'], 'Goal', 3)
addTrace(fig, data['Concedes'], 'Concedes', 4)
addTrace(fig, data['Home Concedes'], 'Concedes', 4)
addTrace(fig, data['Away Concedes'], 'Concedes', 4)
addTrace(fig, data['Shots'], 'Shots', 5)
addTrace(fig, data['On Target'], 'On Target', 6)
addTrace(fig, data['Fouls'], 'Fouls', 7)
addTrace(fig, data['Corners'], 'Corners', 8)
addTrace(fig, data['Yellow Cards'], 'Yellow Cards', 9)
addTrace(fig, data['Red Cards'], 'Red Cards', 10)

fig.update_traces(showlegend=False)
fig.update_layout(title_text="Game analysis by Season", height=1400, barmode='stack')
fig.show()


In [20]:
data = [
    go.Bar(x = t.my_tot_stats.index, y = t.my_tot_stats['Goal'], opacity=0.5, name='total goals'),
    go.Bar(x = t.vs_tot_stats.index, y = t.vs_tot_stats['Goal'], opacity=0.5, name='total concedes')]
layout = {
    'title': 'Goals vs Concedes',
    'xaxis': { 'title': 'Season' },
    'yaxis': { 'title': 'Goals' }
}
fig = go.Figure(data, layout)
fig.show()

In [22]:
# TEST SCATTER FTAG
#fig = go.Figure(data=go.Scatter(x=df['FTHG'], y=df['HomePoint'], mode='markers'))
#fig = go.Figure(data=go.Box(y=my_tot_stats['GoalBalance'], x=my_tot_stats['Point']))
data = []
for k,t in teams_dic.items():
    data.append(go.Scatter(y = t.my_tot_stats['GoalBalance'], x = t.my_tot_stats['Point'], mode='markers'))

fig.update_traces(showlegend=False)
fig = go.Figure(data)
fig.show()

