# Atalanta 2010-2020 from Serie B to Champions

This notebook uses match data from [football-data.co.uk](https://www.football-data.co.uk).

In [None]:
import pandas as pd
import numpy as np
import datetime
import collections

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define the LEAGUE, TEAM and the PERIOD of analysis (start/end year).
# Years are two-digit season start years: start_year=11 selects season 11/12.
league = 'I1'
selected_team = 'Atalanta'
start_year = 11
end_year = 18

# Season codes as used in the download URL: two zero-padded digits for the
# starting year followed by two for the next one (e.g. 9 -> '0910').
# The modulo keeps the code four characters long across the century wrap
# (99 -> '9900').
seasons = [
    f"{year % 100:02d}{(year + 1) % 100:02d}"
    for year in range(start_year, end_year + 1)
]

def geturl(season, league):
    """Return the CSV download URL for the given season code and league code."""
    base = "https://www.football-data.co.uk/mmz4281"
    csv_name = f"{league}.csv"
    return "/".join((base, f"{season}", csv_name))


# ---- Column legend -------------------------------------------------------
#
# Match identification
#   Div          League Division
#   Date         Match Date (dd/mm/yy)
#   HomeTeam     Home Team
#   AwayTeam     Away Team
#   FTR / Res    Full Time Result (H=Home Win, D=Draw, A=Away Win)
base_cols = 'Date HomeTeam AwayTeam FTR'.split()

# Match statistics (where available) -- home side
#   FTHG / HG    Full Time Home Team Goals
#   HS           Home Team Shots
#   HST          Home Team Shots on Target
#   HHW          Home Team Hit Woodwork (NOT AVAILABLE)
#   HC           Home Team Corners
#   HF           Home Team Fouls Committed
#   HFKC         Home Team Free Kicks Conceded (NOT AVAILABLE)
#   HO           Home Team Offsides (NOT AVAILABLE)
#   HY           Home Team Yellow Cards
#   HR           Home Team Red Cards
home_cols = 'FTHG HS HST HF HC HY HR'.split()

# Match statistics (where available) -- away side
#   FTAG / AG    Full Time Away Team Goals
#   AS           Away Team Shots
#   AST          Away Team Shots on Target
#   AHW          Away Team Hit Woodwork (NOT AVAILABLE)
#   AC           Away Team Corners
#   AF           Away Team Fouls Committed
#   AFKC         Away Team Free Kicks Conceded (NOT AVAILABLE)
#   AO           Away Team Offsides (NOT AVAILABLE)
#   AY           Away Team Yellow Cards
#   AR           Away Team Red Cards
away_cols = 'FTAG AS AST AF AC AY AR'.split()

# Every column requested when reading the CSV files
use_cols = [*base_cols, *home_cols, *away_cols]

# Download one CSV per season and stack them into a single match table.
li = []
for season in seasons:
    # The Date column is published as dd/mm/yy, so it must be parsed
    # day-first; otherwise ambiguous dates such as 03/04/12 are read
    # month-first.
    season_df = pd.read_csv(
        geturl(season, league),
        usecols=use_cols,
        parse_dates=['Date'],
        dayfirst=True,
    )
    # Readable season label built from the season code, e.g. '1112' -> '11/12'
    season_df['Season'] = season[0:2] + '/' + season[2:]
    # Points earned by each side, derived from the full-time result
    season_df['HomePoint'] = season_df['FTR'].map({'H': 3, 'D': 1, 'A': 0})
    season_df['AwayPoint'] = season_df['FTR'].map({'H': 0, 'D': 1, 'A': 3})
    # Placeholder column; the per-team column lists include 'GDiff'
    season_df['GDiff'] = 0
    li.append(season_df)

# One row per match, indexed by the season label
fulldf = pd.concat(li, sort=False)
fulldf.set_index('Season', inplace=True)

## Full dataset

In [None]:
# Show the concatenated match table (indexed by 'Season')
fulldf

In [None]:
# utility functions

# Container for the six per-season statistic tables built for one team:
# "my_*" holds the team's own figures, "vs_*" those of its opponents,
# each split into home matches, away matches and their total.
TeamData = collections.namedtuple(
    'TeamData',
    [
        'my_home_stats',
        'vs_home_stats',
        'my_away_stats',
        'vs_away_stats',
        'my_tot_stats',
        'vs_tot_stats',
    ],
)

def normalize_df(adf, cols):
    """Sum ``cols`` per index value and give them side-neutral names.

    Parameters
    ----------
    adf : pandas.DataFrame
        Match rows; rows sharing the same index value are summed together.
    cols : list of str
        Source columns to aggregate (home- or away-side statistics).

    Returns
    -------
    pandas.DataFrame
        One row per distinct index value. Known source columns are renamed
        to their side-neutral name ('Goal', 'Shots', ...) and placed in the
        canonical order; unknown columns keep their name and come last.
    """
    # Rename by source column NAME rather than by position: a positional
    # rename silently mislabels statistics whenever the caller's column
    # order differs from the canonical one (e.g. fouls listed before corners).
    name_map = {
        'FTHG': 'Goal', 'FTAG': 'Goal',
        'HS': 'Shots', 'AS': 'Shots',
        'HST': 'ShotsTarget', 'AST': 'ShotsTarget',
        'HC': 'Corner', 'AC': 'Corner',
        'HF': 'Fouls', 'AF': 'Fouls',
        'HY': 'YellowCard', 'AY': 'YellowCard',
        'HR': 'RedCard', 'AR': 'RedCard',
        'HomePoint': 'Point', 'AwayPoint': 'Point',
        'GDiff': 'GDiff',
    }
    norm_cols = ['Goal', 'Shots', 'ShotsTarget', 'Corner', 'Fouls', 'YellowCard', 'RedCard', 'Point', 'GDiff']

    cols = list(cols)
    # Select the requested columns BEFORE summing so that unrelated
    # non-numeric columns (dates, team names) are never aggregated.
    stats_df = adf[cols].groupby(level=0).sum()
    stats_df.columns = [name_map.get(col, col) for col in cols]

    # Canonical columns first (in canonical order), then anything unmapped.
    ordered = [col for col in norm_cols if col in stats_df.columns]
    extras = [col for col in stats_df.columns if col not in norm_cols]
    return stats_df[ordered + extras]

def build_team_data(team, df, home_cols, away_cols):
    """Build the per-season statistic tables for ``team`` and its opponents.

    Parameters
    ----------
    team : str
        Name matched against the ``HomeTeam`` and ``AwayTeam`` columns.
    df : pandas.DataFrame
        Match table containing ``HomeTeam``, ``AwayTeam`` and the columns
        listed in ``home_cols`` / ``away_cols``.
    home_cols, away_cols : list of str
        Home-side and away-side statistic columns handed to ``normalize_df``.

    Returns
    -------
    TeamData
        Own ("my") and opponent ("vs") statistics for home matches, away
        matches and their totals. Only ``my_tot_stats['GDiff']`` is
        computed here; the other ``GDiff`` columns are plain sums of the
        input column.
    """
    # Keep only the matches the team played, split by venue
    home_matches = df[df["HomeTeam"] == team]
    away_matches = df[df["AwayTeam"] == team]

    # At home the team owns the HOME columns and the opponent the AWAY ones
    my_home_stats = normalize_df(home_matches, home_cols)
    vs_home_stats = normalize_df(home_matches, away_cols)

    # Away from home the roles of the two column sets are swapped
    my_away_stats = normalize_df(away_matches, away_cols)
    vs_away_stats = normalize_df(away_matches, home_cols)

    # Totals = home + away. fill_value=0 keeps a season that appears on
    # only one side (home-only or away-only rows) from turning into NaN.
    my_tot_stats = my_home_stats.add(my_away_stats, fill_value=0)
    vs_tot_stats = vs_home_stats.add(vs_away_stats, fill_value=0)

    # Goal difference = goals scored minus goals conceded
    my_tot_stats['GDiff'] = my_tot_stats['Goal'] - vs_tot_stats['Goal']

    return TeamData(my_home_stats, vs_home_stats, my_away_stats, vs_away_stats, my_tot_stats, vs_tot_stats)

In [None]:
# Get the unique team names from the full data frame.
# Non-string entries (presumably NaN coming from incomplete rows) are
# skipped, and the names are sorted so the processing order is reproducible
# instead of depending on set iteration order.
teams = sorted(name for name in set(fulldf['HomeTeam']) if isinstance(name, str))

# Per-side statistic columns plus the matching points column and GDiff
hcols = home_cols + ['HomePoint', 'GDiff']
acols = away_cols + ['AwayPoint', 'GDiff']

# Team name -> TeamData for every team in the dataset
teams_dic = {team: build_team_data(team, fulldf, hcols, acols) for team in teams}

## Selected Team Total Stats

In [None]:
# Total (home + away) statistics of the selected team
res = teams_dic[selected_team].my_tot_stats
res

## Visualization

In [None]:
# utility functions

def compare_histograms(a, b, col, a_name, b_name):
    """Return a figure overlaying column ``col`` of ``a`` and ``b`` as bars.

    Both bar series are drawn at 0.4 opacity against their frame's index;
    the series for ``b`` is given an explicit bar width of 0.5.
    """
    first_bars = go.Bar(x=a.index, y=a[col], opacity=0.4, name=a_name)
    second_bars = go.Bar(x=b.index, y=b[col], opacity=0.4, width=0.5, name=b_name)
    overlay_layout = go.Layout(barmode='overlay')
    return go.Figure([first_bars, second_bars], overlay_layout)

def addTrace(fig, trace, title, row, col=1):
    """Add ``trace`` to the subplot at (row, col) and title that subplot's y axis."""
    cell = {'row': row, 'col': col}
    fig.add_trace(trace, **cell)
    fig.update_yaxes(title_text=title, **cell)


In [None]:
# Statistics of the selected team
t = teams_dic[selected_team]


def _line_trace(stats, column, name):
    # Line trace of one statistic column plotted against the frame's index
    return go.Scatter(x=stats.index, y=stats[column], name=name)


def _bar_trace(stats, column, name, **extra):
    # Half-transparent bar trace of one statistic column
    return go.Bar(x=stats.index, y=stats[column], name=name, opacity=0.5, **extra)


# Every trace of the figure, keyed by a short label
data = {
    'Points': _line_trace(t.my_tot_stats, 'Point', 'Points'),
    'Goal balance': _bar_trace(t.my_tot_stats, 'GDiff', 'Goal Difference'),
    'Goal': _line_trace(t.my_tot_stats, 'Goal', 'Goals'),
    'Home Goal': _bar_trace(t.my_home_stats, 'Goal', 'Home Goal', width=0.05),
    'Away Goal': _bar_trace(t.my_away_stats, 'Goal', 'Away Goal', width=0.05),
    'Concedes': _line_trace(t.vs_tot_stats, 'Goal', 'Goal Concedes'),
    'Home Concedes': _bar_trace(t.vs_home_stats, 'Goal', 'Home Concedes', width=0.05),
    'Away Concedes': _bar_trace(t.vs_away_stats, 'Goal', 'Away Concedes', width=0.05),
    'Shots': _line_trace(t.my_tot_stats, 'Shots', 'Shots'),
    'On Target': _line_trace(t.my_tot_stats, 'ShotsTarget', 'Shots on target'),
    'Fouls': _line_trace(t.my_tot_stats, 'Fouls', 'Fouls'),
    'Corners': _line_trace(t.my_tot_stats, 'Corner', 'Corner'),
    'Yellow Cards': _line_trace(t.my_tot_stats, 'YellowCard', 'Yellow Card'),
    'Red Cards': _line_trace(t.my_tot_stats, 'RedCard', 'Red Card'),
}

# One entry per subplot row, top to bottom: (y-axis title, labels of the traces drawn there)
panels = [
    ('Points', ['Points']),
    ('Goal balance', ['Goal balance']),
    ('Goal', ['Goal', 'Home Goal', 'Away Goal']),
    ('Concedes', ['Concedes', 'Home Concedes', 'Away Concedes']),
    ('Shots', ['Shots']),
    ('On Target', ['On Target']),
    ('Fouls', ['Fouls']),
    ('Corners', ['Corners']),
    ('Yellow Cards', ['Yellow Cards']),
    ('Red Cards', ['Red Cards']),
]

fig = make_subplots(shared_xaxes=True, rows=len(panels), cols=1, x_title='Season')

for row_number, (axis_title, trace_labels) in enumerate(panels, start=1):
    for label in trace_labels:
        addTrace(fig, data[label], axis_title, row_number)

fig.update_traces(showlegend=False)
fig.update_layout(title_text=f"Game analysis ({selected_team})", height=1500, barmode='stack')
fig.show()


In [None]:
# Bar chart of total goals scored and conceded per season, read from the
# TeamData currently bound to `t`
scored_bars = go.Bar(
    x=t.my_tot_stats.index,
    y=t.my_tot_stats['Goal'],
    opacity=0.5,
    name='total goals',
)
conceded_bars = go.Bar(
    x=t.vs_tot_stats.index,
    y=t.vs_tot_stats['Goal'],
    opacity=0.5,
    name='total concedes',
)
data = [scored_bars, conceded_bars]
layout = dict(
    title=f'Goals vs Concedes comparison ({selected_team})',
    xaxis=dict(title='Season'),
    yaxis=dict(title='Goals'),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Check correlation between Goals and Points using all teams from the entire dataset
data = {'Goal Difference': [], 'Total Goal': [], 'Total Concedes': []}
fig = make_subplots(shared_xaxes=True, rows=3, cols=1, x_title='Points')

# Dedicated loop names: iterating with `t` would overwrite the selected-team
# data that other cells read from `t`, so re-running them afterwards would
# silently plot the wrong team.
for team_name, team_data in teams_dic.items():
    points = team_data.my_tot_stats['Point']
    data['Goal Difference'].append(
        go.Scatter(y=team_data.my_tot_stats['GDiff'], x=points, mode='markers', name=team_name))
    data['Total Goal'].append(
        go.Scatter(y=team_data.my_tot_stats['Goal'], x=points, mode='markers', name=team_name))
    data['Total Concedes'].append(
        go.Scatter(y=team_data.vs_tot_stats['Goal'], x=points, mode='markers', name=team_name))

# One subplot row per metric, in dictionary order
for row_number, (metric, traces) in enumerate(data.items(), start=1):
    for trace in traces:
        fig.add_trace(trace, row=row_number, col=1)
    # The axis title only needs to be set once per row, not once per trace
    fig.update_yaxes(title_text=metric, row=row_number, col=1)

fig.update_traces(showlegend=False)
fig.update_layout(title_text="Correlation graphs (All teams)")
fig.show()