# Atalanta 2010-2020 from Serie B to Champions

This notebook uses data from the [https://www.football-data.co.uk]

In [1]:
import pandas as pd
import numpy as np
import datetime

import plotly.graph_objects as go
from plotly.subplots import make_subplots

seasons = []
for year in range(11,19):
    seasons.append(str(year)+str(year+1))

def geturl(var):
    return f"https://www.football-data.co.uk/mmz4281/{var}/I1.csv"

# Div = League Division
# Date = Match Date (dd/mm/yy)
# HomeTeam = Home Team
# AwayTeam = Away Team
# FTR and Res = Full Time Result (H=Home Win, D=Draw, A=Away Win)
base_cols=['Date', 'HomeTeam', 'AwayTeam', 'FTR']

# Match Statistics (where available)

# FTHG and HG = Full Time Home Team Goals
# HS = Home Team Shots
# HST = Home Team Shots on Target
# HHW = Home Team Hit Woodwork (NOT AVAILABLE)
# HC = Home Team Corners
# HF = Home Team Fouls Committed
# HFKC = Home Team Free Kicks Conceded (NOT AVAILABLE)
# HO = Home Team Offsides (NOT AVAILABLE)
# HY = Home Team Yellow Cards
# HR = Home Team Red Cards
home_cols=['FTHG','HS','HST','HF','HC','HY','HR']

# FTAG and AG = Full Time Away Team Goals
# AS = Away Team Shots
# AST = Away Team Shots on Target
# AHW = Away Team Hit Woodwork (NOT AVAILABLE)
# AC = Away Team Corners
# AF = Away Team Fouls Committed
# AFKC = Away Team Free Kicks Conceded (NOT AVAILABLE)
# AO = Away Team Offsides (NOT AVAILABLE)
# AY = Away Team Yellow Cards
# AR = Away Team Red Cards
away_cols =['FTAG','AS','AST','AF','AC','AY','AR']
use_cols = base_cols + home_cols + away_cols

li = []
for season in seasons:
    #df = pd.read_csv(geturl(season), index_col='Date', parse_dates=['Date']) 
    df = pd.read_csv(geturl(season), parse_dates=['Date'], usecols=use_cols)     
    df['Season'] = season[0:2] + '/' + season[2:]
    li.append(df)

df = pd.concat(li, sort=False)
df.set_index('Season', inplace=True)
    
team = "Atalanta"
home_filter = df.loc[:,"HomeTeam"] == team
away_filter = df.loc[:,"AwayTeam"] == team
#allcases = df[home|away].iloc[:,1:22]
homedf = df[home_filter]
awaydf = df[away_filter]
df

Unnamed: 0_level_0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
11/12,2011-09-09,Milan,Lazio,2.0,2.0,D,18.0,12.0,8.0,5.0,16.0,15.0,3.0,1.0,2.0,2.0,0.0,0.0
11/12,2011-10-09,Cesena,Napoli,1.0,3.0,A,11.0,18.0,3.0,6.0,14.0,12.0,4.0,6.0,2.0,3.0,1.0,0.0
11/12,2011-11-09,Catania,Siena,0.0,0.0,D,9.0,4.0,1.0,2.0,14.0,25.0,4.0,4.0,2.0,4.0,0.0,0.0
11/12,2011-11-09,Chievo,Novara,2.0,2.0,D,11.0,14.0,4.0,4.0,20.0,17.0,0.0,6.0,1.0,1.0,1.0,0.0
11/12,2011-11-09,Fiorentina,Bologna,2.0,0.0,H,15.0,14.0,7.0,2.0,22.0,12.0,12.0,1.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18/19,2019-05-26,Inter,Empoli,2.0,1.0,H,20.0,9.0,15.0,5.0,11.0,9.0,8.0,2.0,4.0,2.0,1.0,1.0
18/19,2019-05-26,Roma,Parma,2.0,1.0,H,16.0,9.0,8.0,5.0,13.0,8.0,13.0,8.0,2.0,1.0,0.0,0.0
18/19,2019-05-26,Sampdoria,Juventus,2.0,0.0,H,10.0,6.0,3.0,1.0,6.0,12.0,7.0,6.0,0.0,2.0,0.0,0.0
18/19,2019-05-26,Spal,Milan,2.0,3.0,A,7.0,16.0,4.0,8.0,17.0,13.0,8.0,4.0,2.0,2.0,0.0,0.0


In [5]:
def normalize_df(df, cols):
    norm_cols = ['Goal', 'Shots', 'ShotsTarget', 'Corner', 'Fouls', 'YellowCard', 'RedCard']
    stats_df = df.groupby(homedf.index).sum()[cols]
    stats_df.columns = norm_cols
    return stats_df

my_home_stats = normalize_df(homedf, home_cols);
vs_home_stats = normalize_df(homedf, away_cols);

my_away_stats = normalize_df(awaydf, away_cols);
vs_away_stats = normalize_df(homedf, home_cols);

#df.groupby(['col1','col2']).agg({'col3':'sum','col4':'sum'}).reset_index()
print('home_stats\n',my_home_stats)
print('away_stats\n',my_away_stats)
my_tot_stats = my_home_stats + my_away_stats 
vs_tot_stats = vs_home_stats + vs_away_stats 
my_tot_stats

home_stats
         Goal  Shots  ShotsTarget  Corner  Fouls  YellowCard  RedCard
Season                                                              
11/12   23.0  229.0         78.0   302.0   98.0        50.0      1.0
12/13   19.0  223.0         68.0   282.0  100.0        54.0      7.0
13/14   28.0  247.0         85.0   251.0  110.0        38.0      0.0
14/15   22.0  271.0         74.0   307.0  105.0        49.0      7.0
15/16   27.0  254.0         80.0   301.0  118.0        41.0      4.0
16/17   31.0  299.0        114.0   277.0  132.0        40.0      0.0
17/18   30.0  302.0         92.0   229.0  110.0        30.0      2.0
18/19   36.0  250.0        133.0   213.0  139.0        28.0      0.0
away_stats
         Goal  Shots  ShotsTarget  Corner  Fouls  YellowCard  RedCard
Season                                                              
11/12   18.0  184.0         64.0   290.0   91.0        46.0      3.0
12/13   20.0  186.0         61.0   303.0   80.0        51.0      6.0
13/14   15

Unnamed: 0_level_0,Goal,Shots,ShotsTarget,Corner,Fouls,YellowCard,RedCard
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
11/12,41.0,413.0,142.0,592.0,189.0,96.0,4.0
12/13,39.0,409.0,129.0,585.0,180.0,105.0,13.0
13/14,43.0,447.0,154.0,525.0,190.0,81.0,6.0
14/15,38.0,463.0,132.0,629.0,193.0,103.0,9.0
15/16,41.0,454.0,137.0,636.0,190.0,95.0,14.0
16/17,62.0,534.0,201.0,603.0,229.0,85.0,3.0
17/18,57.0,573.0,197.0,491.0,229.0,67.0,4.0
18/19,77.0,475.0,254.0,429.0,242.0,61.0,3.0


In [13]:
def compare_histograms(a, b, col, a_name, b_name):
    data = [go.Bar(x=a.index, y=a[col], opacity=0.4, name=a_name),
            go.Bar(x=b.index, y=b[col], opacity=0.4, width=0.5, name=b_name)]
    layout = go.Layout(barmode='overlay')
    fig = go.Figure(data, layout)
    return fig

## Visualization

In [16]:
#home_stat
fig = compare_histograms(my_tot_stats, vs_tot_stats, 'Goal', 'total goals', 'total concedes')
fig.show()


In [15]:
fig1 = compare_histograms(my_home_stats, vs_home_stats, 'Goal', 'home goals', 'home concedes')
fig2 = compare_histograms(my_away_stats, vs_away_stats, 'Goal', 'away goals', 'away concedes')
fig1.show()
fig2.show()