# Import Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime as dt
import itertools
from IPython.display import display


# Turn off pandas' chained-assignment warning (the option's default is 'warn').
pd.set_option('mode.chained_assignment', None)

# Display settings for the wide per-match tables rendered in this notebook.
pd.options.display.precision = 3
pd.options.display.width = 3000
pd.options.display.max_columns = 75
pd.options.display.max_rows = 38

In [3]:
from sklearn.metrics import classification_report,  confusion_matrix, accuracy_score, f1_score,  precision_recall_fscore_support
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, accuracy_score, roc_auc_score, f1_score


In [10]:
# Directory holding the per-season CSV files. The loader below builds each path
# with plain string concatenation (folder + file name), so the trailing slash is required.
folder='BPL Football Data/'

In [11]:
# Load one CSV per season (files named like '2000-01.csv' ... '2021-22.csv')
# into a dict keyed by the season's starting year.
raw_data = {
    start_year: pd.read_csv(f'{folder}{start_year}-{(start_year + 1) % 100:02d}.csv')
    for start_year in range(2000, 2022)
}

In [12]:
# Columns kept from every raw season file (home value first, then away).
cols = [
    # fixture, full-time goals and result, referee
    'Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'Referee',
    # shots and shots on target
    'HS', 'AS', 'HST', 'AST',
    # fouls and corners
    'HF', 'AF', 'HC', 'AC',
    # yellow and red cards
    'HY', 'AY', 'HR', 'AR',
]


In [13]:
# Per-season frames restricted to the columns listed in `cols`.
stats = {year: season[cols] for year, season in raw_data.items()}


# Feature Engineering

# Stats and Feature Engineering
**Will get the average later**

HTGS: Home Total Goals Scored   
ATGS: Away Total Goals Scored  
HTGC: Home Total Goals Conceded    
ATGC: Away Total Goals Conceded  
HTS: Home Total Shots  
ATS: Away Total Shots  
HTSC: Home total shots conceded   
ATSC: Away total shots conceded   
HTTS: Home Total Target Shots  
ATTS: Away Total Target Shots  
HTTSC: Home total target shots conceded  
ATTSC: Away total target shots conceded   
HTF: Home Total Fouls  
ATF: Away Total Fouls  
HTC: Home Total Corners  
ATC: Away Total Corners  
HTY: Home Total Yellows  
ATY: Away Total Yellows  
HTR: Home Total Reds  
ATR: Away Total Reds  
HTP: Home Team Points   
ATP: Away Team Points   
HTFormPts: Home points in the last 5 games   
ATFormPts: Away points in the last 5 games  
MW: Matchweek   

In [14]:
def in_game_stats(year_stats):
  """Attach each side's pre-match running season totals to one season of results.

  For every team listed in ``HomeTeam`` the team's matches (home and away) are
  put in index order and running totals are accumulated. Each total is shifted
  by one match, so a row only contains what the team had accumulated *before*
  that match (the first match of every team is all zeros).

  Parameters
  ----------
  year_stats : pandas.DataFrame
      One row per match. Must contain ``HomeTeam``, ``AwayTeam``, ``FTHG``,
      ``FTAG``, ``HS``, ``AS``, ``HST``, ``AST``, ``HF``, ``AF``, ``HC``,
      ``AC``, ``HY``, ``AY``, ``HR`` and ``AR``. Any other columns are carried
      through unchanged. The frame itself is not modified.

  Returns
  -------
  pandas.DataFrame
      The input columns followed by the home side's running columns prefixed
      with ``H`` (``HTGS``, ``HTGC``, ``HTS``, ``HTSC``, ``HTTS``, ``HTTSC``,
      ``HTF``, ``HTC``, ``HTY``, ``HTR``, ``HTP``, ``HTFormPts``) and the same
      twelve columns for the away side prefixed with ``A``.
  """
  # (home column, away column, running total "for", running total "against")
  two_sided = [('FTHG', 'FTAG', 'TGS', 'TGC'),
               ('HS', 'AS', 'TS', 'TSC'),
               ('HST', 'AST', 'TTS', 'TTSC')]
  # (home column, away column, running total of the team's own value)
  one_sided = [('HF', 'AF', 'TF'),
               ('HC', 'AC', 'TC'),
               ('HY', 'AY', 'TY'),
               ('HR', 'AR', 'TR')]
  running_cols = ([name for spec in two_sided for name in spec[2:]]
                  + [spec[2] for spec in one_sided]
                  + ['TP', 'TFormPts'])

  def before_kickoff(per_match):
    '''Running total of a per-match series, excluding the current match.'''
    return per_match.cumsum().shift(periods=1, fill_value=0)

  per_team = []
  for team in year_stats['HomeTeam'].unique():
    home_df = year_stats[year_stats['HomeTeam'] == team]
    away_df = year_stats[year_stats['AwayTeam'] == team]
    # pd.concat returns a new frame, so the columns added below never touch year_stats.
    comb_df = pd.concat([home_df, away_df]).sort_index()
    is_home = comb_df['HomeTeam'] == team

    for home_stat, away_stat, total_for, total_against in two_sided:
      # The team's own value is the home column when it played at home, else the away column.
      own = comb_df[home_stat].where(is_home, comb_df[away_stat])
      opponent = comb_df[away_stat].where(is_home, comb_df[home_stat])
      comb_df[total_for] = before_kickoff(own)
      comb_df[total_against] = before_kickoff(opponent)

    for home_stat, away_stat, total_name in one_sided:
      own = comb_df[home_stat].where(is_home, comb_df[away_stat])
      comb_df[total_name] = before_kickoff(own)

    # League points for each match: 3 for a win, 1 for a draw, 0 otherwise.
    scored = comb_df['FTHG'].where(is_home, comb_df['FTAG'])
    conceded = comb_df['FTAG'].where(is_home, comb_df['FTHG'])
    points = 3 * (scored > conceded).astype(int) + (scored == conceded).astype(int)
    comb_df['TP'] = before_kickoff(points)
    # Points gained over the team's previous five matches (all points so far if fewer were played).
    comb_df['TFormPts'] = comb_df['TP'] - comb_df['TP'].shift(5, fill_value=0)
    comb_df['Team'] = team

    per_team.append(comb_df)

  # Every match now appears twice: once from the home team's view, once from the away team's.
  df = pd.concat(per_team).sort_index()
  home_view = df[df['HomeTeam'] == df['Team']].drop(columns='Team')
  # Select the running columns by name rather than by position, so the result
  # does not depend on how many base columns year_stats has.
  away_view = df.loc[df['AwayTeam'] == df['Team'], running_cols]
  final_year = home_view.merge(away_view, how='inner', left_index=True,
                               right_index=True, suffixes=('H', 'A'))

  # Move the merge suffix to the front: 'TGSH' -> 'HTGS', 'TGSA' -> 'ATGS'.
  col_dic = {}
  for name in running_cols:
    col_dic[name + 'H'] = 'H' + name
    col_dic[name + 'A'] = 'A' + name

  return final_year.rename(columns=col_dic)




In [15]:
# Running-total features for every season.
stats1 = {year: in_game_stats(season) for year, season in stats.items()}

## Get MatchWeek:

In [16]:
def get_mw(playing_stat, matches_per_week=10):
    """Add a matchweek column ``MW`` derived from row position.

    Rows are numbered in their current order and grouped into consecutive
    blocks of ``matches_per_week`` rows: the first block gets ``MW`` 1, the
    next 2, and so on. The number of rows is taken from the frame, so seasons
    that are not exactly 380 matches long are handled as well.

    Parameters
    ----------
    playing_stat : pandas.DataFrame
        One row per match. Modified in place.
    matches_per_week : int, default 10
        Number of consecutive rows that make up one matchweek.

    Returns
    -------
    pandas.DataFrame
        The same ``playing_stat`` object, with the ``MW`` column added.

    Raises
    ------
    ValueError
        If ``matches_per_week`` is smaller than 1.
    """
    if matches_per_week < 1:
        raise ValueError('matches_per_week must be at least 1')
    playing_stat['MW'] = [position // matches_per_week + 1
                          for position in range(len(playing_stat))]
    return playing_stat

# Add the matchweek column to a copy of each season's feature table.
stats2 = {year: get_mw(stats1[year].copy()) for year in stats}


In [17]:
# Inspect the season keyed 2017 after the MW column has been added.
stats2[2017]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,HTGS,HTGC,HTS,HTSC,HTTS,HTTSC,HTF,HTC,HTY,HTR,HTP,HTFormPts,ATGS,ATGC,ATS,ATSC,ATTS,ATTSC,ATF,ATC,ATY,ATR,ATP,ATFormPts,MW
0,11/08/17,Arsenal,Leicester,4,3,H,M Dean,27,6,10,3,9,12,9,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,12/08/17,Brighton,Man City,0,2,A,M Oliver,6,14,2,4,6,9,3,10,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,12/08/17,Chelsea,Burnley,2,3,A,C Pawson,19,10,6,5,16,11,8,5,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,12/08/17,Crystal Palace,Huddersfield,0,3,A,J Moss,14,8,4,6,7,19,12,9,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,12/08/17,Everton,Stoke,1,0,H,N Swarbrick,9,9,4,1,13,10,6,7,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,13/05/18,Newcastle,Chelsea,3,0,H,M Atkinson,16,6,6,2,11,10,4,2,0,1,0,0,36,47,433,473,144,155,398,163,52,2,41,3,62,35,600,361,218,114,353,228,40,4,70,13,38
376,13/05/18,Southampton,Man City,0,1,A,A Marriner,8,13,3,2,8,10,1,12,3,1,0,0,37,55,443,480,142,168,408,226,60,2,36,8,105,27,651,230,259,84,334,272,57,2,97,13,38
377,13/05/18,Swansea,Stoke,1,2,A,A Taylor,26,8,11,5,12,9,6,0,1,2,0,0,27,54,310,530,93,186,365,144,50,1,33,1,33,67,374,552,126,209,427,136,60,1,30,3,38
378,13/05/18,Tottenham,Leicester,5,4,H,C Pawson,14,16,6,9,9,13,4,4,1,2,0,0,69,32,608,339,211,117,375,242,48,2,74,7,52,55,406,476,139,162,352,199,50,5,47,4,38


**GET TEAM FORM:**

In [None]:
def get_matchres(playing_stat):
    """Build a team-by-game table of results ('W', 'L' or 'D').

    Matches are read in row order. For each match both teams get one entry
    appended to their own list: 'W'/'L' according to ``FTR`` ('H' = home win,
    'A' = away win), and 'D' for any other ``FTR`` value.

    Parameters
    ----------
    playing_stat : pandas.DataFrame
        One row per match with ``HomeTeam``, ``AwayTeam`` and ``FTR`` columns.
        Every away team must also appear in ``HomeTeam``; otherwise a
        ``KeyError`` is raised.

    Returns
    -------
    pandas.DataFrame
        Indexed by team name (sorted), with integer columns 1..n where column
        k holds the result of each team's k-th match. Teams with fewer matches
        than the longest list are padded with missing values.
    """
    # Team names come straight from the column: no aggregation over the other
    # (partly non-numeric) columns is needed just to list the teams.
    teams = {team: [] for team in sorted(playing_stat['HomeTeam'].dropna().unique())}

    # FTR value -> (home team's result, away team's result)
    outcome = {'H': ('W', 'L'), 'A': ('L', 'W')}
    fixtures = zip(playing_stat['HomeTeam'], playing_stat['AwayTeam'], playing_stat['FTR'])
    for home_team, away_team, full_time_result in fixtures:
        home_res, away_res = outcome.get(full_time_result, ('D', 'D'))
        teams[home_team].append(home_res)
        teams[away_team].append(away_res)

    results = pd.DataFrame.from_dict(teams, orient='index')
    # Number the games from 1 so that column k is each team's k-th match.
    results.columns = list(range(1, results.shape[1] + 1))
    return results

def get_form(playing_stat,num):
    """Concatenate each team's last ``num`` results into one string per game.

    Starts from the table returned by ``get_matchres``. For every game column
    from ``num`` up to 38 the cell is replaced by the results of that game and
    the ``num - 1`` games before it, joined most-recent-first. Columns below
    ``num`` keep their single-result values.
    """
    form = get_matchres(playing_stat)
    form_final = form.copy()
    for game in range(num, 39):
        streak = ''
        for games_back in range(num):
            # Most recent result first, then progressively older ones.
            streak = streak + form[game - games_back]
        form_final[game] = streak
    return form_final

def add_form(playing_stat,num):
    """Add ``HM<num>`` / ``AM<num>``: each side's result ``num`` games back.

    The first ``num * 10`` rows get the placeholder 'M' because no result
    that far back exists yet. Rows are processed in blocks of ten, and every
    row in a block reads the same game column of the form table. The frame is
    modified in place and returned.
    """
    form = get_form(playing_stat,num)
    warmup_rows = num * 10
    home_marks = ['M'] * warmup_rows    # form is not available for the first num matchweeks
    away_marks = ['M'] * warmup_rows

    for position in range(warmup_rows, 380):
        # The column starts at `num` for row num*10 and moves on by one every ten rows.
        game = position // 10
        fixture = playing_stat.iloc[position]

        # Index num-1 of the streak string is its oldest entry (index 0 is the most recent).
        home_marks.append(form.loc[fixture.HomeTeam][game][num - 1])
        away_marks.append(form.loc[fixture.AwayTeam][game][num - 1])

    playing_stat['HM' + str(num)] = home_marks
    playing_stat['AM' + str(num)] = away_marks
    return playing_stat


def add_form_df(playing_statistics):
    """Add the form columns HM1..HM5 and AM1..AM5 by calling ``add_form`` for 1 to 5 games back."""
    for games_back in (1, 2, 3, 4, 5):
        playing_statistics = add_form(playing_statistics, games_back)
    return playing_statistics
    
# Add the form columns to a copy of every season's table.
stats3 = {year: add_form_df(stats2[year].copy()) for year in stats}

In [19]:
# First five rows of the season keyed 2017, now including the HM*/AM* form columns.
stats3[2017][:5]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,HTGS,HTGC,HTS,HTSC,HTTS,HTTSC,HTF,HTC,HTY,HTR,HTP,HTFormPts,ATGS,ATGC,ATS,ATSC,ATTS,ATTSC,ATF,ATC,ATY,ATR,ATP,ATFormPts,MW,HM1,AM1,HM2,AM2,HM3,AM3,HM4,AM4,HM5,AM5
0,11/08/17,Arsenal,Leicester,4,3,H,M Dean,27,6,10,3,9,12,9,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,M,M,M,M,M,M,M,M,M,M
1,12/08/17,Brighton,Man City,0,2,A,M Oliver,6,14,2,4,6,9,3,10,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,M,M,M,M,M,M,M,M,M,M
2,12/08/17,Chelsea,Burnley,2,3,A,C Pawson,19,10,6,5,16,11,8,5,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,M,M,M,M,M,M,M,M,M,M
3,12/08/17,Crystal Palace,Huddersfield,0,3,A,J Moss,14,8,4,6,7,19,12,9,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,M,M,M,M,M,M,M,M,M,M
4,12/08/17,Everton,Stoke,1,0,H,N Swarbrick,9,9,4,1,13,10,6,7,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,M,M,M,M,M,M,M,M,M,M


## Convert to DataFrame


In [20]:
# Number of opening matchweeks dropped from every season (ten rows per matchweek).
weeks_skip = 4
stats_df = pd.concat(
    [season.iloc[weeks_skip * 10:] for season in stats3.values()],
    ignore_index=True,
)

In [21]:
# Inspect the combined all-seasons table.
stats_df

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,HTGS,HTGC,HTS,HTSC,HTTS,HTTSC,HTF,HTC,HTY,HTR,HTP,HTFormPts,ATGS,ATGC,ATS,ATSC,ATTS,ATTSC,ATF,ATC,ATY,ATR,ATP,ATFormPts,MW,HM1,AM1,HM2,AM2,HM3,AM3,HM4,AM4,HM5,AM5
0,09/09/00,Coventry,Leeds,0,0,D,Paul Durkin,9,11,1,5,15,14,6,6,2,2,0,0,5,7,35,55,19,24,58,24,12,1,6,6,5,3,39,35,20,17,59,23,3,0,6,6,5,L,D,W,L,W,W,L,W,M,M
1,09/09/00,Ipswich,Aston Villa,1,2,A,Andy D'Urso,15,5,5,2,9,13,5,3,1,3,0,0,4,6,49,56,21,21,46,17,3,0,4,4,2,4,26,27,16,18,32,18,5,0,2,2,5,L,W,W,L,D,D,L,D,M,M
2,09/09/00,Leicester,Southampton,1,0,H,Mike Dean,17,9,6,5,6,9,10,6,0,0,0,0,3,1,34,37,20,22,50,24,8,0,8,8,7,8,50,30,20,19,54,35,9,0,3,3,5,W,D,D,D,W,L,D,D,M,M
3,09/09/00,Liverpool,Man City,3,2,H,Graham Barber,14,9,8,7,17,17,9,4,3,4,0,0,7,6,43,43,25,25,47,26,5,2,7,7,7,9,45,41,22,27,67,22,9,0,6,6,5,W,W,D,L,L,W,W,L,M,M
4,09/09/00,Man United,Sunderland,3,0,H,Neale Barry,15,5,7,1,18,14,4,4,1,3,0,0,11,3,63,45,32,26,35,23,4,0,8,8,4,6,43,59,16,31,55,15,9,0,4,4,5,W,D,D,L,D,L,W,W,M,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7475,22/05/2022,Crystal Palace,Man United,1,0,H,M Atkinson,6,10,3,4,12,22,3,6,2,4,0,0,49,46,406,433,144,142,403,173,65,1,45,8,57,56,498,505,183,180,373,193,71,2,58,4,38,L,L,D,W,W,D,W,L,D,L
7476,22/05/2022,Leicester,Southampton,4,1,H,J Moss,12,7,6,2,10,5,3,3,0,1,0,0,58,58,423,551,163,195,346,182,54,1,49,7,42,63,475,485,168,183,397,231,62,2,40,1,38,D,L,W,L,W,L,L,D,L,L
7477,22/05/2022,Liverpool,Wolves,3,1,H,A Taylor,29,7,8,5,6,3,5,3,1,0,0,0,91,25,701,289,248,105,357,280,49,1,89,13,37,40,395,482,131,163,362,165,59,2,51,2,38,W,D,W,L,D,D,W,L,W,L
7478,22/05/2022,Man City,Aston Villa,3,2,H,M Oliver,24,4,5,2,5,11,13,1,0,1,0,0,96,24,691,233,249,86,315,304,42,1,90,13,50,51,450,453,160,152,390,195,76,2,45,8,38,D,D,W,D,W,L,W,W,W,W


## Get Odds Data

In [22]:
# Bookmaker odds columns of interest (home / draw / away for each bookmaker).
cols_odds = ['B365H', 'B365D', 'B365A', 'BWH', 'BWD', 'BWA', 'GBH', 'GBD','GBA',
             'IWH', 'IWD', 'IWA', 'LBH', 'LBD', 'LBA', 'PSH', 'PSD',  'PSA', 'WHH',
             'WHD', 'WHA', 'SJH', 'SJD', 'SJA', 'VCH', 'VCD', 'VCA', 'BSH', 'BSD', 'BSA']

# Only the home-win odds (names ending in 'H') are used.
home_odds_cols = [name for name in cols_odds if name.endswith('H')]
match_key_cols = ['Date', 'HomeTeam', 'AwayTeam']

odds_data = {}
for year, season_raw in raw_data.items():
  # Not every bookmaker column exists in every season's file.
  available = [name for name in home_odds_cols if name in season_raw.columns]
  # Work on a copy so the new column is not written onto a slice of the raw frame.
  # Local names are used throughout; the global `cols` from the stats cell is left alone.
  season_odds = season_raw[match_key_cols + available].copy()
  # Average over the odds columns only; the key columns are text and cannot be averaged.
  season_odds['AVG Home Odds'] = season_odds[available].mean(axis=1)
  odds_data[year] = season_odds

# Drop the same opening rows as stats_df so the two tables stay aligned row for row.
odds_df = pd.concat([odds_data[year][weeks_skip*10:] for year in odds_data.keys()], ignore_index=True)
odds_df = odds_df[['AVG Home Odds','IWH', 'GBH', 'LBH', 'WHH', 'B365H', 'BWH', 'SJH', 'VCH', 'BSH', 'PSH']]

In [23]:
# Attach the odds columns row by row: both tables are joined on their index.
stats_df = stats_df.merge(odds_df, left_index=True, right_index=True)

In [24]:
# Last ten rows of the merged table, to check the odds columns.
stats_df[-10:]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,HTGS,HTGC,HTS,HTSC,HTTS,HTTSC,HTF,HTC,HTY,HTR,HTP,HTFormPts,ATGS,ATGC,ATS,ATSC,ATTS,ATTSC,ATF,ATC,ATY,ATR,ATP,ATFormPts,MW,HM1,AM1,HM2,AM2,HM3,AM3,HM4,AM4,HM5,AM5,AVG Home Odds,IWH,GBH,LBH,WHH,B365H,BWH,SJH,VCH,BSH,PSH
7470,22/05/2022,Arsenal,Everton,5,1,H,A Marriner,26,6,9,2,8,9,12,2,0,1,0,0,56,47,562,419,188,145,355,197,60,4,66,9,42,61,432,496,136,181,359,159,75,6,39,10,38,L,W,L,L,W,D,W,W,W,W,1.36,1.37,,,1.35,1.36,1.38,,1.33,,1.37
7471,22/05/2022,Brentford,Leeds,1,2,A,P Tierney,14,14,5,6,11,13,3,2,2,1,1,0,47,54,428,488,149,177,360,156,59,2,46,10,40,78,471,568,148,226,456,169,99,3,35,2,38,W,D,W,L,L,L,D,L,W,D,2.41,2.45,,,2.4,2.45,2.4,,2.38,,2.38
7472,22/05/2022,Brighton,West Ham,3,1,H,K Friend,18,7,7,2,9,9,5,6,1,1,0,0,39,43,472,469,145,150,384,197,70,2,48,8,59,48,426,530,156,160,315,185,46,3,56,5,38,D,D,W,W,W,L,D,L,L,D,2.367,2.4,,,2.4,2.37,2.35,,2.3,,2.38
7473,22/05/2022,Burnley,Newcastle,1,2,A,C Pawson,12,8,5,6,8,9,8,2,1,0,0,0,33,51,395,595,120,181,386,176,67,2,35,7,42,61,440,508,142,166,382,158,79,1,46,9,38,D,W,L,L,L,L,W,W,W,W,2.212,2.2,,,2.2,2.25,2.15,,2.2,,2.27
7474,22/05/2022,Chelsea,Watford,2,1,H,M Dean,19,8,8,3,9,9,3,3,0,0,0,0,74,32,573,333,203,116,400,239,63,1,71,6,33,75,388,542,124,185,431,159,57,3,23,1,38,D,L,W,D,D,L,L,L,D,L,1.17,1.22,,,1.15,1.18,1.18,,1.13,,1.16
7475,22/05/2022,Crystal Palace,Man United,1,0,H,M Atkinson,6,10,3,4,12,22,3,6,2,4,0,0,49,46,406,433,144,142,403,173,65,1,45,8,57,56,498,505,183,180,373,193,71,2,58,4,38,L,L,D,W,W,D,W,L,D,L,3.05,3.15,,,3.0,3.0,3.0,,3.0,,3.15
7476,22/05/2022,Leicester,Southampton,4,1,H,J Moss,12,7,6,2,10,5,3,3,0,1,0,0,58,58,423,551,163,195,346,182,54,1,49,7,42,63,475,485,168,183,397,231,62,2,40,1,38,D,L,W,L,W,L,L,D,L,L,1.857,1.85,,,1.88,1.85,1.85,,1.83,,1.88
7477,22/05/2022,Liverpool,Wolves,3,1,H,A Taylor,29,7,8,5,6,3,5,3,1,0,0,0,91,25,701,289,248,105,357,280,49,1,89,13,37,40,395,482,131,163,362,165,59,2,51,2,38,W,D,W,L,D,D,W,L,W,L,1.15,1.17,,,1.15,1.14,1.16,,1.13,,1.15
7478,22/05/2022,Man City,Aston Villa,3,2,H,M Oliver,24,4,5,2,5,11,13,1,0,1,0,0,96,24,691,233,249,86,315,304,42,1,90,13,50,51,450,453,160,152,390,195,76,2,45,8,38,D,D,W,D,W,L,W,W,W,W,1.152,1.15,,,1.15,1.16,1.17,,1.13,,1.15
7479,22/05/2022,Norwich,Tottenham,0,5,A,C Kavanagh,9,19,0,13,13,7,3,2,3,1,0,0,23,79,365,612,109,217,343,162,52,2,22,1,64,40,472,472,185,141,379,193,66,1,68,11,38,D,W,L,W,L,D,L,W,L,D,10.7,11.0,,,10.0,9.5,11.0,,11.5,,11.2


In [None]:
# stats_df.to_csv('clean_data.csv')