In [48]:
from pytorch_tabnet.tab_model import TabNetClassifier

import os
import torch
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
 
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [115]:
historical_standings_pl = pd.read_csv('data/Premier/premier-league-standing-1993-2023.csv')

print(historical_standings_pl.columns)

Index(['Season_End_Year', 'Team', 'Rk', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD',
       'Pts', 'Notes'],
      dtype='object')


In [116]:
#columna Notes no aporta.
historical_standings_pl = historical_standings_pl.drop(columns=['Notes'])
print(historical_standings_pl.columns)

Index(['Season_End_Year', 'Team', 'Rk', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD',
       'Pts'],
      dtype='object')


In [117]:
#Revisamos la cantidad y los nombres de los equipos
print(historical_standings_pl['Team'].unique())
print("\ncantidad: ", len(historical_standings_pl['Team'].unique()))

['Arsenal' 'Aston Villa' 'Blackburn' 'Chelsea' 'Coventry City'
 'Crystal Palace' 'Everton' 'Ipswich Town' 'Leeds United' 'Liverpool'
 'Manchester City' 'Manchester Utd' 'Middlesbrough' 'Norwich City'
 "Nott'ham Forest" 'Oldham Athletic' 'QPR' 'Sheffield Utd'
 'Sheffield Weds' 'Southampton' 'Tottenham' 'Wimbledon' 'Newcastle Utd'
 'Swindon Town' 'West Ham' 'Leicester City' 'Bolton' 'Derby County'
 'Sunderland' 'Barnsley' 'Charlton Ath' 'Bradford City' 'Watford' 'Fulham'
 'Birmingham City' 'West Brom' 'Portsmouth' 'Wolves' 'Wigan Athletic'
 'Reading' 'Hull City' 'Stoke City' 'Burnley' 'Blackpool' 'Swansea City'
 'Cardiff City' 'Bournemouth' 'Brighton' 'Huddersfield' 'Brentford']

cantidad:  50


In [118]:
#Cargar archivo de los partidos
matches = pd.read_csv('data/Premier/premier-league-matches-1993-2023.csv')

print(matches.columns)

Index(['Season_End_Year', 'Wk', 'Date', 'Home', 'HomeGoals', 'AwayGoals',
       'Away', 'FTR'],
      dtype='object')


In [119]:
#Revisan los equipos
print("equipos locales: ", matches['Home'].unique())
print("\ncantidad locales: ", len(matches['Home'].unique()))

print("\n\nequipos visitantes: ", matches['Away'].unique())
print("\ncantidad visitantes: ", len(matches['Away'].unique()))

equipos locales:  ['Coventry City' 'Leeds United' 'Sheffield Utd' 'Crystal Palace' 'Arsenal'
 'Ipswich Town' 'Everton' 'Southampton' 'Chelsea' "Nott'ham Forest"
 'Manchester City' 'Blackburn' 'Wimbledon' 'Tottenham' 'Liverpool'
 'Aston Villa' 'Oldham Athletic' 'Middlesbrough' 'Norwich City' 'QPR'
 'Manchester Utd' 'Sheffield Weds' 'Newcastle Utd' 'West Ham'
 'Swindon Town' 'Leicester City' 'Bolton' 'Sunderland' 'Derby County'
 'Barnsley' 'Charlton Ath' 'Watford' 'Bradford City' 'Fulham'
 'Birmingham City' 'West Brom' 'Portsmouth' 'Wolves' 'Wigan Athletic'
 'Reading' 'Hull City' 'Stoke City' 'Burnley' 'Blackpool' 'Swansea City'
 'Cardiff City' 'Bournemouth' 'Brighton' 'Huddersfield' 'Brentford']

cantidad locales:  50


equipos visitantes:  ['Middlesbrough' 'Wimbledon' 'Manchester Utd' 'Blackburn' 'Norwich City'
 'Aston Villa' 'Sheffield Weds' 'Tottenham' 'Oldham Athletic' 'Liverpool'
 'QPR' 'Arsenal' 'Ipswich Town' 'Coventry City' 'Sheffield Utd'
 'Leeds United' 'Crystal Palace' 'Manch

In [120]:
#Se eliminan los resultados de la temporada 2023 de las historical standings
historical_standings_pl = historical_standings_pl[historical_standings_pl['Season_End_Year'] != 2023]

#Revisar seasons
print(historical_standings_pl['Season_End_Year'].unique())
#Se revisan los equipos d nuevo
print(historical_standings_pl['Team'].unique())
print("\ncantidad: ", len(historical_standings_pl['Team'].unique()))


[1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006
 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
 2021 2022]
['Arsenal' 'Aston Villa' 'Blackburn' 'Chelsea' 'Coventry City'
 'Crystal Palace' 'Everton' 'Ipswich Town' 'Leeds United' 'Liverpool'
 'Manchester City' 'Manchester Utd' 'Middlesbrough' 'Norwich City'
 "Nott'ham Forest" 'Oldham Athletic' 'QPR' 'Sheffield Utd'
 'Sheffield Weds' 'Southampton' 'Tottenham' 'Wimbledon' 'Newcastle Utd'
 'Swindon Town' 'West Ham' 'Leicester City' 'Bolton' 'Derby County'
 'Sunderland' 'Barnsley' 'Charlton Ath' 'Bradford City' 'Watford' 'Fulham'
 'Birmingham City' 'West Brom' 'Portsmouth' 'Wolves' 'Wigan Athletic'
 'Reading' 'Hull City' 'Stoke City' 'Burnley' 'Blackpool' 'Swansea City'
 'Cardiff City' 'Bournemouth' 'Brighton' 'Huddersfield' 'Brentford']

cantidad:  50


In [122]:
#Se elimina la temporada 1993 de los partidos porque no hay datos de la tabla de posiciones de la temp anterior.
matches = matches[matches['Season_End_Year'] != 1993]

In [123]:
#Comparar los equipos de los partidos con los equipos de las standings
teams_matches_home = matches['Home'].unique()
teams_standings = historical_standings_pl['Team'].unique()

teams_not_in_standings = []
for team in teams_matches_home:
    if team not in teams_standings:
        teams_not_in_standings.append(team)

print("Equipos en partidos pero no en standings: ", teams_not_in_standings)

#Comparamos las standings con partidos
teams_not_in_matches = []
for team in teams_standings:
    if team not in teams_matches_home:
        teams_not_in_matches.append(team)

print("Equipos en standings pero no en partidos: ", teams_not_in_matches)
    

Equipos en partidos pero no en standings:  []
Equipos en standings pero no en partidos:  []


In [124]:
#Ahora, se crea una columna de start year en las standings para hacer el merge.
historical_standings_pl['Season_Start_Year'] = historical_standings_pl['Season_End_Year'] + 1

#Se elimina la columna de season end year
historical_standings_pl = historical_standings_pl.drop(columns=['Season_End_Year'])
print(historical_standings_pl.head)
print(historical_standings_pl['Rk'].unique())

<bound method NDFrame.head of               Team  Rk  MP   W   D   L  GF  GA  GD  Pts  Season_Start_Year
0          Arsenal  10  42  15  11  16  40  38   2   56               1994
1      Aston Villa   2  42  21  11  10  57  40  17   74               1994
2        Blackburn   4  42  20  11  11  68  46  22   71               1994
3          Chelsea  11  42  14  14  14  51  54  -3   56               1994
4    Coventry City  15  42  13  13  16  52  57  -5   52               1994
..             ...  ..  ..  ..  ..  ..  ..  ..  ..  ...                ...
601    Southampton  15  38   9  13  16  43  67 -24   40               2023
602      Tottenham   4  38  22   5  11  69  40  29   71               2023
603        Watford  19  38   6   5  27  34  77 -43   23               2023
604       West Ham   7  38  16   8  14  60  51   9   56               2023
605         Wolves  10  38  15   6  17  38  43  -5   51               2023

[606 rows x 11 columns]>
[10  2  4 11 15 20 13 16 17  6  9  1 21  3 2

In [125]:
#Seleccionar equipos con Rk menor que 20.
rk_over_20 = historical_standings_pl[historical_standings_pl['Rk'] > 20]
print(rk_over_20)

               Team  Rk  MP   W   D   L  GF   GA  GD  Pts  Season_Start_Year
12    Middlesbrough  21  42  11  11  20  54   75 -21   44               1994
14  Nott'ham Forest  22  42  10  10  22  41   62 -21   40               1994
35  Oldham Athletic  21  42   9  13  20  42   68 -26   40               1995
40     Swindon Town  22  42   5  15  22  47  100 -53   30               1995
51     Ipswich Town  22  42   7   6  29  36   93 -57   27               1996
53   Leicester City  21  42   6  11  25  45   80 -35   29               1996


In [126]:
#Solo nos importan las columnas de la posición y los puntos
historical_standings_pl = historical_standings_pl[['Team', 'Season_Start_Year', 'Rk', 'Pts']]

#merge de los partidos con las standings del local
matches = matches.merge(historical_standings_pl, left_on=['Home', 'Season_End_Year'], right_on=['Team', 'Season_Start_Year'], how='left')

#Se renombran las columnas last_season
matches = matches.rename(columns={'Rk': 'Position_last_year_home', 'Pts': 'Points_last_year_home'})

#Se eliminan las columnas de team y season start year
matches = matches.drop(columns=['Team', 'Season_Start_Year'])

#merge de los partidos con las standings del visitante
matches = matches.merge(historical_standings_pl, left_on=['Away', 'Season_End_Year'], right_on=['Team', 'Season_Start_Year'], how='left')

#Se renombran las columnas last_season
matches = matches.rename(columns={'Rk': 'Position_last_year_away', 'Pts': 'Points_last_year_away'})

#Se eliminan las columnas de team y season start year
matches = matches.drop(columns=['Team', 'Season_Start_Year'])

print(matches.columns)
#print el primer partido
print(matches.iloc[0])


Index(['Season_End_Year', 'Wk', 'Date', 'Home', 'HomeGoals', 'AwayGoals',
       'Away', 'FTR', 'Position_last_year_home', 'Points_last_year_home',
       'Position_last_year_away', 'Points_last_year_away'],
      dtype='object')
Season_End_Year                      1994
Wk                                      1
Date                           1993-08-14
Home                            Liverpool
HomeGoals                               2
AwayGoals                               0
Away                       Sheffield Weds
FTR                                     H
Position_last_year_home               6.0
Points_last_year_home                59.0
Position_last_year_away               7.0
Points_last_year_away                59.0
Name: 0, dtype: object


Si no tiene posición en la temporada pasada, es porque el equipo **ascendió de categoría**. Se pone la posición 18.

In [128]:
matches['Position_last_year_home'] = matches['Position_last_year_home'].fillna(18)
print(matches['Position_last_year_home'].unique())

matches['Position_last_year_away'] = matches['Position_last_year_away'].fillna(18)
print(matches['Position_last_year_away'].unique())

[ 6. 11.  2. 19. 10.  9. 18. 14.  3.  8. 12. 17. 16. 13.  1.  7.  4. 15.
  5.]
[ 7.  4.  5. 16. 15. 17.  8. 18. 13. 12.  1. 10. 11.  9. 14.  2.  3. 19.
  6.]


Para el tema de los puntos vacios, se rellenan con el puntaje que tuvo el puesto 17 ya que es el último puesto que no desciende.

In [133]:
#Se rellenan los puntos con los obtenidos en el puesto 17 de esa temporada.

seasons = matches['Season_End_Year'].unique()

for season in seasons:
    
    points_17 = historical_standings_pl[(historical_standings_pl['Season_Start_Year'] == season) & (historical_standings_pl['Rk'] == 17.0)]['Pts']

    points_17 = points_17.values[0]
    matches.loc[(matches['Season_End_Year'] == season) & (matches['Points_last_year_home'].isnull()), 'Points_last_year_home'] = points_17
    matches.loc[(matches['Season_End_Year'] == season) & (matches['Points_last_year_away'].isnull()), 'Points_last_year_away'] = points_17

print(matches.iloc[0])
print(matches[matches['Points_last_year_home'].isnull()])
print(matches[matches['Points_last_year_away'].isnull()])

Season_End_Year                      1994
Wk                                      1
Date                           1993-08-14
Home                            Liverpool
HomeGoals                               2
AwayGoals                               0
Away                       Sheffield Weds
FTR                                     H
Position_last_year_home               6.0
Points_last_year_home                59.0
Position_last_year_away               7.0
Points_last_year_away                59.0
Name: 0, dtype: object
Empty DataFrame
Columns: [Season_End_Year, Wk, Date, Home, HomeGoals, AwayGoals, Away, FTR, Position_last_year_home, Points_last_year_home, Position_last_year_away, Points_last_year_away]
Index: []
Empty DataFrame
Columns: [Season_End_Year, Wk, Date, Home, HomeGoals, AwayGoals, Away, FTR, Position_last_year_home, Points_last_year_home, Position_last_year_away, Points_last_year_away]
Index: []


In [None]:
print