## EDA 

To answer below questions: 
- Which teams have been the most consistent in their performance over the last 10 years?
- Which teams have the most comebacks (wins after trailing)?
- How does playing at home or away affect the results

In [4]:
import pandas as pd 
datasetPremierLeague = pd.read_csv("../data/raw/datasetPremier.csv")

In [5]:
# Crear columnas auxiliares para victorias, empates y derrotas
datasetPremierLeague['Win_H'] = (datasetPremierLeague['FT Result'] == 'H').astype(int)  # Victoria local
datasetPremierLeague['Win_A'] = (datasetPremierLeague['FT Result'] == 'A').astype(int)  # Victoria visitante
datasetPremierLeague['Draw'] = (datasetPremierLeague['FT Result'] == 'D').astype(int)   # Empate

datasetPremierLeague['Loss_H'] = (datasetPremierLeague['FT Result'] == 'A').astype(int)  # Derrota local
datasetPremierLeague['Loss_A'] = (datasetPremierLeague['FT Result'] == 'H').astype(int)  # Derrota visitante

# Crear DataFrame para equipos locales
home_stats = datasetPremierLeague.groupby(['Season', 'HomeTeam']).agg(
    Wins=('Win_H', 'sum'),
    Draws=('Draw', 'sum'),
    Losses=('Loss_H', 'sum')
).reset_index()

# Crear DataFrame para equipos visitantes
away_stats = datasetPremierLeague.groupby(['Season', 'AwayTeam']).agg(
    Wins=('Win_A', 'sum'),
    Draws=('Draw', 'sum'),
    Losses=('Loss_A', 'sum')
).reset_index()

# Unificar datos de local y visitante sumando estadísticas
team_stats = pd.concat([home_stats, away_stats]).groupby(['Season', 'HomeTeam']).sum().reset_index()

# Renombrar la columna para representar a todos los equipos
team_stats = team_stats.rename(columns={'HomeTeam': 'Team'})

print(team_stats.head())  # Ver resultados

    Season            Team  Wins  Draws  Losses AwayTeam
0  2013/14         Arsenal    13      5       1        0
1  2013/14     Aston Villa     6      3      10        0
2  2013/14         Cardiff     5      5       9        0
3  2013/14         Chelsea    15      3       1        0
4  2013/14  Crystal Palace     8      3       8        0


In [7]:
# Calcular los puntos por equipo y temporada
team_stats['Points'] = (team_stats['Wins'] * 3) + (team_stats['Draws'] * 1)

# Ordenar por temporada
team_stats = team_stats.sort_values(by=['Team', 'Season'])


In [9]:
# Calcular la desviación estándar de puntos por equipo
team_consistency = team_stats.groupby('Team')['Points'].std().reset_index()
team_consistency = team_consistency.rename(columns={'Points': 'Std_Dev_Points'})

# Ver equipos más y menos consistentes
print(team_consistency.sort_values(by='Std_Dev_Points'))


                Team  Std_Dev_Points
6            Cardiff        0.000000
8     Crystal Palace        3.487641
33            Wolves        3.834058
32          West Ham        4.020403
12              Hull        4.041452
27        Sunderland        4.163332
2        Bournemouth        4.220133
28           Swansea        4.615192
31         West Brom        4.996666
4           Brighton        5.080307
17          Man City        5.258759
29         Tottenham        5.546498
13             Leeds        5.567764
0            Arsenal        5.640761
20         Newcastle        5.750362
21           Norwich        5.916080
26             Stoke        6.140033
18        Man United        6.274045
14         Leicester        6.912147
5            Burnley        7.005100
22     Nott'm Forest        7.071068
9            Everton        7.394101
30           Watford        7.536577
25       Southampton        7.789594
15         Liverpool        7.897065
10            Fulham        8.018728
3