In [1]:
import pandas as pd
import seaborn as sns 

In [None]:
# Location of the raw match CSV, relative to the notebook's working directory.
file_path = '../data/England CSV.csv'

# Read the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# First look at the raw data: display the first five rows.
df.head(n=5)

Unnamed: 0,Date,Season,HomeTeam,AwayTeam,FTH Goals,FTA Goals,FT Result,HTH Goals,HTA Goals,HT Result,...,H Fouls,A Fouls,H Corners,A Corners,H Yellow,A Yellow,H Red,A Red,Display_Order,League
0,16/01/2025,2024/25,Ipswich Town,Brighton & Hove Albion,0,2,A,0.0,1.0,A,...,13.0,14.0,1.0,9.0,2.0,2.0,0.0,0.0,20250116,Premier League
1,16/01/2025,2024/25,Man United,Southampton,3,1,H,0.0,1.0,A,...,7.0,10.0,4.0,4.0,1.0,3.0,0.0,0.0,20250116,Premier League
2,15/01/2025,2024/25,Everton,Aston Villa,0,1,A,0.0,0.0,D,...,17.0,10.0,8.0,5.0,2.0,1.0,0.0,0.0,20250115,Premier League
3,15/01/2025,2024/25,Leicester,Crystal Palace,0,2,A,0.0,0.0,D,...,7.0,6.0,4.0,3.0,0.0,0.0,0.0,0.0,20250115,Premier League
4,15/01/2025,2024/25,Newcastle,Wolves,3,0,H,1.0,0.0,H,...,10.0,13.0,4.0,2.0,0.0,2.0,0.0,0.0,20250115,Premier League


In [4]:
# List every column name to spot candidate features and a label column.

df.columns

Index(['Date', 'Season', 'HomeTeam', 'AwayTeam', 'FTH Goals', 'FTA Goals',
       'FT Result', 'HTH Goals', 'HTA Goals', 'HT Result', 'Referee',
       'H Shots', 'A Shots', 'H SOT', 'A SOT', 'H Fouls', 'A Fouls',
       'H Corners', 'A Corners', 'H Yellow', 'A Yellow', 'H Red', 'A Red',
       'Display_Order', 'League'],
      dtype='object')

- Predicting exact scores would be a challenge, so 'FT Result' will most likely be used as the label
- Shots, corners, and cards already stand out as features
- There are no columns for team form or league standing, and their absence would hinder prediction performance
- A team-form column can possibly be created using grouping
- A column for total points should also be created

## Adding in total points 

In [14]:
# Calculate the points each team won at home in every match

def get_points(result, type = 'home'):
    """Return the league points one side earned from a full-time result.

    Parameters
    ----------
    result : str
        Full-time result code: 'H' (home win), 'D' (draw) or 'A' (away win).
        Any other value is scored as 0.
    type : str, default 'home'
        Side to score the match for: 'home' or 'away'. The name shadows the
        built-in ``type`` but is kept because callers pass it by keyword.

    Returns
    -------
    int
        3 for a win by the chosen side, 1 for a draw, 0 otherwise.

    Raises
    ------
    ValueError
        If ``type`` is neither 'home' nor 'away'. Previously this case fell
        through and silently returned None.
    """
    # The result code that counts as a win for each side.
    winning_code = {'home': 'H', 'away': 'A'}
    if type not in winning_code:
        raise ValueError(f"type must be 'home' or 'away', got {type!r}")

    if result == winning_code[type]:
        return 3
    if result == 'D':
        return 1
    return 0

# One row per match from the home side's perspective: rename to the generic
# Team/Result columns, score each result (get_points defaults to type='home')
# and parse the day/month/year date strings into datetimes.
home_df = (
    df[['Date', 'Season', 'HomeTeam', 'FT Result']]
    .rename(columns={'HomeTeam': 'Team', 'FT Result': 'Result'})
    .assign(
        Points=lambda matches: matches['Result'].apply(get_points),
        Date=lambda matches: pd.to_datetime(matches['Date'], format='%d/%m/%Y'),
    )
)

home_df.head(n=5)

Unnamed: 0,Date,Season,Team,Result,Points
0,2025-01-16,2024/25,Ipswich Town,A,0
1,2025-01-16,2024/25,Man United,H,3
2,2025-01-15,2024/25,Everton,A,0
3,2025-01-15,2024/25,Leicester,A,0
4,2025-01-15,2024/25,Newcastle,H,3


In [15]:
# Same reshaping for the away side: rename to the generic Team/Result columns,
# score each result from the away perspective and parse the date strings.
away_df = (
    df[['Date', 'Season', 'AwayTeam', 'FT Result']]
    .rename(columns={'AwayTeam': 'Team', 'FT Result': 'Result'})
    .assign(
        Points=lambda matches: matches['Result'].apply(get_points, type='away'),
        Date=lambda matches: pd.to_datetime(matches['Date'], format='%d/%m/%Y'),
    )
)

away_df.head(n=5)

Unnamed: 0,Date,Season,Team,Result,Points
0,2025-01-16,2024/25,Brighton & Hove Albion,A,3
1,2025-01-16,2024/25,Southampton,H,0
2,2025-01-15,2024/25,Aston Villa,A,3
3,2025-01-15,2024/25,Crystal Palace,A,3
4,2025-01-15,2024/25,Wolves,H,0


In [17]:
# Stack the home and away views so each match contributes one row per team,
# then order the rows by date and renumber the index from 0.
away_and_home_df = (
    pd.concat([home_df, away_df])
    .sort_values('Date', ignore_index=True)
)
away_and_home_df.head(n=10)

Unnamed: 0,Date,Season,Team,Result,Points
0,1993-08-14,1993/94,Oldham,A,0
1,1993-08-14,1993/94,Newcastle,A,0
2,1993-08-14,1993/94,Chelsea,A,0
3,1993-08-14,1993/94,Arsenal,A,0
4,1993-08-14,1993/94,Southampton,A,0
5,1993-08-14,1993/94,Sheffield United,H,3
6,1993-08-14,1993/94,Liverpool,H,3
7,1993-08-14,1993/94,Aston Villa,H,3
8,1993-08-14,1993/94,Man City,D,1
9,1993-08-14,1993/94,QPR,H,0


In [19]:
# Calculate the cumulative points: a running total per team that restarts
# every season, following the current (date-sorted) row order.
# NOTE(review): the total on each row includes the points from that row's own
# match, so it encodes the match outcome. If this is used as a feature to
# predict 'FT Result', confirm whether a pre-match total is needed instead.
away_and_home_df['Cumulative Points'] = (
    away_and_home_df
    .groupby(by=['Season', 'Team'])['Points']
    .cumsum()
)
away_and_home_df.head(n=50)

Unnamed: 0,Date,Season,Team,Result,Points,Cumulative Points
0,1993-08-14,1993/94,Oldham,A,0,0
1,1993-08-14,1993/94,Newcastle,A,0,0
2,1993-08-14,1993/94,Chelsea,A,0,0
3,1993-08-14,1993/94,Arsenal,A,0,0
4,1993-08-14,1993/94,Southampton,A,0,0
5,1993-08-14,1993/94,Sheffield United,H,3,3
6,1993-08-14,1993/94,Liverpool,H,3,3
7,1993-08-14,1993/94,Aston Villa,H,3,3
8,1993-08-14,1993/94,Man City,D,1,1
9,1993-08-14,1993/94,QPR,H,0,0
