In [3]:
# 📦 Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
# 📥 Step 2: Load Local CSV File
# Make sure 23-24Season.csv is in the same directory as your notebook
df = pd.read_csv("23-24Season.csv")

# Preview the data
df = df[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']].dropna()
df.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,Burnley,Man City,0,3,A
1,Arsenal,Nott'm Forest,2,1,H
2,Bournemouth,West Ham,1,1,D
3,Brighton,Luton,4,1,H
4,Everton,Fulham,0,1,A


In [5]:
df['Target'] = df['FTR'].apply(lambda x: 1 if x == 'H' else 0)

In [6]:
# One-hot encode team names
df = pd.get_dummies(df, columns=['HomeTeam', 'AwayTeam'], drop_first=True)

# Drop unused columns
df = df.drop(columns=['FTHG', 'FTAG', 'FTR'])
df.head()

Unnamed: 0,Target,GoalDiff,HomeTeam_Aston Villa,HomeTeam_Bournemouth,HomeTeam_Brentford,HomeTeam_Brighton,HomeTeam_Burnley,HomeTeam_Chelsea,HomeTeam_Crystal Palace,HomeTeam_Everton,...,AwayTeam_Liverpool,AwayTeam_Luton,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Newcastle,AwayTeam_Nott'm Forest,AwayTeam_Sheffield United,AwayTeam_Tottenham,AwayTeam_West Ham,AwayTeam_Wolves
0,0,-3,False,False,False,False,True,False,False,False,...,False,False,True,False,False,False,False,False,False,False
1,1,1,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
2,0,0,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
3,1,3,False,False,False,True,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
4,0,-1,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [7]:
# 🧠 Step 5: Train/Test Split
X = df.drop(columns=['Target'])
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [8]:
# 🤖 Step 6: Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [9]:
# 📊 Step 7: Evaluate the Model
y_pred = model.predict(X_test)

print("🔍 Classification Report:")
print(classification_report(y_test, y_pred))

print("🧮 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

🔍 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        42
           1       1.00      1.00      1.00        34

    accuracy                           1.00        76
   macro avg       1.00      1.00      1.00        76
weighted avg       1.00      1.00      1.00        76

🧮 Confusion Matrix:
[[42  0]
 [ 0 34]]


In [11]:
# 🔍 Optional: Predict some examples
sample = X_test.iloc[:5]
predicted = model.predict(sample)

print("Predictions for 5 sample matches:", predicted)

Predictions for 5 sample matches: [0 1 0 1 0]
