In [None]:
#Import Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
#Load Our Data
leagues = {"SP1": "LaLiga"}
seasons = ["2122", "2223", "2324", "2425", "2526"]

for code, league in leagues.items():
    for season in seasons:
        url = f"https://www.football-data.co.uk/mmz4281/{season}/{code}.csv"
        df = pd.read_csv(url)
        df.to_csv(f"{league}_{season}.csv")
        print("Downloaded")

Downloaded
Downloaded
Downloaded
Downloaded
Downloaded


In [None]:
#Create Pandas Dataframes
laliga1 = pd.read_csv("/content/LaLiga_2122.csv")
laliga2 = pd.read_csv("/content/LaLiga_2223.csv")
laliga3 = pd.read_csv("/content/LaLiga_2324.csv")
laliga4 = pd.read_csv("/content/LaLiga_2425.csv")
laliga5 = pd.read_csv("/content/LaLiga_2526.csv")

data = pd.concat([laliga1, laliga2, laliga3, laliga4, laliga5])
data.head()

Unnamed: 0.1,Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,...,BMGMCA,BVCH,BVCD,BVCA,CLCH,CLCD,CLCA,LBCH,LBCD,LBCA
0,0,SP1,13/08/2021,20:00,Valencia,Getafe,1,0,H,1,...,,,,,,,,,,
1,1,SP1,14/08/2021,18:30,Cadiz,Levante,1,1,D,0,...,,,,,,,,,,
2,2,SP1,14/08/2021,18:30,Mallorca,Betis,1,1,D,1,...,,,,,,,,,,
3,3,SP1,14/08/2021,21:00,Alaves,Real Madrid,1,4,A,0,...,,,,,,,,,,
4,4,SP1,14/08/2021,21:00,Osasuna,Espanol,0,0,D,0,...,,,,,,,,,,


In [None]:
##Take Out and Use The Most Important Features
data.iloc[:, 4:24].info()

<class 'pandas.core.frame.DataFrame'>
Index: 1610 entries, 0 to 89
Data columns (total 20 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   HomeTeam  1610 non-null   object
 1   AwayTeam  1610 non-null   object
 2   FTHG      1610 non-null   int64 
 3   FTAG      1610 non-null   int64 
 4   FTR       1610 non-null   object
 5   HTHG      1610 non-null   int64 
 6   HTAG      1610 non-null   int64 
 7   HTR       1610 non-null   object
 8   HS        1610 non-null   int64 
 9   AS        1610 non-null   int64 
 10  HST       1610 non-null   int64 
 11  AST       1610 non-null   int64 
 12  HF        1610 non-null   int64 
 13  AF        1610 non-null   int64 
 14  HC        1610 non-null   int64 
 15  AC        1610 non-null   int64 
 16  HY        1610 non-null   int64 
 17  AY        1610 non-null   int64 
 18  HR        1610 non-null   int64 
 19  AR        1610 non-null   int64 
dtypes: int64(16), object(4)
memory usage: 264.1+ KB


In [None]:
df = data.iloc[:, 4:24]
df.head(20)

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Valencia,Getafe,1,0,H,1,0,H,4,22,2,4,24,15,1,9,6,3,1,1
1,Cadiz,Levante,1,1,D,0,1,A,7,12,2,3,10,16,2,4,1,5,0,0
2,Mallorca,Betis,1,1,D,1,0,H,6,10,2,1,12,18,4,3,1,3,0,0
3,Alaves,Real Madrid,1,4,A,0,0,D,11,19,4,7,14,10,0,4,1,2,0,0
4,Osasuna,Espanol,0,0,D,0,0,D,14,10,1,3,18,12,4,6,3,3,0,0
5,Celta,Ath Madrid,1,2,A,0,1,A,10,13,2,4,11,9,2,5,4,5,1,1
6,Barcelona,Sociedad,4,2,H,2,0,H,13,11,8,3,11,18,5,3,2,4,0,0
7,Sevilla,Vallecano,3,0,H,1,0,H,21,4,8,3,14,7,4,0,1,0,0,1
8,Villarreal,Granada,0,0,D,0,0,D,14,6,4,1,19,21,6,2,5,1,1,0
9,Elche,Ath Bilbao,0,0,D,0,0,D,8,9,2,3,13,19,6,8,2,2,0,0


In [None]:
cols = ["HS", "AS", "HST", "AST", "HF", "AF", "HC", "AC", "HY", "AY", "HR", "AR"]

def get_matches(homeTeam, awayTeam, df, games=10):
    #Get Club Matches
    club_matches = df[(df["HomeTeam"] == homeTeam) | (df["AwayTeam"] == awayTeam)]

    #Get last n games
    club_matches = club_matches.tail(games)

    #Get averages
    return club_matches[cols].mean().values

In [None]:
team1 = "Real Madrid"
team2 = "Barcelona"

get_matches(team1, team2, df)

array([13.1, 14.2,  4.5,  5.1, 12.4, 11.1,  4.7,  5. ,  2.7,  1.2,  0.2,
        0.2])

In [None]:
#Turn Our Clubs Into Numerical Values
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
clubs = ["HomeTeam", "AwayTeam"]

for club in clubs:
    df[club] = encoder.fit_transform(df[club])

In [None]:
df.head(20)

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,22,10,1,0,H,1,0,H,4,22,2,4,24,15,1,9,6,3,1,1
1,6,15,1,1,D,0,1,A,7,12,2,3,10,16,2,4,1,5,0,0
2,16,5,1,1,D,1,0,H,6,10,2,1,12,18,4,3,1,3,0,0
3,0,19,1,4,A,0,0,D,11,19,4,7,14,10,0,4,1,2,0,0
4,17,9,0,0,D,0,0,D,14,10,1,3,18,12,4,6,3,3,0,0
5,7,3,1,2,A,0,1,A,10,13,2,4,11,9,2,5,4,5,1,1
6,4,21,4,2,H,2,0,H,13,11,8,3,11,18,5,3,2,4,0,0
7,20,24,3,0,H,1,0,H,21,4,8,3,14,7,4,0,1,0,0,1
8,25,12,0,0,D,0,0,D,14,6,4,1,19,21,6,2,5,1,1,0
9,8,2,0,0,D,0,0,D,8,9,2,3,13,19,6,8,2,2,0,0


In [None]:
#4 Barcelona
#19 Madrid

In [None]:
#Split Our Dataset and Drop Prediction Columns
X = df.drop(["FTHG", "FTAG", "AwayTeam", "HomeTeam", "FTR", "HTR", "HTAG", "HTHG"], axis=1)
Y = df["FTR"]

In [None]:
X.head()

Unnamed: 0,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,4,22,2,4,24,15,1,9,6,3,1,1
1,7,12,2,3,10,16,2,4,1,5,0,0
2,6,10,2,1,12,18,4,3,1,3,0,0
3,11,19,4,7,14,10,0,4,1,2,0,0
4,14,10,1,3,18,12,4,6,3,3,0,0


In [None]:
Y.head()

Unnamed: 0,FTR
0,H
1,D
2,D
3,A
4,D


In [None]:
#Turn Our FTR Into Numerical Values
Y.replace(
{"H":1, "A":0, "D":2}
,inplace=True)
Y.head()

  Y.replace(


Unnamed: 0,FTR
0,1
1,2
2,2
3,0
4,2


In [None]:
X.shape

(1610, 12)

In [None]:
Y.shape

(1610,)

In [None]:
#Run Device Agnostic Code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
#Turn our Data Into Pytorch Tensors
X = np.asarray(X)
Y = np.asarray(Y)

X = torch.from_numpy(X).type(torch.float)
Y = torch.from_numpy(Y).type(torch.LongTensor)

In [None]:
#Train Test Split
X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size=0.2, random_state=2
)

In [None]:
#Add Data To Device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

In [None]:
##Create Model
class PredictionModel(nn.Module):
    def __init__(self, input_shape, output_shape, hidden_units):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            #nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            #nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            #nn.ReLU()
)

    def forward(self, x):
        return self.layer_stack(x)


#Instance of Model
model_0 = PredictionModel(
input_shape=12,
output_shape=3,
hidden_units=7
).to(device)

In [None]:
#Create Accuracy Function
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    accuracy = (correct / len(y_pred)) * 100
    return accuracy

In [None]:
#Create Loss Function and Optimizer
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
params=model_0.parameters(),
lr=0.001
)

In [None]:
#Make Predictions
model_0.eval()

with torch.inference_mode():
    y_logits = model_0(X_test).to(device)
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)

    loss = loss_fn(y_logits, y_test)
    accuracy = accuracy_fn(y_test, y_preds)

#print(y_preds)
print(loss)
print(accuracy)

tensor(1.1029, device='cuda:0')
25.77639751552795


In [None]:
#Training and Testing Function
def train_model(
model: torch.nn.Module,
optimizer: torch.optim.Optimizer,
loss_fn,
device=device,
):
    #Model Train
    model.train()

    #Do The Forward Pass
    y_logits = model(X_train)
    #Turn Our Logits To Predictions
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)

    #Calculate The Loss
    loss = loss_fn(y_logits, y_train)
    accuracy = accuracy_fn(y_train, y_preds)

    #Optimizer zero grad
    optimizer.zero_grad()

    #Loss backward
    loss.backward()

    #Optimizer step
    optimizer.step()

    return loss, accuracy

In [None]:
#Test Function
def test_model(
model: torch.nn.Module,
loss_fn,
device=device
):
    #Model Eval Mode
    model.eval()

    #Activate Inference Mode
    with torch.inference_mode():
        test_logits = model(X_test)
        test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

        test_loss = loss_fn(test_logits, y_test)
        test_accuracy = accuracy_fn(y_test, test_preds)

        return test_loss, test_accuracy

In [None]:
#Train and Test Our Model
from tqdm.auto import tqdm

torch.manual_seed(2)
torch.cuda.manual_seed(2)

epochs = 150
for epoch in tqdm(range(epochs)):
    loss, accuracy = train_model(model_0, optimizer, loss_fn)
    test_loss, test_accuracy = test_model(model_0, loss_fn)

    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.4f}, Accuracy: {accuracy:.2f}% | Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

  0%|          | 0/150 [00:00<?, ?it/s]

Epoch: 0 | Loss: 0.9321, Accuracy: 56.13% | Test Loss: 0.9605, Test Accuracy: 52.17%
Epoch: 10 | Loss: 0.9269, Accuracy: 56.29% | Test Loss: 0.9553, Test Accuracy: 52.48%
Epoch: 20 | Loss: 0.9222, Accuracy: 56.68% | Test Loss: 0.9506, Test Accuracy: 51.86%
Epoch: 30 | Loss: 0.9180, Accuracy: 56.83% | Test Loss: 0.9463, Test Accuracy: 51.24%
Epoch: 40 | Loss: 0.9144, Accuracy: 56.99% | Test Loss: 0.9425, Test Accuracy: 51.24%
Epoch: 50 | Loss: 0.9112, Accuracy: 57.30% | Test Loss: 0.9392, Test Accuracy: 50.93%
Epoch: 60 | Loss: 0.9084, Accuracy: 56.91% | Test Loss: 0.9362, Test Accuracy: 51.24%
Epoch: 70 | Loss: 0.9060, Accuracy: 57.22% | Test Loss: 0.9336, Test Accuracy: 50.93%
Epoch: 80 | Loss: 0.9039, Accuracy: 57.22% | Test Loss: 0.9312, Test Accuracy: 51.55%
Epoch: 90 | Loss: 0.9021, Accuracy: 57.14% | Test Loss: 0.9291, Test Accuracy: 52.17%
Epoch: 100 | Loss: 0.9005, Accuracy: 57.30% | Test Loss: 0.9273, Test Accuracy: 52.48%
Epoch: 110 | Loss: 0.8991, Accuracy: 57.38% | Test Los

In [None]:
#Make Predictions On Our Data
our_data = [[13.1, 14.2,  4.5,  5.1, 12.4, 11.1,  4.7,  5. ,  2.7,  1.2,  0.2,
        0.2]]
data_np = np.asarray(our_data)
data_torch = torch.from_numpy(data_np).type(torch.float32)
reshape_data = data_torch.reshape(1, -1)
reshape_data

tensor([[13.1000, 14.2000,  4.5000,  5.1000, 12.4000, 11.1000,  4.7000,  5.0000,
          2.7000,  1.2000,  0.2000,  0.2000]])

In [None]:
#Add to device
reshape_data = reshape_data.to(device)

In [None]:
#See Match winner

model_0.eval()
with torch.inference_mode():
    game_logits = model_0(reshape_data)
    game_probs = torch.softmax(game_logits, dim=1)
    game_preds = torch.argmax(game_probs, dim=1)

print(game_preds.item())

1


In [None]:
#Get Predictions In Full
match_pred = game_preds.item()

if match_pred == 1:
    print("REAL MADRID WINS THE MATCH")
elif match_pred == 0:
    print("BARCELONA WINS THE MATCH")
else:
    print("THE MATCH ENDS IN A DRAW")

REAL MADRID WINS THE MATCH


In [None]:
import nbformat

# Replace "your_notebook.ipynb" with your file path if needed
nb = nbformat.read("Real Madrid vs Barcelona Prediction.ipynb", as_version=4)

# Remove widget metadata if it exists
if 'widgets' in nb['metadata']:
    del nb['metadata']['widgets']

# Save cleaned notebook
nbformat.write(nb, "clean_notebook.ipynb")
print("Cleaned notebook saved successfully!")

FileNotFoundError: [Errno 2] No such file or directory: 'Real Madrid vs Barcelona Prediction.ipynb'

In [None]:
import os
os.getcwd()

'/content'

In [None]:
os.listdir()

['.config',
 'LaLiga_2526.csv',
 'LaLiga_2425.csv',
 'LaLiga_2122.csv',
 'LaLiga_2223.csv',
 'LaLiga_2324.csv',
 'sample_data']

In [132]:
import nbformat
from google.colab import drive
import os

# Mount Google Drive (if your notebook is saved there)
drive.mount('/content/drive')

# Change this path to where your notebook is saved
nb_path = "/content/drive/MyDrive/Colab Notebooks/Real Madrid vs Barcelona Prediction.ipynb"

# If it’s not there, you can check the path using os.listdir()

nb = nbformat.read(nb_path, as_version=4)

if 'widgets' in nb['metadata']:
    del nb['metadata']['widgets']

clean_path = "/content/drivReal_Madrid_vs_Barcelona_Prediction_CLEAN.ipynb"
nbformat.write(nb, clean_path)

print("✅ Cleaned notebook saved at:", clean_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Cleaned notebook saved at: /content/Real_Madrid_vs_Barcelona_Prediction_CLEAN.ipynb


In [None]:
import os
os.listdir("/content")

['.config',
 'LaLiga_2526.csv',
 'drive',
 'LaLiga_2425.csv',
 'LaLiga_2122.csv',
 'LaLiga_2223.csv',
 'LaLiga_2324.csv',
 'sample_data']