# Brownlow ML Sandbox

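This notebook contains a minimal working MLP (multi-layer perceptron) for predicting Brownlow Medal votes. As written, the cells below train on the 2000–2024 seasons and predict the 2025 season.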

## Imports

In [None]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch.nn as nn
import torch
import torch.optim as optim
from torch.nn.modules.loss import MSELoss
import glob

In [None]:
# Seed PyTorch's global RNG so repeated runs of the notebook are comparable.
torch.manual_seed(123)
# NOTE(review): absolute, machine-specific path. Every relative glob, read and
# write in the cells below resolves against this directory.
os.chdir("/home/gabriel/Projects/sports-analytics/AFL/res/afl_table_data/")
os.getcwd()

In [None]:
import pathlib

# Convert every 2025 CSV export in the working directory to a parquet file
# named "AFL-Tables_<original stem>.parquet", printing the resolved output
# path and the source file name for each one.
for f in glob.glob("*2025*.csv"):
    df = pd.read_csv(f, low_memory=False)
    # with_suffix swaps only the trailing extension; str.replace would have
    # rewritten every ".csv" occurring anywhere in the file name.
    out = "AFL-Tables_" + pathlib.Path(f).with_suffix(".parquet").name

    print(pathlib.Path(out).resolve())
    df.to_parquet(out)
    print(f)

## Column Name class

In [None]:
class LongFormColumnNames:
    """Column names of the long-format stats frames used in this notebook.

    Keeping the names in one place avoids scattering string literals through
    the loading, cleaning and pivoting cells.
    """

    INDEX = "index"
    PLAYER = "player"
    TEAM = "team"
    ROUND = "round"
    OPPONENT = "opponents"
    # Misspelled original name, kept as an alias so existing references work.
    OPPENENT = OPPONENT
    STAT = "stat"
    VALUE = "value"
    YEAR = "year"

In [None]:
# Collect the game-by-game parquet files from the working directory.
# NOTE(review): the following cell repeats this exact glob, so this cell is
# redundant.
files = glob.glob("AFL-Tables_game-by-game-stats_*.parquet")

In [None]:
# Load every game-by-game parquet file, tag its rows with the year taken from
# the file name (the text between the last "_" and the first following "."),
# and stack everything into a single long-format frame.
files = glob.glob("AFL-Tables_game-by-game-stats_*.parquet")
years = [name.rsplit("_", 1)[-1].split(".")[0] for name in files]

data = []
for path, season in zip(files, years):
    frame = pd.read_parquet(path, engine="fastparquet")
    frame[LongFormColumnNames.YEAR] = season
    data.append(frame)

df = pd.concat(data, ignore_index=True)
df.head()

In [None]:
# Remove rows whose value is the non-numeric marker "Off" or "On"
# (presumably substitution flags; confirm against the source data).
# .copy() makes the result an independent frame, so the column assignment
# below does not write into a slice of the unfiltered frame
# (avoids pandas' SettingWithCopyWarning).
df = df.loc[~df[LongFormColumnNames.VALUE].isin(["Off", "On"])].copy()

# Turn the literal string "NA" into a real missing value, then store every
# remaining value as float32.
NaN_key = {"NA": np.nan}
df[LongFormColumnNames.VALUE] = (
    df[LongFormColumnNames.VALUE].replace(NaN_key).astype(np.float32)
)

In [None]:
# Reshape long -> wide: one row per (player, team, round, opponent, year) and
# one column per stat name, then discard the "subs" column.
df = (
    df.pivot(
        index=[
            LongFormColumnNames.PLAYER,
            LongFormColumnNames.TEAM,
            LongFormColumnNames.ROUND,
            LongFormColumnNames.OPPENENT,
            LongFormColumnNames.YEAR,
        ],
        columns=LongFormColumnNames.STAT,
        values=LongFormColumnNames.VALUE,
    )
    .reset_index()
    .drop(columns="subs")
)

In [None]:
# Years were parsed from file names as strings; make them integers so they can
# be compared numerically.
df = df.astype({"year": np.int32})
# Hold-out set: rounds below 23 of the 2025 season.
# NOTE(review): the variable is still called df_2022 although it holds 2025.
df_2022 = df[df["round"].lt(23) & df["year"].eq(2025)]
# Training set: rounds below 23 of seasons 2000 through 2024 inclusive.
df = df[df["round"].lt(23) & df["year"].between(2000, 2024)]

In [None]:
# Treat every missing stat in the training frame as 0.
df = df.fillna(0)


# Candidate feature column names.
# NOTE(review): the list includes "team" and "opponents", which the feature
# cell drops from X before training; confirm whether this list is still needed.
xlabels = [
    "team",
    "opponents",
    "%_played",
    "behinds",
    "bounces",
    "clangers",
    "clearances",
    "contested_marks",
    "contested_possessions",
    "disposals",
    "frees",
    "frees_against",
    "goal_assists",
    "goals",
    "handballs",
    "hit_outs",
    "inside_50s",
    "kicks",
    "marks",
    "marks_inside_50",
    "one_percenters",
    "rebounds",
    "tackles",
    "uncontested_possessions",
]

# Name of the target column.
y_labels = "brownlow_votes"

In [None]:
# Target: the votes recorded on each row.
Y = df["brownlow_votes"]
# Features: every column left once the identifier columns and the target are
# removed.
X = df.drop(
    columns=[
        "player",
        "team",
        "opponents",
        "round",
        "year",
        "brownlow_votes",
    ]
)

In [None]:
# Display the feature frame for a visual check.
X

In [None]:
# Convert the feature frame and target series to float32 tensors; from here on
# X and Y are torch tensors rather than pandas objects.
X = torch.tensor(X.to_numpy(), dtype=torch.float)
Y = torch.tensor(Y.to_numpy(), dtype=torch.float)

In [None]:
# Build a class for the neural network
class Net(nn.Module):
    """Two-hidden-layer MLP that maps a feature vector to one scalar output.

    Architecture: Linear(in_features, 100) -> ReLU -> Linear(100, 50) -> ReLU
    -> Linear(50, 1).

    Args:
        in_features: Width of the input layer. When omitted it is read from
            the notebook-level feature tensor ``X`` (``X.shape[1]``), which is
            what a plain ``Net()`` call has always done.
    """

    def __init__(self, in_features=None):
        super().__init__()

        if in_features is None:
            # Backward-compatible default: size the input layer from the
            # global training features.
            in_features = X.shape[1]

        # The attribute names are the state_dict key prefixes, so they must
        # stay stable for previously saved checkpoints to load.
        self.hidden1 = nn.Sequential(nn.Linear(in_features, 100), nn.ReLU())
        self.hidden2 = nn.Sequential(nn.Linear(100, 50), nn.ReLU())
        self.output = nn.Linear(50, 1)

    def forward(self, x):
        """Run ``x`` through both hidden layers and the output layer."""
        return self.output(self.hidden2(self.hidden1(x)))

In [None]:
# Mean per-batch training loss of each epoch; train() appends to this list.
MLP_loss = []


def train(
    X=X,
    Y=Y,
    loss_function=nn.MSELoss(),
    epoch_num=100,
    batch_size=100,
    lr=0.0001,
):
    """Fit a fresh ``Net`` on ``(X, Y)`` with Adam and return it.

    Args:
        X: Feature rows. Defaults to the notebook-level ``X`` as it was when
            this function was defined.
        Y: One target per row of ``X``. Same default binding as ``X``.
        loss_function: Callable taking ``(prediction, target)`` and returning
            a scalar loss tensor.
        epoch_num: Number of passes over the data.
        batch_size: Mini-batch size handed to the ``DataLoader``.
        lr: Adam learning rate.

    Returns:
        The trained network. It is left in training mode.

    Side effects:
        Appends one value per epoch to the module-level ``MLP_loss`` and
        prints progress every tenth epoch.
    """
    network = Net()
    network.train()

    # Pair every feature row with its target so the loader yields (x, y).
    samples = list(zip(X, Y))
    loader = torch.utils.data.DataLoader(samples, batch_size=batch_size, shuffle=True)

    optimizer = optim.Adam(network.parameters(), lr=lr, betas=(0.9, 0.999))

    for epoch in range(epoch_num):
        if epoch % 10 == 0:
            print("Iteration: ", epoch, "Completion: ", (epoch) / epoch_num)

        running_loss = 0.0
        for x, y in loader:
            # The network emits shape (batch, 1); give the targets the same
            # shape so the loss compares element-wise instead of broadcasting.
            y = y.unsqueeze(1)

            optimizer.zero_grad()
            loss = loss_function(network(x), y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # running_loss sums one loss value per batch, so the epoch mean is
        # obtained by dividing by the number of batches. The previous code
        # divided by batch_size, which mis-scaled the loss curve.
        MLP_loss.append(running_loss / max(len(loader), 1))

    return network


network = train()

In [None]:
# Persist only the learned parameters (state dict) to the working directory.
torch.save(network.state_dict(), "model_state_2025.pt")

In [None]:
# Rebuild the architecture, then restore the saved parameters into it.
# Net() sizes its input layer from the global X, so X must already hold the
# training feature tensor when this cell runs.
network = Net()

# Read the parameters written by the save cell.
state_dict = torch.load("model_state_2025.pt")

# Copy the stored parameters into the freshly constructed network.
network.load_state_dict(state_dict)

In [None]:
# Plot the recorded loss values against their position in MLP_loss.
plt.plot(np.arange(len(MLP_loss)), MLP_loss, color="red")
# Hide the x-axis ticks (major and minor, bottom and top edge) and their labels.
plt.tick_params(
    axis="x", which="both", bottom=False, top=False, labelbottom=False
)
plt.title("MLP loss")
plt.show()

In [None]:
# Predictions on the training features, detached from autograd and converted
# to a NumPy array.
pred = network(X).detach().numpy()

In [None]:
# Number of predictions produced for the training features.
len(pred)

In [None]:
# Build the hold-out feature tensor the same way the training features were
# built: fill missing stats with 0, drop the identifier and target columns,
# then cast to float32.
# The former `X_test = df_2022[xlabels]` line was removed: its result was
# overwritten by the very next statement, and it could raise KeyError if any
# listed column was absent.
# NOTE(review): this drop list does not cover columns that later cells add to
# df_2022 (e.g. "game_weight", "game_id"), so re-running this cell after them
# will carry those columns into X_test.
df_2022 = df_2022.fillna(0)
X_test = df_2022.drop(
    columns=["player", "team", "opponents", "round", "year", "brownlow_votes"]
)
X_test = X_test.astype(np.float32)
X_test = torch.tensor(X_test.to_numpy(), dtype=torch.float)

In [None]:
# Switch the network to evaluation mode and score the hold-out features.
# The result is a NumPy array with one row per hold-out row.
network.eval()
pred_2022 = network(X_test).detach().numpy()

In [None]:
# Take the first (only) element of every prediction row to get a flat list.
pred_2022 = [i[0] for i in pred_2022]

In [None]:
# Attach the model output to each hold-out row; later cells rank players
# within a game by this column.
df_2022["game_weight"] = pred_2022

In [None]:
# Map the identifiers found in the "team" column to two-letter codes.
# NOTE(review): presumably these codes match the values already used in the
# "opponents" column, because the game ids built below combine both columns;
# confirm against the data.
# NOTE(review): the key "bullldogs" (three l's) is left untouched; it
# presumably mirrors the spelling used by the data source.
key_team = {
    "adelaide": "AD",
    "brisbaneb": "BB",
    "brisbanel": "BL",
    "carlton": "CA",
    "collingwood": "CW",
    "essendon": "ES",
    "fitzroy": "FI",
    "fremantle": "FR",
    "geelong": "GE",
    "goldcoast": "GC",
    "gws": "GW",
    "hawthorn": "HW",
    "melbourne": "ME",
    "kangaroos": "NM",
    "padelaide": "PA",
    "richmond": "RI",
    "stkilda": "SK",
    "swans": "SY",
    "westcoast": "WC",
    "bullldogs": "WB",
}
# key_team = {value:key for key, value in key_team.items()}
# Values without an entry in key_team are left unchanged by replace().
df_2022["team"] = df_2022["team"].replace(key_team)

In [None]:
# Build one key per row from team, opponent and round. Sorting the three
# strings before joining makes the key independent of which side is listed as
# "team", so both sides of the same match produce the same key.
list_sets = [
    "_".join(sorted([team, opponent, str(rnd)]))
    for team, opponent, rnd in zip(
        df_2022["team"], df_2022["opponents"], df_2022["round"]
    )
]

In [None]:
# Store the per-row game key so rows can be grouped by match.
df_2022["game_id"] = list_sets

In [None]:
# Award 3-2-1 votes to the three highest-weighted rows of every game.
games = []
for _, game_rows in df_2022.groupby("game_id"):
    top = game_rows.nlargest(n=3, columns="game_weight")
    # A game_id group can contain fewer than three rows (for example when a
    # team name was not mapped and the two sides got different keys). Hand out
    # only as many votes as there are rows instead of raising on a length
    # mismatch.
    top["votes"] = [3, 2, 1][: len(top)]
    games.append(top)

game_df = pd.concat(games, axis=0)
game_df.shape

In [None]:
# Tally: total awarded votes per player, highest first, top 30 shown.
(
    game_df.groupby("player")[["votes"]]
    .sum()
    .sort_values("votes", ascending=False)
    .head(30)
)