In [None]:
import os
from collections import Counter

# Third-party imports keep their original relative order: names bound after the
# fastai star import must keep winning over anything the star import exports.
import pandas as pd
import torch
from sqlalchemy import create_engine
from sqlalchemy.sql import text
from fastai.tabular.all import *
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay



In [None]:
# Pick the compute device once: the Metal (MPS) backend when PyTorch reports it
# as available, otherwise the CPU.
mps_ready = torch.backends.mps.is_available()
selected_device = torch.device("mps" if mps_ready else "cpu")
print("Using Apple Silicon" if mps_ready else "MPS not available, using CPU")

In [None]:
# The connection URL is read from the SYAS_DATABASE_URL environment variable so
# real credentials never have to be committed with the notebook. The fallback is
# the local development database this notebook was originally written against.
DATABASE_URL = os.environ.get(
    "SYAS_DATABASE_URL",
    "postgresql+psycopg2://admin:admin@localhost:5432/SYAS",
)
engine = create_engine(DATABASE_URL)
sql_query = text("SELECT * FROM matches_values")
# The context manager returns the connection as soon as the frame is loaded.
with engine.connect() as conn:
    df = pd.read_sql(sql_query, conn)
# Bare last expression: the notebook renders it as a rich table instead of
# the plain-text dump produced by print().
df.head()

In [None]:
# Every numeric-dtype column of the loaded frame is treated as a continuous variable.
numeric_columns = df.select_dtypes(include='number').columns
cont_names = numeric_columns.tolist()
cont_names

In [None]:
# Target column. Whatever is neither continuous nor the target is handled as a
# categorical variable; the frame's column order is preserved.
dep_var = 'match_status'
non_categorical = cont_names + [dep_var]
cat_names = list(filter(lambda column: column not in non_categorical, df.columns))
cat_names

In [None]:
# Preprocessing steps handed to fastai for the tabular columns.
procs = [Categorify, FillMissing, Normalize]
# Guard against target leakage: cont_names is collected from *all* numeric
# columns, so a numeric target column would otherwise also be passed to the
# model as a continuous input feature.
feature_cont_names = [name for name in cont_names if name != dep_var]
# 80/20 random train/validation split with a fixed seed for reproducibility;
# y_block=CategoryBlock makes this a classification problem on dep_var.
dls = TabularDataLoaders.from_df(
    df,
    path='.',
    procs=procs,
    cat_names=cat_names,
    cont_names=feature_cont_names,
    y_names=dep_var,
    valid_pct=0.2,
    seed=42,
    device=selected_device,
    y_block=CategoryBlock
)

In [None]:
# Show the vocab attached to the DataLoaders; the same object is used later as
# the confusion-matrix display labels, so it is presumably the list of target
# classes in encoded order (confirm against the rendered output).
dls.vocab

In [None]:
# Sanity check: render a sample batch to eyeball the features and target before training.
dls.show_batch()

In [None]:
# learn = tabular_learner(dls, metrics=F1Score(pos_label=0), loss_func=FocalLossFlat())
# learn.fit_one_cycle(10, cbs= [
#     EarlyStoppingCallback(monitor='valid_loss', patience=2),
#     SaveModelCallback(monitor='valid_loss')
# ])

Focal loss failed (that attempt is kept commented out above for reference), so I'll switch to custom class weights in the loss function instead.

In [None]:
# Inverse-frequency ("balanced") class weights: weight_c = N / (K * n_c), where
# N is the number of training rows, K the number of classes and n_c the number
# of training rows of class c. Rare classes therefore get larger weights.
# Assumes the target column of train_ds.items holds integer class codes
# 0..K-1 (the indexing below relies on it) -- TODO confirm.
train_y = dls.train_ds.items[dep_var]
counts = Counter(train_y)
# Take K from the DataLoaders vocab, not from the labels that happen to occur in
# the training split, so the weight vector always has one entry per class.
num_classes = len(dls.vocab)
total_samples = sum(counts.values())
missing_classes = [i for i in range(num_classes) if counts[i] == 0]
if missing_classes:
    # Previously this surfaced as a bare ZeroDivisionError (or a weight/logit
    # size mismatch later on); fail early with an actionable message instead.
    raise ValueError(
        f"No training samples for class index(es) {missing_classes}; "
        "cannot compute inverse-frequency class weights."
    )
weights = [total_samples / (num_classes * counts[i]) for i in range(num_classes)]
class_weights = torch.tensor(weights, dtype=torch.float32).to(dls.device)
# manual_weights = torch.tensor([25.0, 0.54], dtype=torch.float32).to(dls.device)
weighted_loss_func = nn.CrossEntropyLoss(weight=class_weights)


def squeezed_loss_func(preds, targs, **kwargs):
    """Class-weighted cross-entropy that accepts targets with extra dimensions.

    Args:
        preds: Model outputs (logits), passed to ``nn.CrossEntropyLoss`` unchanged.
        targs: Target class indices; flattened to 1-D before the loss is computed
            because the target tensor arrives with too many dimensions.
        **kwargs: Forwarded unchanged to the wrapped loss module.

    Returns:
        The weighted cross-entropy loss computed by ``weighted_loss_func``.
    """
    # flatten() rather than squeeze(): squeeze() also removes the batch axis
    # when a batch holds a single row, leaving a 0-d target that no longer
    # matches the (1, C) predictions.
    return weighted_loss_func(preds, targs.flatten(), **kwargs)


print(f"Calculated Weights (for class 0, then 1): {class_weights}")

In [None]:
# Tabular model trained with the class-weighted loss; F1 is reported with
# pos_label=0.
learn = tabular_learner(
    dls,
    loss_func=squeezed_loss_func,
    metrics=F1Score(pos_label=0),
)
# Learning-rate range test, asking for both the `valley` and `slide` suggestions.
suggestions = learn.lr_find(suggest_funcs=(valley, slide))

suggestions

In [None]:
# Both trackers follow the F1 metric: one for early stopping, one for
# saving the model.
first_run_cbs = [
    EarlyStoppingCallback(monitor='f1_score', patience=2),
    SaveModelCallback(monitor='f1_score'),
]
learn.fit_one_cycle(1, lr_max=0.0004, cbs=first_run_cbs)

In [None]:
# Score the validation set (ds_idx=1) and take the highest-scoring class per row.
preds, targs = learn.get_preds(ds_idx=1)
predicted_classes = preds.argmax(dim=1)
# Pin the label set to every class index in the vocab: without `labels=`, a class
# missing from both targets and predictions shrinks the matrix, which then no
# longer lines up with the display labels taken from dls.vocab.
class_indices = list(range(len(dls.vocab)))
cm = confusion_matrix(
    targs.flatten().cpu().numpy(),  # 1-D array instead of a column vector
    predicted_classes.cpu().numpy(),
    labels=class_indices,
)
fig, ax = plt.subplots()
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=dls.vocab)
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.set_title("Validation confusion matrix")
plt.show()

In [None]:
# Checkpoint the learner under a descriptive name before experimenting with a
# different learning rate in the next cell.
learn.save("changed_weights_improved_lr")

In [None]:
# Second run on the same learner with a higher peak learning rate; the trackers
# again follow the F1 metric.
second_run_cbs = [
    EarlyStoppingCallback(monitor='f1_score', patience=2),
    SaveModelCallback(monitor='f1_score'),
]
learn.fit_one_cycle(1, lr_max=0.001445, cbs=second_run_cbs)