In [1]:
from DataLoader import DataLoader
import GraphManager
from GNN import HeteroGNN
import numpy as np
import pandas as pd
import torch
import torch.optim
import torch_geometric
import torch_geometric.data
import networkx as nx
import matplotlib.pyplot as plt
import Utils
from tqdm import tqdm
import pickle
import torchinfo

In [2]:
# Seed the numpy and torch RNGs *before* the model is built so that parameter
# initialisation (and any other numpy/torch randomness used later) repeats on
# "Restart Kernel -> Run All".
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# NOTE(review): the meaning of the second DataLoader argument (11) is not visible
# from this notebook - confirm against DataLoader's definition.
dl = DataLoader('data/KaggleDataset.csv', 11)
gm = GraphManager.GraphManager(dl, Utils.GLOBALS.DEVICE.value)

# One embedding row per entity known to the data loader; the remaining layer
# sizes and dropout settings come from the shared hyper-parameter enum in Utils.
model = HeteroGNN(
    embedding_dims=[dl.entities.shape[0], Utils.HYPERPARAETERS.EmbeddingDim.value],
    conv_dims=Utils.HYPERPARAETERS.ConvDims.value,
    fc_dims=Utils.HYPERPARAETERS.FCDims.value,
    dropout=Utils.HYPERPARAETERS.DropOuts.value
).to(Utils.GLOBALS.DEVICE.value)

# NLLLoss expects log-probabilities; the model summary shows a final LogSoftmax layer.
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=Utils.HYPERPARAETERS.LearningRate.value)

# True  -> load the pre-built graphs from the Load folder.
# False -> build the graphs, save them in the Save folder and use those.
already_saved = False

# True  -> load the model and the loss/accuracy lists from the Load folder and keep training.
# False -> start a new training run from freshly reset parameters.
# ATTENTION: the training results are written to the Save folder in both cases -
# copy any previous work out of it before starting.
continue_training = False

In [3]:
# Display the per-layer parameter counts of the model, expanding nested modules up to three levels deep.
torchinfo.summary(model, depth=3)

Layer (type:depth-idx)                   Param #
HeteroGNN                                --
├─Embedding: 1-1                         87,048
├─ModuleList: 1-2                        --
│    └─HeteroConv: 2-1                   --
│    │    └─ModuleDict: 3-1              1,360
│    └─HeteroConv: 2-2                   --
│    │    └─ModuleDict: 3-2              2,512
│    └─HeteroConv: 2-3                   --
│    │    └─ModuleDict: 3-3              2,512
├─ModuleList: 1-3                        --
│    └─Linear: 2-4                       528
│    └─Linear: 2-5                       136
│    └─Linear: 2-6                       27
├─LogSoftmax: 1-4                        --
Total params: 93,499
Trainable params: 93,499
Non-trainable params: 0

In [4]:
# Build one graph set per league and persist it in the Save folder.
# Skipped entirely when pre-built graphs are going to be loaded instead.
if not already_saved:
    save_dir = Utils.GLOBALS.SavePath.value
    for league, league_df in dl.dataset.groupby('league'):
        gm_file = f'{save_dir}{league}.gm'
        print(f'Making {league} Graphs...')
        gm.make(
            league_df,
            mode='CG',
            validation_portion=0.1,
            test_portion=0.05,
            saveto=gm_file,
        )
        # Report the size of what was just built, followed by a separator line.
        print(f'Graph List Length: {len(gm.graph_list)}')
        print(
            f'Train Mask: {len(gm.train_mask)}',
            f'Validation Mask: {len(gm.validation_mask)}',
            f'Test Mask: {len(gm.test_mask)}',
            f'Saved in: {gm_file}',
            '=' * 80,
            sep='\n',
        )

In [5]:
def train_step(
    model: HeteroGNN,
    g: torch_geometric.data.HeteroData,
    criterion,
    optimizer: torch.optim.Optimizer
):
    """Run a single optimisation step on one graph.

    Puts the model in train mode, clears stale gradients, does a forward and
    backward pass on ``g`` and applies one optimizer update.

    Args:
        model: Network to train; called as ``model(g)``.
        g: Graph fed to the model; its ``y`` attribute holds the target labels.
        criterion: Loss callable invoked as ``criterion(output, g.y)``.
        optimizer: Optimizer that updates the model parameters.

    Returns:
        A ``(loss, correct, total)`` tuple: the loss as a Python number, the
        count of predictions equal to their label, and the number of labels.
        The predictions come from the forward pass made *before* the update.
    """
    model.train()
    optimizer.zero_grad()

    targets = g.y
    scores = model(g)
    step_loss = criterion(scores, targets)
    step_loss.backward()
    optimizer.step()

    # The predicted class is the index of the highest score along the last axis.
    predicted = scores.argmax(dim=-1)
    n_hits = predicted.eq(targets).sum().item()
    n_labels = targets.shape[0]

    return step_loss.item(), n_hits, n_labels


@torch.no_grad()
def evaluation(model: HeteroGNN, g: torch_geometric.data.HeteroData):
    """Count the correct predictions of ``model`` on one graph, without gradients.

    The model is switched to eval mode for the forward pass and is afterwards
    put back into whichever mode (train or eval) it was in when this function
    was called, even if the forward pass raises.

    Args:
        model: Network to evaluate; called as ``model(g)``.
        g: Graph fed to the model; its ``y`` attribute holds the target labels.

    Returns:
        A ``(correct, total)`` tuple: the number of predictions equal to their
        label and the number of labels in ``g.y``.
    """
    # Remember the caller's mode instead of unconditionally forcing train mode afterwards.
    was_training = model.training
    model.eval()
    try:
        out = model(g)
        # The predicted class is the index of the highest score along the last axis.
        pred = torch.argmax(out, dim=-1)
        correct = (pred == g.y).sum().item()
        total = g.y.shape[0]
    finally:
        model.train(was_training)

    return correct, total


In [6]:
# Either resume a previous run from the Load folder or start from scratch.
if continue_training:
    # SECURITY NOTE: torch.load and pickle.load unpickle arbitrary Python objects -
    # only point LoadPath at files you created yourself.
    # NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True, which
    # rejects a whole pickled module; pass weights_only=False there if loading fails.
    model = torch.load(
        f'{Utils.GLOBALS.LoadPath.value}model.pth',
        map_location=Utils.GLOBALS.DEVICE.value,  # works even if the checkpoint was saved on another device
    )
    # The optimizer created in the setup cell still holds the parameters of the
    # model object that was just replaced, so it would never update the loaded
    # model. Rebuild it on the loaded parameters. (The previous optimizer state
    # is not part of the checkpoint, so Adam restarts with fresh statistics.)
    optimizer = torch.optim.Adam(model.parameters(), lr=Utils.HYPERPARAETERS.LearningRate.value)
    with open(f'{Utils.GLOBALS.LoadPath.value}lists.pl', 'rb') as pf:
        loss_list, train_acc_list, eval_acc_list = pickle.load(pf)
else:
    # Fresh run: reset the model parameters and start with empty histories.
    model.reset_parameters()
    loss_list = []
    train_acc_list = []
    eval_acc_list = []

In [7]:
def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def _accuracy_on(model, graph_manager, mask):
    """Return the accuracy of ``model`` over the graphs of ``graph_manager`` indexed by ``mask``.

    NaN is returned when the mask selects no labels, instead of dividing by zero.
    """
    hits = 0
    seen = 0
    for graph_idx in mask:
        correct, total = evaluation(model, graph_manager.graph_list[graph_idx])
        hits += correct
        seen += total
    return _ratio(hits, seen)


# Graph files are read from the Load folder when pre-built graphs are reused,
# otherwise from the Save folder.
graph_dir = Utils.GLOBALS.LoadPath.value if already_saved else Utils.GLOBALS.SavePath.value

# The same model is trained on one league after another; each league gets up to
# 100 epochs over its training graphs and is then scored on its test graphs.
for league, league_df in dl.dataset.groupby('league'):
    print(f'Training On: {league}')
    gm = GraphManager.load(f'{graph_dir}{league}.gm')
    n_train_graphs = len(gm.train_mask)
    try:
        for epoch in range(100):
            t_loss = 0
            t_correct = 0
            t_total = 0
            for idx in gm.train_mask:
                g = gm.graph_list[idx]
                loss, correct, total = train_step(model, g, criterion, optimizer)
                t_loss += loss
                t_correct += correct
                t_total += total

            # The loss is averaged per training graph, the accuracy per label.
            avg_loss = _ratio(t_loss, n_train_graphs)
            train_acc = _ratio(t_correct, t_total)
            print(f'=================================== EPOCH {epoch + 1} ===================================')
            print(f'Average Loss: {avg_loss} - Train Accuracy: {train_acc: .3f}')
            loss_list.append(avg_loss)
            train_acc_list.append(train_acc)

            val_acc = _accuracy_on(model, gm, gm.validation_mask)
            print(f'Validation Accuracy: {val_acc: .3f}')
            eval_acc_list.append(val_acc)

            # Periodic checkpoint: the whole model object plus the metric histories.
            if (epoch+1) % Utils.GLOBALS.SaveEvery.value == 0:
                torch.save(model, f'{Utils.GLOBALS.SavePath.value}model.pth')
                with open(f'{Utils.GLOBALS.SavePath.value}lists.pl', 'wb') as pf:
                    pickle.dump((loss_list, train_acc_list, eval_acc_list), pf)

    except KeyboardInterrupt:
        # An interrupt stops training on the current league only; say so instead
        # of silently carrying on with the test pass and the next league.
        print(f'Interrupted - stopping training on {league}; continuing with its test set.')

    print(f'Test Accuracy: {_accuracy_on(model, gm, gm.test_mask): .3f}')

Training On: Belgium Jupiler League
Average Loss: 1.2409981857646595 - Train Accuracy:  0.387
Validation Accuracy:  0.461
Average Loss: 1.0813993417855465 - Train Accuracy:  0.443
Validation Accuracy:  0.461
Average Loss: 1.0635774518504286 - Train Accuracy:  0.463
Validation Accuracy:  0.461
Average Loss: 1.061327347610936 - Train Accuracy:  0.457
Validation Accuracy:  0.461
Average Loss: 1.0537932713826497 - Train Accuracy:  0.464
Validation Accuracy:  0.461
Average Loss: 1.0624862641999215 - Train Accuracy:  0.463
Validation Accuracy:  0.461
Average Loss: 1.062943082144766 - Train Accuracy:  0.461
Validation Accuracy:  0.461
Average Loss: 1.0561287077990444 - Train Accuracy:  0.467
Validation Accuracy:  0.461
Average Loss: 1.0540829174446338 - Train Accuracy:  0.463
Validation Accuracy:  0.461
Average Loss: 1.0536478356881576 - Train Accuracy:  0.462
Validation Accuracy:  0.461
Average Loss: 1.0521464759653265 - Train Accuracy:  0.467
Validation Accuracy:  0.461
Average Loss: 1.0599

In [8]:
# Score the trained model on the held-out test graphs of every league.
# Graph files come from the Load folder when pre-built graphs are reused,
# otherwise from the Save folder.
graphs_dir = (Utils.GLOBALS.LoadPath if already_saved else Utils.GLOBALS.SavePath).value

for league, league_df in dl.dataset.groupby('league'):
    print(f'Testing On: {league}')
    gm = GraphManager.load(f'{graphs_dir}{league}.gm')

    # One (correct, total) pair per test graph, evaluated in mask order.
    per_graph = [evaluation(model, gm.graph_list[idx]) for idx in gm.test_mask]
    n_hits = sum(hits for hits, _ in per_graph)
    n_labels = sum(count for _, count in per_graph)
    print(f'Test Accuracy: {n_hits / n_labels: .3f}')

Testing On: Belgium Jupiler League
Test Accuracy:  0.452
Testing On: England Premier League
Test Accuracy:  0.433
Testing On: France Ligue 1
Test Accuracy:  0.486
Testing On: Germany 1. Bundesliga
Test Accuracy:  0.484
Testing On: Italy Serie A
Test Accuracy:  0.500
Testing On: Netherlands Eredivisie
Test Accuracy:  0.495
Testing On: Poland Ekstraklasa
Test Accuracy:  0.448
Testing On: Portugal Liga ZON Sagres
Test Accuracy:  0.539
Testing On: Scotland Premier League
Test Accuracy:  0.513
Testing On: Spain LIGA BBVA
Test Accuracy:  0.524
Testing On: Switzerland Super League
Test Accuracy:  0.551
