In [None]:
%%capture
!git init

# Uses (restricted) GitHub token to access private repo
# Valid for 30 days starting 6/15/2024
!git remote add origin https://bryjen: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@github.com/WilliamNazarian/Comp472Ai.git
!git fetch origin
!git reset --hard origin/main

In [None]:
%%capture
!pip install -r requirements.txt
!pip install pipe

In [None]:
import os
import sys

import src.kfold.kfold_training_testing as kfold

from src.types import *
from src.models.main_model import OB_05Model
from src.kfold.kfold_training_config import KFoldTrainingConfig

# Directory that receives the k-fold training artefacts.
output_dir = "../output/kfold_model"

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race; it still raises if the path exists but is not a directory.
os.makedirs(output_dir, exist_ok=True)

In [None]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from torch.utils.data import random_split


# Image pipeline shared by every dataset built in this notebook.
# NOTE(review): the random flip/rotation steps are part of the single shared
# transform, so they presumably also run on the folds used for evaluation --
# confirm this is intended.
_pipeline_steps = [
    # Bring every image to a fixed 90x90 size.
    transforms.Resize((90, 90)),
    # Collapse to a single grayscale channel.
    transforms.Grayscale(num_output_channels=1),
    # Augmentation: random horizontal mirror and a small random rotation.
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    # Convert to a tensor, then normalise the single channel with mean 0.5 / std 0.5.
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(_pipeline_steps)

def get_trainset_colab(use_colored=False):
    """Build the full image dataset from ``dataset/cleaned_images``.

    The images are loaded through ``datasets.ImageFolder`` with the
    notebook-level ``transform`` applied.

    Args:
        use_colored: Accepted for signature compatibility; this function does
            not consult it, so the same dataset is returned either way.

    Returns:
        The ``ImageFolder`` dataset rooted at ``dataset/cleaned_images``.
    """
    dataset_root = "dataset/cleaned_images"
    image_folder = datasets.ImageFolder(root=dataset_root, transform=transform)
    return image_folder

def split_into_n_sub_datasets_colab(folds: int):
    """Randomly partition the cleaned image dataset into ``folds`` subsets.

    The first ``folds - 1`` subsets each hold ``len(dataset) // folds`` items;
    the last subset additionally absorbs the remainder so that every sample is
    assigned to exactly one subset.

    Args:
        folds: Number of subsets to create; must be at least 1.

    Returns:
        The result of ``random_split`` over the dataset with the computed
        lengths (one subset per fold).

    Raises:
        ValueError: If ``folds`` is smaller than 1.
    """
    if folds < 1:
        raise ValueError(f"folds must be a positive integer, got {folds!r}")

    trainset = datasets.ImageFolder(root=r"dataset/cleaned_images/", transform=transform)
    trainset_len = len(trainset)

    # Integer floor division is exact. The previous int(len * (1 / folds)) could
    # round down by one because 1 / folds is not representable in binary
    # floating point (e.g. 98 * (1 / 49) evaluates to 1.9999999999999998).
    fold_len = trainset_len // folds
    last_fold_len = trainset_len - (folds - 1) * fold_len

    lengths = [fold_len] * (folds - 1) + [last_fold_len]
    return random_split(trainset, lengths)

In [None]:
# Import the names this cell uses explicitly instead of relying on whatever
# `from src.types import *` happens to re-export.
import logging

import torch.nn as nn
import torch.optim as optim

# logger for output (we can output training data to stdout or a file for example)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Full dataset; the k-fold routine receives it through the config below.
trainset = get_trainset_colab()

model = OB_05Model()

initial_learning_rate = 0.0001
criterion = nn.CrossEntropyLoss()
# NOTE(review): the optimizer (and therefore the scheduler) is bound to the
# parameters of this particular `model` instance, while the config below passes
# the model *class* via `model_type`. If the k-fold routine instantiates a fresh
# model per fold, it presumably has to rebuild the optimizer as well -- confirm
# against src.kfold.kfold_training_testing.
optimizer = optim.Adam(model.parameters(), lr=initial_learning_rate, weight_decay=5e-2)
# Reduce the learning rate by a factor of 10 after 5 epochs without improvement
# of the monitored (minimised) quantity.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)

training_config = KFoldTrainingConfig(
    output_dir=output_dir,
    output_logger=logger,

    dataset=trainset,
    classes=trainset.classes,

    num_folds=10,
    epochs_per_fold=100,
    initial_learning_rate=initial_learning_rate,
    patience=5,

    model_type=OB_05Model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler
)

# K-fold

In [None]:
# Run the k-fold cross-validation described by `training_config` and keep what
# it returns in `results_per_fold` for later inspection and serialization.
results_per_fold = kfold.kfold_cross_validation(training_config)

In [None]:
import src.types as tps

# Pull the second element out of every per-fold result pair and render the
# collected evaluation results as a styled DataFrame.
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style

In [None]:
import os
import pickle
import src.types as tps

# Derive the file location from `output_dir` so the results always land next to
# the other k-fold artefacts instead of duplicating the directory literal.
output_path = os.path.join(output_dir, "results.pkl")

# Serialize the results into an output file
with open(output_path, "wb") as file:
    pickle.dump(results_per_fold, file)

# Deserialize and load from a file
# NOTE: pickle.load can execute arbitrary code; only load files this notebook
# (or another trusted source) has written.
with open(output_path, "rb") as f:
    results_per_fold = pickle.load(f)

# Second element of every per-fold pair holds the evaluation results.
evaluation_results_list = [fold_result[1] for fold_result in results_per_fold]
df = tps.EvaluationResults.format_evaluation_results_as_df(evaluation_results_list)
df.style