In [6]:
#=============================================================================
# Modules
#=============================================================================

# Standard modules
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import transforms
from torchvision.datasets import FashionMNIST

# Custom modules
from logger import logProgress
from data_loader import loadImages, loadLabels

In [7]:
#=============================================================================
# Variables
#=============================================================================

# Path to the JSON metadata file
metadataFilePath = "configurations.json"

# Pixel normalisation value: image arrays are divided by this during
# pre-processing (presumably the maximum 8-bit intensity - confirm against
# the data loader)
pixels = 255.

# Seed for repeatable random initialisations.
# np.random.seed() returns None, so the integer is stored in `seed` first and
# then handed to each library that owns its own random number generator.
seed = 123456789
np.random.seed(seed)      # NumPy global RNG
torch.manual_seed(seed)   # PyTorch default RNG (weight initialisation, DataLoader shuffling)

# Set loss function for DL methods
criterion = nn.CrossEntropyLoss()

In [8]:
 # Logging
logProgress("Starting programme...")

#==========================================================================
# Data loading
#==========================================================================

# Parse the JSON configuration file into a dictionary
logProgress("Importing metadata...")
with open(metadataFilePath, "r") as metadata_file:
    jsonData = json.load(metadata_file)
logProgress("Imported metadata")

# Pull out the settings referenced by the rest of the notebook
loggingName, runNumber, dataPath = (
    jsonData["loggingName"],
    jsonData["runNumber"],
    jsonData["dataPath"],
)
outputFigPath, outputValPath, outputModelPath = (
    jsonData["outputFigPath"],
    jsonData["outputValPath"],
    jsonData["outputModelPath"],
)

# Write the incremented run number back to the configuration file for the
# next execution; `runNumber` itself keeps the value read above
logProgress("Updating metadata file...")
jsonData.update(runNumber=str(int(runNumber) + 1))
with open(metadataFilePath, "w") as metadata_file:
    json.dump(jsonData, metadata_file, indent=4)
logProgress("Updated metadata file")

# Locations of the four gzipped dataset files inside `dataPath`
trainImagesPath, trainLabelsPath, testImagesPath, testLabelsPath = (
    os.path.join(dataPath, fileName)
    for fileName in (
        "train-images-idx3-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    )
)

# Read images and labels for the training and test splits
logProgress("Loading data...")
trainImages, trainLabels = loadImages(trainImagesPath), loadLabels(trainLabelsPath)
testImages, testLabels = loadImages(testImagesPath), loadLabels(testLabelsPath)
logProgress("Loaded data")


#==========================================================================
# Data pre-processing
#==========================================================================

# Collapse every image into a single row (first axis kept, remaining axes
# merged) and divide by the `pixels` normalisation value
logProgress("Pre-processing image data...")
trainImages = trainImages.reshape(trainImages.shape[0], -1) / pixels
testImages = testImages.reshape(testImages.shape[0], -1) / pixels
logProgress("Pre-processed image data")

01 May 2024 21:22:45: Starting programme...
01 May 2024 21:22:45: Importing metadata...
01 May 2024 21:22:45: Imported metadata
01 May 2024 21:22:45: Updating metadata file...
01 May 2024 21:22:45: Updated metadata file
01 May 2024 21:22:45: Loading data...
01 May 2024 21:22:46: Loaded data
01 May 2024 21:22:46: Pre-processing image data...
01 May 2024 21:22:47: Pre-processed image data


In [18]:
# Gaussian Naive-Bayes: run the model on the pre-processed data, then write
# its parameters and validation results to the configured output locations

from gnb import GaussianNaiveBayes

logProgress("Running Naive-Bayes...")
GNB = GaussianNaiveBayes()
GNB.run(trainImages, trainLabels, testImages, testLabels)
logProgress("Naive-Bayes completed")

# Output file for the model parameters, tagged with the current run number
gnbModelFile = f"{outputModelPath}naive_bayes_model_parameters_{runNumber}.txt"
GNB.saveModel(gnbModelFile)
logProgress("Naive-Bayes model saved")

# Output file for the validation results, tagged with the current run number
gnbValidationFile = f"{outputValPath}naive_bayes_validation_results_{runNumber}.txt"
GNB.saveValidation(gnbValidationFile)
logProgress("Naive-Bayes validation accuracy saved")

30 Apr 2024 20:06:17: Running Naive-Bayes...


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [6]:
# k-Nearest neighbours: train over k = 1..20, predict on the test split using
# the `k` attribute held by the model after training, then save the outputs

from knn import kNearestNeighbours

# Path prefix handed to both train() and predict() (presumably used for the
# figures they produce - confirm in knn.py)
knnFigPrefix = f"{outputFigPath}{runNumber}"

logProgress("Training k-Nearest neighbours...")
kNN = kNearestNeighbours()
kNN.train(knnFigPrefix, trainImages, trainLabels, kmin=1, kmax=20)
logProgress("k-Nearest neighbours training completed")

logProgress("Validating k-Nearest neighbours...")
kNN.predict(
    knnFigPrefix,
    trainImages,
    trainLabels,
    testImages,
    testLabels,
    k=kNN.k,
)
logProgress("k-Nearest neighbours validation completed")

# Model parameters file, prefixed with the current run number
knnModelFile = f"{outputModelPath}{runNumber}_knn_model_parameters_.txt"
kNN.saveModel(knnModelFile)
logProgress("k-Nearest neighbours model saved")

# Validation results file, prefixed with the current run number
knnValidationFile = f"{outputValPath}{runNumber}_knn_validation_results.txt"
kNN.saveValidation(knnValidationFile)
logProgress("k-Nearest neighbours validation accuracy saved")

01 May 2024 02:23:34: Training k-Nearest neighbours...


Training Progress:   0%|          | 0/60000 [00:00<?, ?it/s]

Training Progress:   0%|          | 24/60000 [00:07<5:11:45,  3.21it/s]


KeyboardInterrupt: 

In [10]:
from nn import NeuralNetwork

# Per-image transformation pipeline: convert to a tensor first, then
# normalise with mean 0.5 and std 0.5 so values fall in the range -1 to 1
transformSteps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(transformSteps)

# FashionMNIST training and test splits, stored under the current directory
# and downloaded when requested via download=True. No separate validation
# split is created in this cell.
train_set = FashionMNIST(root="./", train=True, transform=transform, download=True)
test_set = FashionMNIST(root="./", train=False, transform=transform, download=True)

# Batched loaders used directly for training and evaluation below:
# 64 samples per batch, last partial batch kept, only the training data shuffled
train_loader = data.DataLoader(
    train_set,
    batch_size=64,
    shuffle=True,
    drop_last=False,
)
test_loader = data.DataLoader(
    test_set,
    batch_size=64,
    shuffle=False,
    drop_last=False,
)

# Build the network and an Adam optimiser over its parameters, then train.
# The last argument is a path prefix (presumably for figures written during
# training - confirm in nn.py).
logProgress("Training neural network...")
NN = NeuralNetwork()
optimizer = optim.Adam(NN.parameters(), lr=0.001)
nnFigPrefix = f"{outputFigPath}{runNumber}_"
NN.train(train_loader, criterion, optimizer, nnFigPrefix)
logProgress("neural network training completed")

# Evaluate on the held-out test loader
logProgress("Validating neural network...")
NN.evaluate(test_loader)
logProgress("Neural network validation completed")

# Save the trained model, prefixed with the current run number
nnModelFile = f"{outputModelPath}{runNumber}_nn_model.pth"
NN.saveModel(nnModelFile)
logProgress("Neural network model saved")

01 May 2024 21:24:08: Training neural network...


AttributeError: module 'torch' has no attribute 'ReLU'