## Neural Network Notebook for Object Classification with SDSS DR18

In this notebook, we classify objects from the Sloan Digital Sky Survey Data Release 18 (SDSS DR18) using a neural network trained on the `u`, `g`, `r`, `i` and `z` columns.

In [None]:
import kagglehub #used to get the data from kaggle.com
from kagglehub import KaggleDatasetAdapter #used to fetch the specific dataset from kaggle
import pandas as pd #used for viewing and manipulating the data
import matplotlib.pyplot as plt #used for data visualisation

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split as skl_tts
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report as skl_cr
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import sys, os #imports sys and os, allows for modifying the path to get functions
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) #changes the path to include the py folder and its contents (parent folder)

from functions import * #calls all functions from functions.py

ImportError: cannot import name 'skl_tts' from 'sklearn.model_selection' (/home/codespace/.local/lib/python3.12/site-packages/sklearn/model_selection/__init__.py)

In [None]:
# Load the dataset through the project helper dataGrabber (presumably defined in
# functions.py and brought in by the star import -- confirm). Later cells call
# data.head() and index data['class'], i.e. the result is used as a pandas DataFrame.
data = dataGrabber()

In [None]:
# Lift pandas' column-display cap so the preview below shows every column.
# Note: this is a global option and stays in effect for the rest of the session.
pd.options.display.max_columns = None
data.head()

In [None]:
# Bar chart of the number of rows per value of the 'class' column, to show how
# balanced the target is before training.
classCounts = data['class'].value_counts()

fig, ax = plt.subplots()
classCounts.plot.bar(ax = ax, color = ['forestgreen', 'goldenrod', 'darkcyan'], rot = 0) #rot = 0 keeps the tick labels horizontal
ax.set_title("Class Distribution")
ax.set_xlabel("Class")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Feature / target selection: the five band columns are the inputs, 'class' is the label.
features = ['u', 'g', 'r', 'i', 'z']

x = data[features]
y = data['class']

# Encode the class labels as integer ids (stored as torch.long tensors below).
# The fitted encoder is kept because the evaluation cell receives it.
labelEncoder = LabelEncoder()
yEncoded = labelEncoder.fit_transform(y)

test_size = 0.2
random_state = 12

# Seed torch's global RNG so the DataLoader shuffling below, and any weight
# initialisation performed after this cell, is repeatable on Restart & Run All.
torch.manual_seed(random_state)

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the test
# rows influence the mean/std applied to the training rows (data leakage).
# dataSplitting is a project helper; it is given a NumPy array, as before, and is
# assumed to return (xTrain, xTest, yTrain, yTest) in that order -- confirm.
xTrainRaw, xTestRaw, yTrain, yTest = dataSplitting(x.to_numpy(), yEncoded, test_size, random_state)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to the held-out rows.
scaler = StandardScaler()
xTrain = scaler.fit_transform(xTrainRaw)
xTest = scaler.transform(xTestRaw)

# float32 features / int64 labels for the network and its loss.
xTrainTensor = torch.tensor(xTrain, dtype = torch.float32)
yTrainTensor = torch.tensor(yTrain, dtype = torch.long)
xTestTensor = torch.tensor(xTest, dtype = torch.float32)
yTestTensor = torch.tensor(yTest, dtype = torch.long)

# Mini-batches of 64; only the training batches are shuffled so that evaluation
# order stays fixed.
trainData = TensorDataset(xTrainTensor, yTrainTensor)
testData = TensorDataset(xTestTensor, yTestTensor)
trainLoader = DataLoader(trainData, batch_size = 64, shuffle = True)
testLoader =  DataLoader(testData, batch_size = 64, shuffle = False)

In [None]:
# Instantiate the classifier with its default arguments. NeuralNetworkClassifier is
# not defined in this notebook (presumably it comes from functions.py via the star
# import -- confirm); later cells call model.parameters() on it.
model = NeuralNetworkClassifier()

In [None]:
# Training configuration: cross-entropy loss, Adam with a 1e-3 learning rate.
# epochs is handed to modelTraining (presumably the number of passes over
# trainLoader) and is kept as a notebook-level name.
epochs = 25
criteria = nn.CrossEntropyLoss()
optimiser = optim.Adam(params = model.parameters(), lr = 1e-3)

# modelTraining is a project helper; its return value is plotted against the
# epoch number in the next cell.
lossHistory = modelTraining(model, trainLoader, criteria, optimiser, epochs)

In [None]:
# Training-loss curve: one marker per entry of lossHistory, x-axis numbered from 1.
fig, ax = plt.subplots()
ax.plot(range(1, len(lossHistory) + 1), lossHistory, marker = 'o', color = 'g')
ax.set_title("Training Loss over Epochs")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")

# Horizontal reference line at a hard-coded loss of 0.170 (presumably the level an
# earlier run settled at -- update it if the training setup changes).
# axhline's xmin/xmax are fractions of the axes width in [0, 1], not data values,
# so the defaults (0 and 1) already span the full plot; the previous
# xmax = epochs was outside that range.
ax.axhline(y = 0.170, linestyle = '-', color = 'r', label = "Loss ~ 0.170")

ax.legend()
ax.grid(True) #explicitly on, rather than toggling the current grid state
plt.show()

In [None]:
# Evaluate the trained model on the held-out batches. modelEvaluationNN is a project
# helper (presumably from functions.py -- confirm); it is given the label encoder,
# presumably to map predicted integer ids back to the original class names.
modelEvaluationNN(model, testLoader, labelEncoder)