# MSA 2024 Phase 2 - Part 3

In [1]:
import tensorflow as tf
import numpy as np

ModuleNotFoundError: No module named 'tensorflow'

### 1. Data loading & preprocessing

In [5]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tensorflow.keras.utils import to_categorical

train_dir = 'train'
test_dir = 'test'

def loadTrain(root_dir, csv_file):
    ids = []
    images = []
    labels = []
    annotations = pd.read_csv(csv_file)
    for idx, row in annotations.iterrows():
        img_id = int(row['id'])
        img_name = os.path.join(root_dir, f"image_{img_id}.png")
        image = np.array(Image.open(img_name).convert("RGB"))
        label = int(row['label'])

        ids.append(img_id)
        images.append(image)
        labels.append(label)
    return np.array(ids), np.array(images), np.array(labels)

# Load test data
def loadTest(root_dir):
    ids = []
    images = []
    for img_name in os.listdir(root_dir):
        img_id = int(img_name.split('_')[1].split('.')[0])
        img_path = os.path.join(root_dir, img_name)
        image = np.array(Image.open(img_path).convert("RGB"))

        ids.append(img_id)
        images.append(image)
    return np.array(ids), np.array(images)

# Load training, testing data and the training labels provided in train.csv
train_csv = '/Users/elliotbu/Desktop/MSA 2024/3. Deep Learning/train.csv'
id_train, X_train, y_train = loadTrain(train_dir, train_csv)
id_test, X_test = loadTest(test_dir)

# Normalize the data and reshape
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
X_train = X_train.reshape(-1, 32*32*3)  
X_test = X_test.reshape(-1, 32*32*3)    

# Convert training labels to one-hot encoded vectors
y_train = to_categorical(y_train, 10)

### 2. Build & train the model

In [6]:
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()

# Build the MLP model with dropout layers
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model 
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model for more epochs to ensure accuracy
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Make predictions on the test set
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

submission = pd.DataFrame({'id': id_test, 'label': predicted_labels})
submission.to_csv('submission.csv', index=False)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.0980 - loss: 2.4114 - val_accuracy: 0.1014 - val_loss: 2.3026
Epoch 2/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1024 - loss: 2.3026 - val_accuracy: 0.0977 - val_loss: 2.3026
Epoch 3/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.0984 - loss: 2.3028 - val_accuracy: 0.1022 - val_loss: 2.3027
Epoch 4/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.0991 - loss: 2.3027 - val_accuracy: 0.0980 - val_loss: 2.3028
Epoch 5/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.1021 - loss: 2.3027 - val_accuracy: 0.0952 - val_loss: 2.3027
Epoch 6/20
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.0980 - loss: 2.3028 - val_accuracy: 0.0980 - val_loss: 2.3027
Epoch 7/20
[1m1

## 3. Summary 

Exploratory Data Analysis (EDA):

In this notebook, the focus shifts to classification tasks using neural networks. The dataset used is CIFAR-10, a standard dataset for image classification. The EDA involves loading and exploring the CIFAR-10 dataset, understanding its structure, and visualizing sample images to get an idea of the data distribution and class balance. Basic statistics about the dataset, such as the number of classes, image dimensions, and the number of samples, are examined.

Preprocessing Steps:

Preprocessing for neural network models includes transforming the image data into a suitable format for model training. This involves normalizing the pixel values, converting labels to categorical format using one-hot encoding, and splitting the dataset into training and testing sets. The preprocessing steps ensure that the data is in the correct format for feeding into a neural network model. Additionally, data augmentation techniques may be applied to enhance the model's ability to generalize by artificially expanding the training dataset through transformations like rotation, flipping, and scaling.

Model Implementation and Accuracy Evaluation:

A simple Multi-Layer Perceptron (MLP) model is built using TensorFlow and Keras. The model is compiled with the Adam optimizer and sparse categorical cross-entropy loss function. Training involves running the model for a fixed number of epochs while monitoring validation accuracy. After training, the model's performance is evaluated on the test set to ensure it generalizes well. The results, including accuracy, are saved to submission.csv for further evaluation. Accuracy is calculated by comparing the predicted labels with the actual labels in the test set, providing a quantitative measure of the model's performance.