In [None]:
import glob
import os

import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Read ./train.csv into a DataFrame; later cells use its 'ID' and 'Class' columns.
data = pd.read_csv(filepath_or_buffer="./train.csv")
# Preview the first five rows.
data.head(n=5)

In [None]:
# Encode the string labels in data['Class'] as integer class indices.
CLASS_TO_INDEX = {'YOUNG': 0, 'MIDDLE': 1, 'OLD': 2}

# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on the `data['Class']` selection: an in-place
# call on a column selection can operate on a temporary object under pandas
# Copy-on-Write and leave `data` itself unchanged.
# `.get(label, label)` passes unknown or already-encoded values through
# untouched, so re-running this cell does not corrupt the column.
data['Class'] = data['Class'].map(lambda label: CLASS_TO_INDEX.get(label, label))
data.head(3)

In [None]:
def readImage(path, ch=3, resize=(150, 150)):
    """Read an image from disk with OpenCV and resize it.

    Args:
        path: Location of the image file, passed to ``cv2.imread``.
        ch: Accepted for compatibility with existing callers. The value is
            not used; the image is always read with ``cv2.imread``'s
            default flags.
        resize: Target size handed to ``cv2.resize`` as its size argument.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``path``
            (missing file or not a readable image).
    """
    img = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing or not a readable image): {path!r}"
        )
    return cv2.resize(img, resize)

In [None]:
def load_data(image_path, label):
    """Load the image at ``image_path`` via ``readImage`` and pair it with ``label``.

    The image is requested at a fixed size of (150, 150).

    Returns:
        A ``(image, label)`` tuple.
    """
    target_size = (150, 150)
    image = readImage(image_path, 3, target_size)
    return image, label

In [None]:
PATH = "./Train"

# os.listdir returns entries in arbitrary order. Sorting keeps image_paths,
# and therefore the seeded train/test split built from it, identical between
# runs and machines.
image_paths = [os.path.join(PATH, file_name) for file_name in sorted(os.listdir(PATH))]
print(len(image_paths))

# Build the ID -> Class lookup once instead of scanning the whole frame for
# every image. keep='first' matches the previous `.values[0]` behaviour when
# an ID occurs more than once.
class_by_id = (
    data.drop_duplicates(subset='ID', keep='first')
    .set_index('ID')['Class']
    .to_dict()
)

# Fail with an explicit message when an image has no row in `data`.
unlabelled = [p for p in image_paths if os.path.basename(p) not in class_by_id]
if unlabelled:
    raise KeyError(
        f"{len(unlabelled)} file(s) in {PATH} have no matching 'ID' in data, "
        f"e.g. {unlabelled[:5]}"
    )

# One label per entry of image_paths, in the same order.
response_list = [class_by_id[os.path.basename(p)] for p in image_paths]
print(len(response_list))

In [None]:
# Split the image paths and their labels 80/20 with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    image_paths,
    response_list,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Load every image up front; each entry is the (image, label) tuple
# returned by load_data.
train_data = [load_data(path, label) for path, label in zip(X_train, y_train)]

test_data = [load_data(path, label) for path, label in zip(X_test, y_test)]

In [None]:
# Convert each 150 x 150 x 3 image to 3 x 150 x 150 and collect the
# images and labels of each split into NumPy arrays.
X_train = np.array([image.transpose(2, 0, 1) for image, _ in train_data])
y_train = np.array([label for _, label in train_data])

X_test = np.array([image.transpose(2, 0, 1) for image, _ in test_data])
y_test = np.array([label for _, label in test_data])

In [None]:
def _as_tensor_dataset(features, labels):
    """Wrap NumPy features/labels as a TensorDataset of float inputs and long targets."""
    inputs = torch.from_numpy(features).float()
    targets = torch.from_numpy(labels).long()
    return TensorDataset(inputs, targets)


train_dataset = _as_tensor_dataset(X_train, y_train)
test_dataset = _as_tensor_dataset(X_test, y_test)

# Training batches of 32 are reshuffled every epoch; the test loader keeps
# its order and uses DataLoader's default batch size.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, shuffle=False)

In [None]:
class Model(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages followed by two linear layers.

    The network takes 3-channel inputs and produces 3 class scores. The size
    of ``fc1`` assumes 150x150 inputs (150 -> 75 -> 37 after the two 2x2
    max-pools).

    ``forward`` returns raw, unnormalised class scores (logits). This is what
    ``nn.CrossEntropyLoss`` expects, because that loss applies log-softmax
    itself; feeding it already-softmaxed values normalises twice and flattens
    the gradients. Use ``predict_proba`` when probabilities are needed.
    ``argmax`` over either output yields the same predicted class.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # padding=1 with a 3x3 kernel keeps the spatial size; each pool halves it.
        self.conv1 = nn.Conv2d(3, 30, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(30, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.flatten = nn.Flatten()
        # 64 channels x 37 x 37 spatial positions after the second pool.
        self.fc1 = nn.Linear(64 * 37 * 37, 64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(64, 3)
        # Parameter-free; used by predict_proba and kept as an attribute so
        # existing references to `model.softmax` continue to work.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits of shape ``(batch, 3)`` for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        # No softmax here: the loss function normalises the logits itself.
        return self.fc2(x)

    def predict_proba(self, x):
        """Return per-class probabilities (softmax over the logits) for ``x``."""
        return self.softmax(self(x))
    
# Seed PyTorch's global RNG before building the model so that weight
# initialisation is repeatable on a fresh Restart & Run All. DataLoader
# shuffling that draws from the default generator is affected as well.
torch.manual_seed(42)

model = Model()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
import copy
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``; weights are updated only when ``optimizer`` is given.

    Returns:
        ``(losses, accuracies)`` with one entry per batch. Losses are Python
        floats (``loss.item()``); accuracies are 0-d tensors.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    losses, accuracies = [], []
    # Gradients are only needed for optimisation steps. Disabling them for
    # evaluation avoids building an autograd graph for every validation batch.
    with torch.set_grad_enabled(training):
        for X, y in loader:
            if training:
                optimizer.zero_grad()
            y_hat = model(X)
            loss = criterion(y_hat, y)
            if training:
                loss.backward()
                optimizer.step()
            losses.append(loss.item())
            accuracies.append((y_hat.argmax(1) == y).float().mean())
    return losses, accuracies


def train(model, train_loader, optimizer, criterion, epochs=10, val_loader=None):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module to optimise; it is updated in place.
        train_loader: Iterable of ``(X, y)`` training batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Callable ``criterion(y_hat, y)`` returning the loss tensor.
        epochs: Number of passes over ``train_loader``.
        val_loader: Iterable of ``(X, y)`` validation batches. When ``None``
            the notebook-level ``test_loader`` is used, as the original
            implementation always did.

    Returns:
        ``(train_losses, train_acc, val_losses, val_acc, best_model)``. The
        first four are lists with one inner list per epoch holding the
        per-batch values. ``best_model`` is a deep copy of ``model`` taken at
        the epoch with the highest mean validation accuracy, or ``None`` when
        no epoch produced a comparable accuracy (e.g. ``epochs == 0``).
    """
    if val_loader is None:
        val_loader = test_loader

    train_losses = []
    train_acc = []
    val_acc = []
    val_losses = []
    # Start below any reachable accuracy so the first epoch is always kept,
    # even when validation accuracy is exactly 0.
    best_val_acc = float("-inf")
    best_model = None
    for epoch in range(epochs):
        batch_losses, batch_acc = _run_epoch(model, train_loader, criterion, optimizer)
        train_losses.append(batch_losses)
        train_acc.append(batch_acc)

        print(f"Epoch - {epoch+1} Train-Loss : {np.mean(train_losses[-1])} Train-Accuracy : {np.mean(train_acc[-1])}")

        val_batch_losses, val_batch_acc = _run_epoch(model, val_loader, criterion)
        val_losses.append(val_batch_losses)
        val_acc.append(val_batch_acc)
        print(f"Epoch - {epoch+1} Val-Loss : {np.mean(val_losses[-1])} Val-Accuracy : {np.mean(val_acc[-1])}")

        epoch_val_acc = np.mean(val_acc[-1])
        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            # Snapshot the weights now; later epochs keep mutating `model`.
            best_model = copy.deepcopy(model)
    return train_losses, train_acc, val_losses, val_acc, best_model

# Train for 10 epochs; keep the per-epoch histories and the best-scoring model copy.
(train_losses, train_acc, val_losses, val_acc, best_model) = train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=10,
)