# Import Modules

In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz

from datetime import datetime
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, datasets
from torchmetrics import Accuracy

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

# Dataset

In [2]:
transform = transforms.Compose([
    transforms.Lambda(lambda x: (x / 255) - 0.5),
])

In [3]:
train_dataset = pd.read_csv("../datasets/train.csv")
test_dataset = pd.read_csv("../datasets/test.csv")

X_train = transform(torch.FloatTensor(train_dataset.loc[:, train_dataset.columns != 'label'].to_numpy()))
y_train = torch.LongTensor(train_dataset.label.to_numpy())

X_test = transform(torch.FloatTensor(test_dataset.loc[:, test_dataset.columns != 'label'].to_numpy()))

In [4]:
X_train.shape, y_train.shape

(torch.Size([42000, 784]), torch.Size([42000]))

In [5]:
X_test.shape

torch.Size([28000, 784])

# Tensor Dataset

In [6]:
train_dataset = TensorDataset(X_train, y_train)

# Dataloder

In [7]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Model

In [8]:
class CustomModel(nn.Module):
    def __init__(self, width_img, height_img):
        super(CustomModel, self).__init__()
        self.feature = nn.Sequential(
            nn.Linear(in_features=width_img * height_img, out_features=1024),
            nn.Dropout(p=0.6),
            nn.LeakyReLU(),
            nn.Linear(in_features=1024, out_features=2048),
            nn.LeakyReLU(),
            nn.Linear(in_features=2048, out_features=10)
        )
        self.output = nn.Softmax(dim=1)
  
    def forward(self, x):
        x_feature = self.feature(x)
        x_output = self.output(x_feature)
        
        return x_output
    
    def get_feature(self, x):        
        return self.feature[0:4](x)

def init_wb_kaiming_uniform(layer):
    if isinstance(layer, nn.Linear):
        nn.init.kaiming_uniform_(layer.weight.data, a=0, mode="fan_in", nonlinearity="leaky_relu")
        nn.init.kaiming_uniform_(layer.bias.data.reshape(layer.bias.data.shape[0], 1), a=0, mode="fan_in", nonlinearity="leaky_relu")    

model = CustomModel(28, 28).to(device)
model.apply(init_wb_kaiming_uniform)
model.to(device)

CustomModel(
  (feature): Sequential(
    (0): Linear(in_features=784, out_features=1024, bias=True)
    (1): Dropout(p=0.6, inplace=False)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Linear(in_features=1024, out_features=2048, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Linear(in_features=2048, out_features=10, bias=True)
  )
  (output): Softmax(dim=1)
)

# Eval Metric

In [9]:
eval_metric_train = Accuracy().to(device)
eval_metric_test = Accuracy().to(device)
eval_metric_train, eval_metric_test

(Accuracy(), Accuracy())

# Loss Metric

In [10]:
loss_metric = nn.CrossEntropyLoss().to(device)
loss_metric

CrossEntropyLoss()

# Optimizer

In [11]:
optimizer = optim.AdamW(model.parameters(), lr=6.25e-05)
optimizer

AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 6.25e-05
    maximize: False
    weight_decay: 0.01
)

# Training Step

In [12]:
def training_step(dataloader, model, loss_metric, eval_metric, optimizer, show_metric_every):
    model.train()
    losses = []
    for batch, (data, actual_labels) in enumerate(train_dataloader, 1):
        data = data.to(device)
        actual_labels = actual_labels.to(device)
        
        # Forward Propagation
        pred_labels = model(data)
        loss = loss_metric(pred_labels, actual_labels)
        evaluate = eval_metric(pred_labels, actual_labels)
        losses.append(loss)
        
        if batch % show_metric_every == 0 or batch == len(train_dataloader):
            print(f"Batch {batch} >> loss: {loss:.3f} | acc: {evaluate:.3f}")
        
        # Backward Propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    mean_loss = torch.mean(torch.tensor(losses))
    mean_metric = eval_metric.compute()
    
    return mean_loss, mean_metric

# Testing Step

In [13]:
def testing_step(dataloader, model, loss_metric, eval_metric, optimizer, show_metric_every):
    model.eval()
    losses = []
    for batch, (data, actual_labels) in enumerate(test_dataloader, 1):
        data = data.to(device)
        actual_labels = actual_labels.to(device)
        
        # Forward Propagation
        pred_labels = model(data)
        loss = loss_metric(pred_labels, actual_labels)
        evaluate = eval_metric(pred_labels, actual_labels)
        losses.append(loss_metric)
        
        if batch % show_metric_every == 0 or batch == len(test_dataloader):
            print(f"Batch {batch} >> loss: {loss:.3f} | acc: {evaluate:.3f}")

    mean_loss = torch.mean(torch.tensor(losses))
    mean_metric = eval_metric.compute()
    
    return mean_loss, mean_metric

# Fitting Step

In [14]:
import os

now = datetime.now(tz=pytz.timezone("Asia/Makassar"))
now = now.strftime("%m_%d_%Y-%H_%M_%S")
os.makedirs(f"../callbacks/{now}/epochs", exist_ok=True)

def fitting_step(n_epoch, n_penalty, train_dataloader, model, loss_metric, eval_metric_train, eval_metric_test, optimizer, device, show_metric_every):
    epoch = 1
    n_current_penalty = 0
    train_losses, train_metrics = [], []
    # test_losses, test_metrics = [], []
    
    while True:
        print(f"EPOCH {epoch}")
        print("=" * 40)
        print(f"Train")
        train_loss, train_metric = training_step(train_dataloader, model, loss_metric, eval_metric_train, optimizer, show_metric_every)
        train_losses.append(train_loss.to("cpu"))
        train_metrics.append(train_metric.to("cpu"))
        print(f"Average train loss : {train_loss:.3f}")
        print(f"Average train acc  : {train_metric:.3f}\n")
        
        # print(f"Test")
        # test_loss, test_metric = testing_step(test_dataloader, model, losseval_metric, eval_metric_test, optimizer)
        # test_losses.append(test_loss)
        # test_metrics.append(test_metric)
        # print(f"Average test loss : {test_loss:.3f}")
        # print(f"Average test acc  : {test_metric:.3f}\n")

        if epoch != 1:
            if train_losses[-1] > train_losses[-2]:
                n_current_penalty += 1
                print(f"Not improve! Number of penalty = {n_current_penalty}/{n_penalty}! 😔")
            elif train_losses[-1] < train_losses[-2] and n_current_penalty != 0:
                n_current_penalty -= 1
                print(f"Improve! 😄")
            else:
                print(f"Improve! 😄")

            if n_current_penalty == n_penalty:
                print(f"Number of penalty = {n_current_penalty}/{n_penalty}!, training stopped!")
                break
                
            # if train_losses[-1] < test_losses[-1]:
            #     print("Model overfit! Training stopped.")
            #     break
        print("=" * 40 + "\n")
        torch.save(model.state_dict(), f"../callbacks/{now}/epochs/{str(epoch).zfill(4)}.pth")
        epoch += 1
        eval_metric_train.reset()
        eval_metric_test.reset()
    torch.save(model.state_dict(), f"../callbacks/{now}/epochs/{str(epoch).zfill(4)}.pth")
    print("Done!")
    return train_losses, train_metrics

train_losses, train_metrics = fitting_step(2, 3, train_dataloader, model, loss_metric, eval_metric_train, eval_metric_test, optimizer, device, 20)

EPOCH 1
Train
Batch 20 >> loss: 2.291 | acc: 0.156
Batch 40 >> loss: 2.271 | acc: 0.188
Batch 60 >> loss: 2.180 | acc: 0.266
Batch 80 >> loss: 2.070 | acc: 0.438
Batch 100 >> loss: 2.105 | acc: 0.375
Batch 120 >> loss: 2.033 | acc: 0.453
Batch 140 >> loss: 1.963 | acc: 0.547
Batch 160 >> loss: 1.914 | acc: 0.562
Batch 180 >> loss: 1.914 | acc: 0.531
Batch 200 >> loss: 1.890 | acc: 0.656
Batch 220 >> loss: 1.931 | acc: 0.547
Batch 240 >> loss: 1.835 | acc: 0.656
Batch 260 >> loss: 1.954 | acc: 0.500
Batch 280 >> loss: 1.826 | acc: 0.641
Batch 300 >> loss: 1.799 | acc: 0.672
Batch 320 >> loss: 1.716 | acc: 0.797
Batch 340 >> loss: 1.696 | acc: 0.781
Batch 360 >> loss: 1.754 | acc: 0.750
Batch 380 >> loss: 1.815 | acc: 0.641
Batch 400 >> loss: 1.798 | acc: 0.672
Batch 420 >> loss: 1.762 | acc: 0.688
Batch 440 >> loss: 1.710 | acc: 0.766
Batch 460 >> loss: 1.803 | acc: 0.688
Batch 480 >> loss: 1.750 | acc: 0.703
Batch 500 >> loss: 1.739 | acc: 0.781
Batch 520 >> loss: 1.673 | acc: 0.781
Ba

KeyboardInterrupt: 

# Save Model

In [None]:
torch.save(model.state_dict(), f"../pretrained_models/{now}.pth")

# Evaluate

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=2, tight_layout=True, figsize=(10, 10))

ax[0][0].set_title("Train Accuracy")
ax[0][0].plot(train_metrics, label="Accuracy")
ax[0][0].set_xlabel("Epoch")
ax[0][0].set_ylabel("Score")
ax[0][0].grid()
ax[0][0].legend()

ax[0][1].set_title("Train Loss")
ax[0][1].plot(train_losses, label="Loss")
ax[0][1].set_xlabel("Epoch")
ax[0][1].set_ylabel("Score")
ax[0][1].grid()
ax[0][1].legend();

# ax[1][0].set_title("Train and Test Loss")
# ax[1][0].plot(train_losses, label="Train")
# ax[1][0].plot(test_losses, label="Test")
# ax[1][0].set_xlabel("Epoch")
# ax[1][0].set_ylabel("Score")
# ax[1][0].grid()
# ax[1][0].legend()

# ax[1][1].set_title("Train and Test Accuracy")
# ax[1][1].plot(train_metrics, label="Train")
# ax[1][1].plot(test_metrics, label="Test")
# ax[1][1].set_xlabel("Epoch")
# ax[1][1].set_ylabel("Score")
# ax[1][1].grid()
# ax[1][1].legend();

## X_train

In [None]:
# os.makedirs(f"../callbacks/{now}/images", exist_ok=True)
# model.eval()
# n_image = 900
# idx_start = np.random.randint(0, len(X_test) - 1)
# features = X_train[idx_start:idx_start+n_image].to(device)
# actual_labels = y_train[idx_start:idx_start+n_image].to(device)
# pred_labels = model(features).argmax(1)

# plt.figure(figsize=(n_image**0.5, n_image**0.5), tight_layout=True)
# for i in range(1, n_image + 1):
#     plt.subplot(int(n_image**0.5), int(n_image**0.5), i)
#     color_text = "green" if pred_labels[i-1] == actual_labels[i-1] else "red"
#     plt.title(f"Pred: {pred_labels[i-1]} | Actual {actual_labels[i-1]}", size=7, color=color_text)
#     plt.imshow(features[i-1].reshape((28, 28)).to("cpu"), cmap=plt.cm.gray)
#     plt.axis("off")
# plt.savefig(f"../callbacks/{now}/images/X_train_prediction.jpeg", dpi=100)

## X_test

In [None]:
# os.makedirs(f"../callbacks/{now}/images", exist_ok=True)
# model.eval()
# n_image = 900
# idx_start = np.random.randint(0, len(X_test) - 1)
# features = X_test[idx_start:idx_start+n_image].to(device)
# pred_labels = model(features).argmax(1)

# plt.figure(figsize=(n_image**0.5, n_image**0.5), tight_layout=True)
# for i in range(1, n_image + 1):
#     plt.subplot(int(n_image**0.5), int(n_image**0.5), i)
#     plt.title(f"Pred: {pred_labels[i-1]}", color="black")
#     plt.imshow(features[i-1].reshape((28, 28)).to("cpu"), cmap=plt.cm.gray)
#     plt.axis("off")
# plt.savefig(f"../callbacks/{now}/images/X_test_prediction.jpeg", dpi=100)

# Predict

In [None]:
# pred_labels = model(X_test.to(device)).argmax(1)
# submission_df = pd.DataFrame({
#     "ImageId": np.arange(1, 28001),
#     "Label": pred_labels.to("cpu")
# })

# submission_df.to_csv(f"../submissions/{now}.csv", index=False)

In [None]:
model.load_state_dict(torch.load(f"../callbacks/{now}/epochs/0074.pth"))
model.eval()
pred_labels = model(X_test.to(device)).argmax(1)
submission_df = pd.DataFrame({
    "ImageId": np.arange(1, len(pred_labels) + 1),
    "Label": pred_labels.to("cpu")
})

submission_df.to_csv(f"../submissions/{now}.csv", index=False)

In [None]:
sum([params.numel() for params in rnn.parameters() if params.requires_grad == True])

In [None]:
for name, params in model.named_parameters():
    if params.requires_grad:
        print(f"{name}: {params.numel()} params")

In [None]:
for name, params in rnn.named_parameters():
    
    if params.requires_grad:
        print(f"{type(name)}: {params.numel()}")