# The code will implement the training pipeline for the CNN regressor mentioned in https://www.nature.com/articles/s41598-021-02387-9

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## IMPORT

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import matplotlib.pyplot as plt
import json
import codecs
from tqdm import tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("We're using:", device)

We're using: cpu


In [3]:
%cd drive/Shareddrives/CIS519_Spring2023/

/content/drive/Shareddrives/CIS519_Spring2023


## FASTER DATALOADER (USE IT)

In [4]:
class AnimalCountDataset(Dataset):
        def __init__(self, image_label, image_data, transform=None):
            self.transform = transform
            self.label = image_label
            self.image = image_data
        def __len__(self):
          
            return len(self.label) 
            
        def __getitem__(self, idx):
            
            if torch.is_tensor(idx):
                idx = idx.tolist()

            image = torch.from_numpy(self.image[idx])
            image = image.permute(2, 0, 1)
            image = image.to(torch.float32)
            # Transform the image using self.transform
            if self.transform:
                image = self.transform(image)
            count = torch.tensor(self.label[idx], dtype = torch.float32)
            sample = (image, count)
            
            return sample

## preload data

In [5]:
image_save_path = "/content/drive/Shareddrives/CIS519_Spring2023/image.npy"
label_save_path = "/content/drive/Shareddrives/CIS519_Spring2023/label.npy"
label = np.load(label_save_path)
image = np.load(image_save_path)

In [6]:
transform = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

dataset = AnimalCountDataset(label, image, transform=transform)

# define the sizes of the training, testing, and validation datasets
train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

# use the random_split function to split the dataset into training, validation, and testing datasets
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# create data loaders for each dataset
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
print(image.shape)

(5000, 224, 224, 3)


## CNN Architecture

### Train pretrained model

In [8]:
import torch.nn as nn
import torchvision.models as models

# Load a pre-trained ResNet-18 model
resnet = models.resnet34(pretrained=True)

# Freeze all layers except the last two
for param in resnet.parameters():
    param.requires_grad = False
for param in resnet.fc.parameters():
    param.requires_grad = True

# Replace the last fully connected layer with a regression layer
resnet.fc = nn.Linear(resnet.fc.in_features, 1)
resnet = resnet.to(device)



### Train the whole model

In [9]:
import torch.nn as nn
import torchvision.models as models

# Load a pre-trained ResNet-18 model
resnet = models.resnet18(pretrained=False)

# Replace the last fully connected layer with a regression layer
resnet.fc = nn.Linear(resnet.fc.in_features, 1)
resnet = resnet.to(device)



## Train, validation, test

In [None]:
# Define the loss function and optimizer,
criterion = nn.HuberLoss()
optimizer = torch.optim.SGD(resnet.parameters(), lr=0.001, momentum= 0.9)

# Train the model
train_loss, validation_loss = [], []
# train_acc, validation_acc = [], []


# Note that we have set the number of epochs to be 10. You can choose to increase or decrease the number of epochs.
num_epochs = 30 
for epoch in range(num_epochs):
    
    resnet.train()
    running_loss = 0.
    # running_acc = 0

    for i, data in enumerate(train_dataloader):

        inputs, labels = data
        # 1. Store the inputs and labels in the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        labels = labels.flatten()
        #print(labels)
        # 2. Get the model predictions
        predictions = resnet(inputs)
        predictions = predictions.flatten()
        #print(predictions)
        # 3. Zero the gradients out
        optimizer.zero_grad()

        # 4. Get the loss
        loss = criterion(predictions, labels)

        # 5. Calculate the gradients
        loss.backward()

        # 6. Update the weights
        optimizer.step()
            
        running_loss += loss.item()
        # acc = torch.abs(predictions-labels)/labels
        # acc = torch.sum(acc)
        # running_acc += acc.item()

    # train_acc.append(running_acc/ len(train_dataloader))
    train_loss.append(running_loss / len(train_dataloader))
            
    resnet.eval()
    running_loss = 0
    # running_acc = 0
    
    for i, data in enumerate(val_dataloader, 0):

        inputs, labels = data
        # 1. Store the inputs and labels in the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        labels = labels.flatten()
        
        # 2. Get the model predictions
        predictions = resnet(inputs)
        predictions = predictions.flatten()
        
        # 3. Get the loss
        loss = criterion(predictions, labels)

        running_loss += loss.item()
        # acc = torch.abs(predictions-labels)/labels
        # acc = torch.sum(acc)
        # running_acc += acc.item()

    # validation_acc.append(running_acc/ len(val_dataloader))
    validation_loss.append(running_loss / len(val_dataloader))

    print(f"Epoch {epoch+1}:")

    print(f"Training Loss:", round(train_loss[epoch], 3))
    print(f"Validation Loss:", round(validation_loss[epoch], 3))
    
    # print(f"Training Accuracy:", round(train_acc[epoch], 3))
    # print(f"Validation Accuracy:", round(validation_acc[epoch], 3))

    print("------------------------------")
    model_path = "/content/drive/Shareddrives/CIS519_Spring2023/code/trained_models/resnet34_5000_epoch"+str(epoch)
    torch.save(resnet.state_dict(), model_path)
        

Epoch 1:
Training Loss: 1.437
Validation Loss: 1.785
------------------------------
Epoch 2:
Training Loss: 1.17
Validation Loss: 1.35
------------------------------
Epoch 3:
Training Loss: 1.056
Validation Loss: 1.16
------------------------------
Epoch 4:
Training Loss: 0.929
Validation Loss: 1.213
------------------------------
Epoch 5:
Training Loss: 0.858
Validation Loss: 1.162
------------------------------


### Loss curve

In [None]:
plt.plot(train_loss,label = "train")
plt.plot(validation_loss, label = "validation")
plt.legend()

## Save the model

In [None]:
PATH = "/content/drive/Shareddrives/CIS519_Spring2023/code/trained_models/resnet34_5000.pth"
torch.save(resnet.state_dict(), PATH)

## Inference

In [None]:
PATH = "/content/drive/Shareddrives/CIS519_Spring2023/code/trained_models/resnet34_5000.pth"
trained_resnet = models.resnet34(pretrained=False)
trained_resnet.fc = nn.Linear(trained_resnet.fc.in_features, 1)
state_dict = torch.load(PATH)
trained_resnet.load_state_dict(state_dict)
trained_resnet.eval()

In [None]:
for i in range(100):
  test_image, test_label = test_dataset[i]
  sq_test_image = test_image.unsqueeze(0)
  infer = trained_resnet(sq_test_image)
  print("GT:",test_label)
  print("Prediction:",infer)