In [None]:
import os
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import cv2

In [None]:
# Class-name -> integer index, in the fixed order used throughout the notebook.
label_map = {
    name: index
    for index, name in enumerate((
        'black_bear',
        'people',
        'birds',
        'dog',
        'brown_bear',
        'roe_deer',
        'wild_boar',
        'amur_tiger',
        'amur_leopard',
        'sika_deer',
    ))
}

# Every image is resized to this (height, width) before entering the network.
target_size = (224, 224)
resize = transforms.Resize(size=target_size)

# Per-channel mean / std applied after pixel values are scaled to [0, 1].
normalize = transforms.Normalize(
    [0.485, 0.456, 0.406],
    [0.229, 0.224, 0.225],
)

In [None]:
class RussianWildlifeDataset(Dataset):
    """Image-folder dataset: ``img_dir/<class_name>/<image file>``.

    ``data_list`` holds one ``(file_name, class_name)`` tuple per image.
    Indexing returns ``(image, class_index)`` where ``image`` is a float
    tensor in channel-first layout that has been resized with the
    module-level ``resize`` and normalized with the module-level
    ``normalize``, and ``class_index`` comes from the module-level
    ``label_map``.
    """

    def __init__(self, img_dir):
        """Index every image file found under the class folders of ``img_dir``.

        Entries of ``img_dir`` that are not directories, or whose name starts
        with a dot (e.g. ``.ipynb_checkpoints``), are skipped. Folder and file
        names are sorted so the index -> sample mapping is the same on every
        run and every machine.
        """
        self.img_dir = img_dir
        self.data_list = []
        for label in sorted(os.listdir(self.img_dir)):
            class_dir = os.path.join(self.img_dir, label)
            # Stray files / hidden folders in the root are not classes.
            if label.startswith('.') or not os.path.isdir(class_dir):
                continue
            for file in sorted(os.listdir(class_dir)):
                self.data_list.append((file, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Load, preprocess and return ``(image_tensor, class_index)`` for ``idx``.

        Raises:
            OSError: if OpenCV cannot read/decode the image file.
            KeyError: if the image's folder name is not a key of ``label_map``.
        """
        file_name, label = self.data_list[idx]
        img_path = os.path.join(self.img_dir, label, file_name)

        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {img_path}")

        # OpenCV decodes to BGR; the normalization statistics are given in
        # RGB order, so the channels must be reordered before normalizing.
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        img = torch.from_numpy(img_rgb)
        img = img.permute(2, 0, 1)  # HWC -> CHW
        img = resize(img)
        img = img / 255
        img = normalize(img)
        return img.float(), label_map[label]

In [None]:
# Index every image under ./data (one sub-folder per class) and report how many were found.
data = RussianWildlifeDataset('data')
print(len(data))

In [None]:
# 70 % train / 10 % validation / remainder (~20 %) test.
n_total = len(data)
train_size = int(0.7 * n_total)
val_size = int(0.1 * n_total)
test_size = n_total - train_size - val_size

# Seed the split so the same images land in the same subset on every run;
# otherwise reported metrics are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(
    data,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Only the training loader is shuffled; evaluation order is irrelevant.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=8)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False, num_workers=8)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=8)

print(len(train_set), len(val_set), len(test_set))

In [None]:
class_labels = list(label_map.keys())


def count_labels(subset):
    """Return the per-class sample counts of a ``random_split`` subset.

    Labels are read from the parent dataset's ``data_list`` through the
    subset's ``indices``, so no image is loaded or preprocessed just to
    count it.
    """
    counts = [0] * len(label_map)
    for sample_idx in subset.indices:
        _, label_name = subset.dataset.data_list[sample_idx]
        counts[label_map[label_name]] += 1
    return counts


train_counts = count_labels(train_set)
val_counts = count_labels(val_set)

# Plot bar charts
plt.figure(figsize=(10, 6))

plt.bar(class_labels, train_counts, label='Train')
plt.bar(class_labels, val_counts, label='Validation')
plt.xlabel('Class Label')
plt.ylabel('Number of Samples')
plt.title('Data Distribution in Train and Validation Sets')
plt.legend()
plt.show()


In [None]:
import wandb

# Authenticate, then open the run that the training and evaluation cells log to.
wandb.login()
wandb.init(
    project="cv_ass1",
    name="Q2_CNN_normalized",
    config=dict(
        learning_rate=0.001,
        architecture="CNN",
        dataset="Russian Wildlife Dataset",
        epochs=10,
    ),
)

# Hyper-parameters are read back from the run config (learning_rate, epochs) by later cells.
config = wandb.config   

In [None]:
import torch
import torch.nn as nn

class CNN(nn.Module):
  """Three convolutional stages followed by one linear classification layer.

  Every stage is Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d, so
  only the pooling changes the spatial size (by factors 4, 2 and 2). The
  linear layer expects 25088 = 128 * 14 * 14 features, which is what a
  224x224 input produces.

  Args:
    num_classes: number of output logits.
  """

  def __init__(self, num_classes):
    super().__init__()

    # Stages are created in order conv1 -> conv2 -> conv3 -> fc.
    self.conv1 = self._stage(3, 32, pool=4)
    self.conv2 = self._stage(32, 64, pool=2)
    self.conv3 = self._stage(64, 128, pool=2)

    self.flatten = nn.Flatten()
    self.fc = nn.Linear(25088, num_classes)

  @staticmethod
  def _stage(in_channels, out_channels, pool):
    """Build one Conv2d -> ReLU -> MaxPool2d stage."""
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=pool, stride=pool),
    )

  def forward(self, x):
    """Return the class logits for a batch of images ``x``."""
    features = self.conv3(self.conv2(self.conv1(x)))
    return self.fc(self.flatten(features))

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# One output logit per class (10 classes).
model = CNN(10).to(device)

# Cross-entropy on raw logits, optimized with Adam at the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=config.learning_rate)

In [None]:
n_epoch = config.epochs
# Lowest mean validation loss seen so far; drives the "best" checkpoint.
save_loss = float("inf")

print("Started Training")

for epoch in range(n_epoch):
    print('Training for epoch: ', epoch)

    # ---- training pass ----
    model.train()
    tloss = 0
    tstep = 0

    # The batch is unpacked directly so the loop does not rebind the
    # notebook-level name `data`, which holds the dataset object.
    for img, label in train_loader:
        inputs = img.to(device)
        labels = label.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)

        train_loss = criterion(outputs, labels)
        train_loss.backward()
        optimizer.step()
        tstep += 1
        tloss += train_loss.item()

    # Mean over the batches actually processed (guarded for an empty loader).
    avg_train_loss = tloss / max(tstep, 1)
    print('EPOCH:', epoch)
    print('Average train loss:', avg_train_loss)

    # ---- validation pass ----
    model.eval()
    vloss = 0
    vstep = 0

    # No gradients are needed for evaluation; skipping autograd saves memory.
    with torch.no_grad():
        for img, label in val_loader:
            inputs = img.to(device)
            labels = label.to(device)

            outputs = model(inputs)

            val_loss = criterion(outputs, labels)
            vstep += 1
            vloss += val_loss.item()

    avg_val_loss = vloss / max(vstep, 1)
    print('EPOCH:', epoch)
    print('Average val loss:', avg_val_loss)

    # Keep a checkpoint of the weights with the lowest validation loss so far.
    if avg_val_loss < save_loss:
        save_loss = avg_val_loss
        state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
        torch.save(state, 'q2_cnn_best.pt')

    log_metric = {"Epoch": epoch, "Train Loss": avg_train_loss, "Val Loss": avg_val_loss}
    wandb.log(log_metric)

print("Finished Training")

In [None]:
# Persist the weights and optimizer state as they are after the last epoch.
state = dict(
    model=model.state_dict(),
    optimizer=optimizer.state_dict(),
)
torch.save(state, 'q2_cnn.pt')

The training loss keeps decreasing after every epoch, as expected, but after epoch 4 the validation loss starts increasing and eventually reaches 2.03. The model is therefore overfitting from that point on.
Limiting training to 5 epochs would have been ideal.

In [None]:
# Uncomment to evaluate the best checkpoint (lowest validation loss) instead of the final model.
# model = CNN(10)
# state_dict = torch.load('q2_cnn_best.pt')
# model.load_state_dict(state_dict['model'])

In [None]:
print("Started Testing")

# Ground-truth and predicted class indices, accumulated batch by batch.
test_labels = []
test_predictions = []

model.eval()

with torch.no_grad():
    # The batch is unpacked directly so the loop does not rebind the
    # notebook-level name `data`, which holds the dataset object.
    for img, labels in test_loader:
        test_labels.extend(labels.numpy())

        inputs = img.to(device)

        predictions = model(inputs).argmax(dim=1)
        # Tensors on a CUDA device cannot be converted to NumPy directly;
        # move them to host memory first (a no-op when already on the CPU).
        test_predictions.extend(predictions.cpu().numpy())

print("Finished Testing")

In [None]:
import numpy as np
from sklearn.metrics import f1_score, confusion_matrix

# Work on arrays so the element-wise comparison below is vectorized.
test_labels = np.array(test_labels)
test_predictions = np.array(test_predictions)

# Accuracy = fraction of predictions that equal the ground truth.
total = len(test_predictions)
correct = (test_predictions == test_labels).sum()
acc = correct / total

# Macro averaging: unweighted mean of the per-class F1 scores.
f1score = f1_score(test_labels, test_predictions, average='macro')

for caption, value in (("Accuracy: ", acc), ("F1 Score: ", f1score)):
    print(caption, value)

cm = confusion_matrix(test_labels, test_predictions)
wandb.log({"Accuracy": acc, "F1 Score": f1score})
print(cm)

In [None]:
# Draw on an explicit figure so the same object can be logged and displayed.
fig, ax = plt.subplots()
heatmap = ax.imshow(cm, cmap='Blues')
fig.colorbar(heatmap, ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion Matrix')

# Log before showing: once the figure has been shown it may already be
# closed, and logging afterwards can upload an empty canvas.
wandb.log({"Confusion Matrix": wandb.Image(fig)})
plt.show()

In [None]:
# End the W&B run opened earlier in the notebook; nothing should be logged after this.
wandb.finish()