**Task 1**

In [133]:
from google.colab import drive
import numpy as np
import pandas as pd
import os
import glob
import re
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Check Cuda Availability

In [134]:
torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


Mount google drive

In [135]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Define data path

In [136]:
data_path = '/content/drive/My Drive/xyz/apex1'
images = [image for image in os.listdir(data_path) if image.endswith('.jpg')]

In [137]:
print(images)

['130_127_65373ece-110e-11ef-a573-a46bb606fa86.jpg', '81_118_6466ec38-110e-11ef-a573-a46bb606fa86.jpg', '116_122_65b16596-110e-11ef-a573-a46bb606fa86.jpg', '38_112_667294a0-110e-11ef-a573-a46bb606fa86.jpg', '44_117_66341a72-110e-11ef-a573-a46bb606fa86.jpg', '84_116_65d29acc-110e-11ef-a573-a46bb606fa86.jpg', '130_127_657396e4-110e-11ef-a573-a46bb606fa86.jpg', '84_115_65f0c56a-110e-11ef-a573-a46bb606fa86.jpg', '48_110_6690d294-110e-11ef-a573-a46bb606fa86.jpg', '38_113_66513b0c-110e-11ef-a573-a46bb606fa86.jpg', '116_121_659206ba-110e-11ef-a573-a46bb606fa86.jpg', '117_109_67737fae-110e-11ef-a573-a46bb606fa86.jpg', '123_109_67afe1d8-110e-11ef-a573-a46bb606fa86.jpg', '49_109_66b11360-110e-11ef-a573-a46bb606fa86.jpg', '117_109_67589f7c-110e-11ef-a573-a46bb606fa86.jpg', '56_111_66cce1c6-110e-11ef-a573-a46bb606fa86.jpg', '116_109_678fe022-110e-11ef-a573-a46bb606fa86.jpg', '136_112_682b1ee8-110e-11ef-a573-a46bb606fa86.jpg', '143_113_6849f66a-110e-11ef-a573-a46bb606fa86.jpg', '128_113_68088536-11

Feature Extraction

In [138]:
def extract_coordinates(file_name):
  match = re.match(r'(\d+)_(\d+)_.*\.jpg', file_name)
  if match:
    return list(map(int, match.groups()))
  else:
    return None

In [139]:
data = []

for image in images:
  coordination = extract_coordinates(image)
  if coordination:
    data.append((os.path.join(data_path, image), coordination))

In [140]:
print(data)

[('/content/drive/My Drive/xyz/apex1/130_127_65373ece-110e-11ef-a573-a46bb606fa86.jpg', [130, 127]), ('/content/drive/My Drive/xyz/apex1/81_118_6466ec38-110e-11ef-a573-a46bb606fa86.jpg', [81, 118]), ('/content/drive/My Drive/xyz/apex1/116_122_65b16596-110e-11ef-a573-a46bb606fa86.jpg', [116, 122]), ('/content/drive/My Drive/xyz/apex1/38_112_667294a0-110e-11ef-a573-a46bb606fa86.jpg', [38, 112]), ('/content/drive/My Drive/xyz/apex1/44_117_66341a72-110e-11ef-a573-a46bb606fa86.jpg', [44, 117]), ('/content/drive/My Drive/xyz/apex1/84_116_65d29acc-110e-11ef-a573-a46bb606fa86.jpg', [84, 116]), ('/content/drive/My Drive/xyz/apex1/130_127_657396e4-110e-11ef-a573-a46bb606fa86.jpg', [130, 127]), ('/content/drive/My Drive/xyz/apex1/84_115_65f0c56a-110e-11ef-a573-a46bb606fa86.jpg', [84, 115]), ('/content/drive/My Drive/xyz/apex1/48_110_6690d294-110e-11ef-a573-a46bb606fa86.jpg', [48, 110]), ('/content/drive/My Drive/xyz/apex1/38_113_66513b0c-110e-11ef-a573-a46bb606fa86.jpg', [38, 113]), ('/content/dr

Data Augmentation

In [141]:
def data_augmentation(x_image, y_image):

  if np.random.rand(1) > 0.5:
    x_image = cv2.flip(x_image, 1)
    y_image[1] = 1 - y_image[1]

  x_image = cv2.resize(x_image, (224, 224))

  return x_image, y_image

Load Data to x and y (features and label)

In [142]:
X_array = []
y_array = []

num_images_before = len(data)

for item in data:

  x_image = cv2.imread(item[0])
  y_image = item[1]
  if y_image[0] == 0 or y_image[1] == 0:
      continue

  x_image, y_image = data_augmentation(x_image, y_image)

  y_image_processed = y_image.copy()
  y_image_processed[0] = (float(int(y_image[0] - y_image[0] / 2))) / (y_image[0] / 2)  # height
  y_image_processed[1] = (float(int(y_image[1] - y_image[1] / 2))) / (y_image[1] / 2)  # width

  X_array.append(x_image)
  y_array.append(y_image_processed)

X_array = np.array(X_array)
y_array = np.array(y_array)

num_images_after = len(X_array)

print("Number of images before augmentation:", num_images_before)
print("Number of images after augmentation:", num_images_after)

Number of images before augmentation: 2633
Number of images after augmentation: 2633


In [143]:
np.save('images.npy', X_array)
np.save('coordinates.npy', y_array)

Convert Data to Tensor

In [144]:
X = torch.tensor(X_array, dtype=torch.float32)
y = torch.tensor(y_array, dtype=torch.float32)

Train-Test Split

In [145]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=122)
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

Load ResNet18

In [146]:
baseModel = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
baseModel.fc = nn.Linear(512, 512)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Create Model

In [147]:
hidden_units = 128
output_units = 2
activation_function = nn.ReLU()

In [148]:
headModel = nn.AdaptiveAvgPool2d((1,1))
hidden_layer = nn.Linear(512, hidden_units)
dropout_layer = nn.Dropout(p=0.5)
output_layer = nn.Linear(hidden_units, output_units)
model_resnet18_regression = nn.Sequential(baseModel, hidden_layer, activation_function, output_layer)
model_resnet18_regression.to(device)

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

Train Data

In [149]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,train_size=0.9, test_size=0.1, random_state=42)

epochs = 50
batch_size = 16
loss_function = nn.MSELoss()
num_batches_train = (len(X_train) + batch_size - 1) // batch_size
num_batches_val = (len(X_val) + batch_size - 1) // batch_size

optimizer = optim.Adam(model_resnet18_regression.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

for epoch in range(epochs):

  model_resnet18_regression.train().to(device)
  train_loss = 0

  for i in range(num_batches_train):
    batch_inputs = X_train[i * batch_size:(i + 1) * batch_size].permute(0, 3, 1, 2) # (N, C, H, W, C, N)
    batch_labels = y_train[i * batch_size:(i + 1) * batch_size]

    optimizer.zero_grad()
    outputs = model_resnet18_regression(batch_inputs)
    loss = loss_function(outputs, batch_labels)
    loss.backward()
    nn.utils.clip_grad_norm_(model_resnet18_regression.parameters(), max_norm=2.0)
    optimizer.step()
    train_loss += loss.item()

  train_loss /= num_batches_train

  model_resnet18_regression.eval().to(device)
  val_loss = 0

  with torch.no_grad():
    for j in range(num_batches_val):
      batch_inputs = X_val[j * batch_size:(j + 1) * batch_size].permute(0, 3, 1, 2)
      batch_labels = y_val[j * batch_size:(j + 1) * batch_size]

      outputs = model_resnet18_regression(batch_inputs)
      loss = loss_function(outputs, batch_labels)
      val_loss += loss.item()

    val_loss /= num_batches_val
    scheduler.step(val_loss)

  epoch_train_loss = train_loss / num_batches_train
  print(f'epoch {epoch+1} Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}')



epoch 1 Train Loss: 0.017729, Val Loss: 0.001562
epoch 2 Train Loss: 0.000959, Val Loss: 0.000653
epoch 3 Train Loss: 0.000807, Val Loss: 0.000576
epoch 4 Train Loss: 0.001061, Val Loss: 0.000389
epoch 5 Train Loss: 0.000859, Val Loss: 0.000380
epoch 6 Train Loss: 0.000901, Val Loss: 0.000242
epoch 7 Train Loss: 0.001065, Val Loss: 0.000551
epoch 8 Train Loss: 0.000675, Val Loss: 0.000531
epoch 9 Train Loss: 0.000657, Val Loss: 0.000407
epoch 10 Train Loss: 0.001020, Val Loss: 0.000357
epoch 11 Train Loss: 0.000553, Val Loss: 0.000365
epoch 12 Train Loss: 0.000877, Val Loss: 0.000920
epoch 13 Train Loss: 0.000603, Val Loss: 0.000265
epoch 14 Train Loss: 0.000670, Val Loss: 0.000245
epoch 15 Train Loss: 0.000556, Val Loss: 0.000965
epoch 16 Train Loss: 0.000560, Val Loss: 0.000452
epoch 17 Train Loss: 0.000439, Val Loss: 0.000532
epoch 18 Train Loss: 0.000307, Val Loss: 0.000198
epoch 19 Train Loss: 0.000205, Val Loss: 0.000254
epoch 20 Train Loss: 0.000204, Val Loss: 0.000182
epoch 21 

Save Model

In [150]:
torch.save(model_resnet18_regression.state_dict(), 'model_resnet18_regression.pth')

Load Model

In [151]:
baseModel = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
baseModel.fc = torch.nn.Linear(512, 512)
headModel = torch.nn.AdaptiveAvgPool2d((1,1))
hidden_layer = torch.nn.Linear(512, 128)
dropout_layer = torch.nn.Dropout(p=0.5)
output_layer = torch.nn.Linear(128, 2)
model_resnet18_regression = torch.nn.Sequential(baseModel, hidden_layer, torch.nn.ReLU(), output_layer)
model_resnet18_regression = model_resnet18_regression.to(device).eval()
model_resnet18_regression.load_state_dict(torch.load('model_resnet18_regression.pth'))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


<All keys matched successfully>

Model Evaluation

In [152]:
model_resnet18_regression.eval().to(device)

num_batches_test = (len(X_test) + batch_size - 1) // batch_size
actuals = []
predictions = []

with torch.no_grad():
  for i in range(num_batches_test):
    batch_inputs_test = X_test[i * batch_size:(i + 1) * batch_size].permute(0, 3, 1, 2)
    batch_labels_test = y_test[i * batch_size:(i + 1) * batch_size]

    outputs = model_resnet18_regression(batch_inputs_test)
    actuals.append(batch_labels_test.cpu().numpy())
    predictions.append(outputs.cpu().numpy())

actuals = np.concatenate(actuals, axis=0)
predictions = np.concatenate(predictions, axis=0)
print('Mean Squared Error: ', mean_squared_error(actuals, predictions))

Mean Squared Error:  0.00021555019
