Import all the libraries required for this notebook

In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
import requests
import io
import urllib.parse
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import functools
from PIL import Image  # Image utilities.
import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import imageio as io_temp
from skimage.transform import resize
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix


cudnn.benchmark = True
plt.ion()   # interactive mode

Downloading the dataset from online resources

In [None]:
SOURCE_URL = 'https://storage.googleapis.com/dm-turtle-recall/images.tar'
IMAGE_DIR = './turtle_recall/images'
TAR_PATH = os.path.join(IMAGE_DIR, os.path.basename(SOURCE_URL))
EXPECTED_IMAGE_COUNT = 13891

%sx mkdir --parents "{IMAGE_DIR}"
if len(os.listdir(IMAGE_DIR)) != EXPECTED_IMAGE_COUNT:
  %sx wget --no-check-certificate -O "{TAR_PATH}" "{SOURCE_URL}"
  %sx tar --extract --file="{TAR_PATH}" --directory="{IMAGE_DIR}"
  %sx rm "{TAR_PATH}"

print(f'The total number of images is: {len(os.listdir(IMAGE_DIR))}')

In [None]:
BASE_URL = 'https://storage.googleapis.com/dm-turtle-recall/'


def read_csv_from_web(file_name):
  """Download a CSV file stored under BASE_URL and parse it with pandas.

  Args:
    file_name: Name of the file, resolved relative to BASE_URL.

  Returns:
    A pandas DataFrame with the parsed contents.

  Raises:
    requests.HTTPError: If the server answers with an error status.
  """
  url = urllib.parse.urljoin(BASE_URL, file_name)
  # A timeout keeps the notebook from hanging forever on a stalled connection.
  response = requests.get(url, timeout=60)
  # Fail loudly on 4xx/5xx instead of trying to parse an error page as CSV.
  response.raise_for_status()
  return pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Load the annotation tables: three come from the bucket, the extra-images
# table is read from a local file.
train = read_csv_from_web('train.csv')
test = read_csv_from_web('test.csv')
extra = pd.read_csv('extra_images_common_updated.csv')
sample_submission = read_csv_from_web('sample_submission.csv')

# Lowercase the head-orientation labels and check that exactly the three
# known views occur in each table.
for df in (train, test):
  df['image_location'] = df['image_location'].str.lower()
  assert set(df['image_location'].unique()) == {'left', 'right', 'top'}

In [None]:
train.head()

In [None]:
test.head()

In [None]:
extra.head()

In [None]:
train.shape, test.shape, extra.shape

In [None]:
# Report how many distinct turtles each labelled table contains.
print(f"There are {train.turtle_id.nunique()} unique turtles in the train set.")
print(f"There are {extra.turtle_id.nunique()} unique turtles in the extra set.")

In [None]:
# Number of images available per turtle, most frequent first.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
train_images_per_turtle = train['turtle_id'].value_counts()
extra_images_per_turtle = extra['turtle_id'].value_counts()

In [None]:
train_images_per_turtle

In [None]:
extra_images_per_turtle

Capturing all the turtle IDs and assigning a number to each turtle

In [None]:
# Give every turtle ID a contiguous integer label, numbered in order of first
# appearance in `train`. `class_names` is an alias of the same dict.
ls = pd.unique(train['turtle_id'])
classes = {turtle_id: label for label, turtle_id in enumerate(ls)}
class_names = classes

In [None]:
class_names

In [None]:
num_classes = len(class_names)

In [None]:
# Fold the extra images into the training table so both are split together below.
# NOTE: `train` is rebuilt from its own previous value, so running this cell a
# second time appends `extra` again; re-run the CSV-loading cell first to reset it.
train = pd.concat([train, extra], ignore_index=True, sort=False)

In [None]:
train

In [None]:
# Hold out 20% of the rows (stratified by turtle), then cut that holdout in
# half, giving an 80/10/10 train/test/val partition.
X_train, X_test, y_train, y_test = train_test_split(
    train[['image_id', 'image_location']],
    train['turtle_id'],
    test_size=0.20,
    random_state=42,
    stratify=train['turtle_id'],
)
X_test, X_val, y_test, y_val = train_test_split(
    X_test,
    y_test,
    test_size=0.50,
    random_state=42,
    stratify=y_test,
)

print(X_train.shape, X_test.shape, X_val.shape, y_train.shape, y_test.shape, y_val.shape)

# Re-attach the labels to the features and tag every row with its split name.
train_ds = pd.concat([X_train, y_train], axis=1).assign(type="train")
test_ds = pd.concat([X_test, y_test], axis=1).assign(type="test")
val_ds = pd.concat([X_val, y_val], axis=1).assign(type="val")

In [None]:
dataset_sizes = {'train': len(train_ds), 'test': len(test_ds), 'val': len(val_ds)}

In [None]:
train_ds

In [None]:
dataset = pd.concat([train_ds,test_ds,val_ds], axis=0)

In [None]:
dataset

In [None]:
dataset['image_id'] = IMAGE_DIR + "/" + dataset['image_id'].astype(str) + ".JPG"

Replacing the turtle IDs with numbers for easier training by the deep learning model

In [None]:
dataset['turtle_id'] = dataset['turtle_id'].map(class_names)

In [None]:
dataset.to_csv('./dataset.csv')

In [None]:
# Keep only the right-side images of each split. The split filter and the
# orientation filter are applied together: assigning them one after the other
# would discard the split filter and make train, test and val identical.
right_only = dataset.loc[dataset['image_location'] == "right"]
train_ds = right_only.loc[right_only['type'] == "train"]
test_ds = right_only.loc[right_only['type'] == "test"]
val_ds = right_only.loc[right_only['type'] == "val"]

# The split sizes changed with the filtering, so refresh the sizes used to
# average the per-epoch loss and accuracy.
dataset_sizes = {'train': len(train_ds), 'test': len(test_ds), 'val': len(val_ds)}

In [None]:
train_ds

Creating a PyTorch `Dataset` for our data, which the `DataLoader`s below wrap. Only the image and the turtle ID are used for training; the turtle's head orientation is returned with each raw sample but is not used by the model.

In [None]:
class TurtleDataset(Dataset):
    """Annotation table of turtle images exposed as a PyTorch ``Dataset``.

    Rows are read by position: column 0 is handed to ``io_temp.imread`` as the
    image path, column 1 becomes ``image_orientation`` and column 2 becomes
    ``turtle_id`` in the returned sample.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file: Annotation table supporting ``len()`` and ``.iloc``
                (this notebook passes a pandas DataFrame, not a file path).
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.transform = transform
        self.csv = csv_file

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.csv)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` and return it with its annotations."""
        position = idx.tolist() if torch.is_tensor(idx) else idx

        image = io_temp.imread(self.csv.iloc[position, 0])
        # Wrapping the remaining columns in a list yields a 2-D array whose
        # first row holds the annotations of this sample.
        annotations = np.array([self.csv.iloc[position, 1:]])[0]
        sample = {
            'image': image,
            'image_orientation': annotations[0],
            'turtle_id': annotations[1],
        }

        return self.transform(sample) if self.transform else sample

In [None]:
def show_pics(image, image_orientation, turtle_id):
    """Draw ``image`` with matplotlib's pyplot interface.

    ``image_orientation`` and ``turtle_id`` are accepted but not used, which
    lets a raw sample dict be passed directly as ``show_pics(**sample)``.
    """
    plt.imshow(image)
    plt.pause(0.001)  # pause a bit so that plots are updated


In [None]:
# Preview the first few raw (untransformed) training samples side by side.
face_dataset = TurtleDataset(csv_file=train_ds)

fig = plt.figure()

# Bound the loop explicitly so the figure is also shown when the dataset
# holds fewer than four samples.
for i in range(min(4, len(face_dataset))):
    sample = face_dataset[i]

    print(i, sample['image'].shape, sample['turtle_id'])

    ax = plt.subplot(1, 4, i + 1)
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(i))
    ax.axis('off')
    show_pics(**sample)

plt.show()

Rescale helps reformat the image by changing its dimensions and cropping it to allow it to be used as an input in our model

In [None]:
class Rescale(object):
    """Center-crop the image of a sample to a square and resize it.

    The aspect ratio of the original image is NOT kept: the largest centered
    square is cut out and then resized to the requested size.

    Args:
        output_size (tuple or int): Desired output size. If int, the output is
            ``output_size x output_size``. If tuple, it is read as
            ``(height, width)`` and the square crop is resized to that shape.
    """

    def __init__(self, output_size = 224):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        """Transform one sample.

        Args:
            sample: Mapping with at least the keys ``'image'`` (an H x W or
                H x W x C array) and ``'turtle_id'``.

        Returns:
            ``[image, turtle_id]`` where ``image`` is a channel-first
            ``(3, height, width)`` float64 array.
        """
        image, turtle_id = sample['image'], sample['turtle_id']

        # Always hand a 3-channel image to the model: expand grayscale images
        # and drop any channel beyond the third (e.g. an alpha channel).
        if image.ndim == 2:
            image = image[:, :, np.newaxis]
        if image.shape[2] == 1:
            image = np.repeat(image, 3, axis=2)
        elif image.shape[2] > 3:
            image = image[:, :, :3]

        h, w = image.shape[:2]
        crop_size = min(w, h)
        crop = image[(h - crop_size) // 2 : (h + crop_size) // 2, (w - crop_size) // 2 : (w + crop_size) // 2]

        if isinstance(self.output_size, int):
            target_shape = (self.output_size, self.output_size)
        else:
            target_shape = tuple(self.output_size)
        img = resize(crop, target_shape)

        # Channel-last -> channel-first, the layout the model consumes.
        return [img.transpose((2,0,1)).astype(np.double),turtle_id]

In [None]:
scale = Rescale(256)

In [None]:
# Wrap every split in a TurtleDataset that applies the same 256x256 rescaling;
# each dataset gets its own transform pipeline instance.
train_transformed_dataset, test_transformed_dataset, val_transformed_dataset = (
    TurtleDataset(csv_file=split_frame, transform=transforms.Compose([Rescale(256)]))
    for split_frame in (train_ds, test_ds, val_ds)
)

In [None]:
# One DataLoader per split. Only the training split is shuffled: evaluation
# then visits the samples in a fixed order, so metrics computed on the first
# batches of 'test'/'val' are reproducible between runs.
dataloaders = {
    split_name: DataLoader(split_dataset, batch_size=16,
                           shuffle=(split_name == 'train'), num_workers=2)
    for split_name, split_dataset in (
        ('train', train_transformed_dataset),
        ('test', test_transformed_dataset),
        ('val', val_transformed_dataset),
    )
}


In [None]:
def imshow(inp, title=None, mean=None, std=None):
    """Imshow for Tensor.

    Args:
        inp: Channel-first ``(C, H, W)`` tensor; it is converted with
            ``.numpy()`` and transposed to ``(H, W, C)``.
        title: Optional title for the plot.
        mean, std: Optional per-channel statistics. When both are given the
            normalisation ``(x - mean) / std`` is undone before display. They
            default to ``None`` because the Rescale pipeline in this notebook
            applies no normalisation, so un-normalising would distort colours.
    """
    inp = inp.numpy().transpose((1, 2, 0))
    if mean is not None and std is not None:
        inp = np.asarray(std) * inp + np.asarray(mean)
    # Clamp to the range matplotlib accepts for float images.
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data. The labels get their own name so that the
# `classes` mapping built earlier is not replaced by a tensor.
inputs, batch_labels = next(iter(dataloaders['train']))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=batch_labels.tolist())

`train_model` is a function that trains any model with any criterion and optimizer. We can swap in other pretrained models as the input to experiment further!

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass over ``dataloaders['train']`` followed by
    an evaluation pass over ``dataloaders['val']``; batches are moved to the
    global ``device``. The weights with the highest validation accuracy are
    kept and restored at the end.

    Args:
        model: Network to train (modified in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch after the training pass.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_size
            if phase == 'train':
                scheduler.step()

            # Average over the samples actually iterated rather than over a
            # separately maintained size table, which goes stale as soon as
            # the splits are filtered after it was computed.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
def visualize_model(model, num_images=6):
    """Plot validation images together with the turtle ID the model predicts.

    Reads batches from ``dataloaders['val']`` on the global ``device`` and
    stops once ``num_images`` images have been drawn. The model's
    training/eval mode is restored before returning.

    Args:
        model: Trained network producing one score per class.
        num_images: Maximum number of images to display.
    """
    was_training = model.training
    model.eval()
    images_so_far = 0
    plt.figure()

    # `class_names` maps turtle ID -> class index; predictions are class
    # indices, so the inverse mapping is needed to label the plots.
    index_to_turtle = {index: turtle_id for turtle_id, index in class_names.items()}
    # Two columns; round the row count up so an odd num_images still fits.
    num_rows = (num_images + 1) // 2

    try:
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs.float())
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    predicted_index = preds[j].item()
                    ax.set_title('predicted: {}'.format(
                        index_to_turtle.get(predicted_index, predicted_index)))
                    imshow(inputs[j].cpu())

                    if images_so_far == num_images:
                        return
    finally:
        # Restore the original mode on every exit path, including errors.
        model.train(mode=was_training)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
device

In [None]:
model_ft = models.resnet18(pretrained=True, progress=True) ### Can change this to try out other models available 
num_ftrs = model_ft.fc.in_features
# Replace the final fully connected layer so that the network outputs one
# score per turtle class (num_classes) instead of the pretrained head's outputs.
model_ft.fc = nn.Linear(num_ftrs, num_classes)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

In [None]:
torch.save(model_ft.state_dict(),"turtle_new_right.pth")

In [None]:
def apk(actual, predicted, k=5):
  """Average precision at k for a single query with one correct answer.

  Args:
    actual: The true turtle ID.
    predicted: Ranked sequence of predicted turtle IDs, best guess first.
    k: Only the first k predictions are scored.

  Returns:
    1 / rank of the first prediction equal to `actual` within the top k,
    or 0.0 when it does not appear there.
  """
  if len(predicted) > k:
    predicted = predicted[:k]

  # With a single correct ID only its first occurrence can score, so the
  # average precision reduces to the reciprocal of that rank.
  for rank, candidate in enumerate(predicted, start=1):
    if candidate == actual:
      return 1.0 / rank

  return 0.0


def mapk(actual, predicted, k=5):
  """Mean average precision at k over a set of queries.

  `actual[i]` is scored against `predicted[i][:k]`, so both sequences must
  be in the same order.

  Args:
    actual: Sequence of true turtle IDs.
    predicted: Sequence of ranked prediction lists, one per true ID.
    k: The size of the window to score within.

  Returns:
    The mean of the per-query average precision at k.
  """
  per_query_scores = []
  for truth, ranking in zip(actual, predicted):
    per_query_scores.append(apk(truth, ranking, k))
  return np.mean(per_query_scores)

In [None]:
def test_model(model, criterion, optimizer, scheduler):

    model.eval()   # Set model to evaluate mode

    t_output = []
    t_pred = []
    y_test = []
    top_k = []
    # Iterate over data.
    i = 1
    for inputs, labels in dataloaders['test']:
        # inputs = inputs.type(torch.DoubleTensor)
        inputs = inputs.to(device)
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)
        y_test.append(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(True):
            outputs = model(inputs.float())
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            t_output.append(outputs)
            t_pred.append(preds)
            temp1, temp2 = outputs.topk(5)
            top_k.append(temp2)

        i+=1
        if i == 11:
            break
    
    y_test = torch.cat(y_test).cpu().detach().numpy() 
    y_test_num = torch.cat(t_pred).cpu().detach().numpy() 
    y_pred = torch.cat(top_k).cpu().detach().numpy() 
    mapk_result = mapk(y_test, y_pred, k=5)
    print("With real set labels, our mapk with k=5 is", mapk_result)
    print('\nConfusion Matrix')
    conf_mt = confusion_matrix(y_test_num, y_test)
    print(conf_mt)
    plt.matshow(conf_mt)
    plt.show()
    print('\nClassification Report')
    print(classification_report(y_test_num, y_test))

In [None]:
test_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler)

In [None]:
def pred(model, criterion, optimizer, scheduler, phase=None, max_batches=49):
    """Print the labels and the model's predictions batch by batch.

    Args:
        model: Trained network producing one score per class.
        criterion, optimizer, scheduler: Unused; kept so existing calls with
            the training objects keep working.
        phase: Key of the loader in ``dataloaders`` to read. ``None`` uses
            ``'extra'`` when such a loader exists and otherwise falls back to
            ``'test'``, because this notebook only builds train/test/val
            loaders.
        max_batches: Stop after this many batches (the loop always processes
            at least one). ``None`` goes through the whole loader.
    """
    model.eval()   # Set model to evaluate mode

    if phase is None:
        phase = 'extra' if 'extra' in dataloaders else 'test'

    t_pred = []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_number, (inputs, labels) in enumerate(dataloaders[phase], start=1):
            inputs = inputs.to(device)
            labels = labels.to(device=device, dtype=torch.long)

            outputs = model(inputs.float())
            _, preds = torch.max(outputs, 1)
            print(labels)
            t_pred.append(preds)
            print(preds)

            if max_batches is not None and batch_number >= max_batches:
                break

    print(t_pred)

In [None]:
pred(model_ft, criterion, optimizer_ft, exp_lr_scheduler)
