In [83]:
import numpy as np
import pandas as pd
import cv2
import torch
from sklearn import preprocessing

In [82]:
# Load the per-image metadata table and preview its first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# row index that was written into the CSV; consider index_col=0 — confirm.
metadata_csv = 'data/metadata.csv'
df = pd.read_csv(metadata_csv)
df.head(n=5)

Unnamed: 0,image_id,path,height,width,viewpoint,date,timestamp,whale_id,encounter_id
0,train0000,images/train0000.jpg,463,150,top,2017-08-07,2017-08-07 20:38:36,whale000,whale000-000
1,train0001,images/train0001.jpg,192,81,top,2019-08-05,2019-08-05 16:49:13,whale001,whale001-000
2,train0002,images/train0002.jpg,625,183,top,2017-08-07,2017-08-07 22:12:19,whale002,whale002-000
3,train0003,images/train0003.jpg,673,237,top,2017-08-07,2017-08-07 20:40:59,whale003,whale003-000
4,train0004,images/train0004.jpg,461,166,top,2018-08-10,2018-08-10 21:45:30,whale004,whale004-000


In [81]:
# Summarise the encounter identifiers (count, number of unique values, most frequent value).
# The column is named 'encounter_id'; the metadata has no 'whal' column, so that key raised KeyError.
df['encounter_id'].describe()

count             5902
unique            1617
top       whale015-000
freq                59
Name: encounter_id, dtype: object

In [20]:
# Summary statistics of the image heights recorded in the metadata.
df.loc[:, 'height'].describe()

count    5902.000000
mean      443.657743
std       152.373968
min        70.000000
25%       339.000000
50%       432.000000
75%       535.000000
max      1796.000000
Name: height, dtype: float64

In [19]:
# Summary statistics of the image widths recorded in the metadata.
df.loc[:, 'width'].describe()

count    5902.000000
mean      291.271433
std       528.631940
min        34.000000
25%       123.000000
50%       157.000000
75%       198.000000
max      6561.000000
Name: width, dtype: float64

In [21]:
# Summary of the categorical viewpoint column (count, unique values, most frequent value).
df.loc[:, 'viewpoint'].describe()

count     5902
unique       3
top        top
freq      5434
Name: viewpoint, dtype: object

In [43]:
def mean_of_image(path = None):
    """Return the per-channel mean pixel value of the image at ``path``.

    The image is loaded with ``cv2.imread`` and averaged over its first two
    axes (rows and columns), leaving one value per channel in the channel
    order produced by ``cv2.imread``.

    Args:
        path: Location of the image file on disk.

    Returns:
        The result of ``img.mean(axis=(0, 1))``: one mean per channel.

    Raises:
        TypeError: if ``path`` is None (the default).
        FileNotFoundError: if ``cv2.imread`` could not load the file.
    """
    if path is None:
        raise TypeError('mean_of_image() requires an image path, got None')
    img = cv2.imread(path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface as an opaque
    # AttributeError on the .mean call below.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))
    return img.mean(axis=(0,1))

def std_of_image(path = None):
    """Return the per-channel standard deviation of the image at ``path``.

    The image is loaded with ``cv2.imread`` and its standard deviation is
    taken over the first two axes (rows and columns), leaving one value per
    channel in the channel order produced by ``cv2.imread``.

    Args:
        path: Location of the image file on disk.

    Returns:
        The result of ``img.std(axis=(0, 1))``: one standard deviation per
        channel.

    Raises:
        TypeError: if ``path`` is None (the default).
        FileNotFoundError: if ``cv2.imread`` could not load the file.
    """
    if path is None:
        raise TypeError('std_of_image() requires an image path, got None')
    img = cv2.imread(path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface as an opaque
    # AttributeError on the .std call below.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))
    return img.std(axis=(0,1))

In [53]:
# Per-image channel means, one row per entry of df['path'] (paths are stored relative to data/).
means = np.array([mean_of_image('data/' + rel_path) for rel_path in df['path']])

In [58]:
# Per-image channel standard deviations, one row per entry of df['path'] (paths are stored relative to data/).
stds = np.array([std_of_image('data/' + rel_path) for rel_path in df['path']])

In [59]:
# Collapse the per-image statistics across images (axis 0), keeping one value per channel.
# NOTE(review): `std` is the average of the per-image standard deviations, not a
# pooled standard deviation over all pixels — confirm this is the intended statistic.
mean, std = np.mean(means, axis=0), np.mean(stds, axis=0)

In [62]:
# Report the aggregated per-channel statistics.
report_template = 'Mean of the dataset = {}\nStddev of the dataset = {}'
print(report_template.format(mean, std))

Mean of the dataset = [118.3311038  108.94562059 107.9743398 ]
Stddev of the dataset = [48.07930915 44.44048544 44.72591547]


In [86]:
# Whale identity of every image, as a plain Python list in row order.
labels = df['whale_id'].to_list()

In [91]:
# Fit the encoder on the whale ids, then map every label to its integer class index.
le = preprocessing.LabelEncoder().fit(labels)
targets = le.transform(labels)


LabelEncoder()


In [71]:
%load_ext autoreload
%autoreload 2

import torchvision.models as models
from torchsummary import summary
import torch.nn as nn
from models import WhaleDoModel
import torch

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
# Settings handed to WhaleDoModel; `input_dim` is also reused as the input size for the model summary.
# NOTE(review): `input_dim` starts with 5, while the batches drawn from the
# DataLoader further down have 3 channels — confirm which is intended.
config = dict(
    backbone_model='resnet18',
    input_dim=(5, 224, 224),
    rep_dim=512,
    pretrained=True,
    device='cpu',
    # Settings for the projector part of the model.
    projector=dict(hidden_dim=1024, output_dim=2),
)

model = WhaleDoModel(config)

In [63]:
# Print a layer-by-layer summary of the model for the configured input size.
input_shape = config['input_dim']
summary(model, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]          15,680
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [74]:
# Put the model in eval mode, then list every named parameter with its requires_grad flag.
model.eval()
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.requires_grad)

backbone.model.conv1.weight True
backbone.model.bn1.weight True
backbone.model.bn1.bias True
backbone.model.layer1.0.conv1.weight True
backbone.model.layer1.0.bn1.weight True
backbone.model.layer1.0.bn1.bias True
backbone.model.layer1.0.conv2.weight True
backbone.model.layer1.0.bn2.weight True
backbone.model.layer1.0.bn2.bias True
backbone.model.layer1.1.conv1.weight True
backbone.model.layer1.1.bn1.weight True
backbone.model.layer1.1.bn1.bias True
backbone.model.layer1.1.conv2.weight True
backbone.model.layer1.1.bn2.weight True
backbone.model.layer1.1.bn2.bias True
backbone.model.layer2.0.conv1.weight True
backbone.model.layer2.0.bn1.weight True
backbone.model.layer2.0.bn1.bias True
backbone.model.layer2.0.conv2.weight True
backbone.model.layer2.0.bn2.weight True
backbone.model.layer2.0.bn2.bias True
backbone.model.layer2.0.downsample.0.weight True
backbone.model.layer2.0.downsample.1.weight True
backbone.model.layer2.0.downsample.1.bias True
backbone.model.layer2.1.conv1.weight True


In [93]:
from dataloader import WhaleDoDataset
from torch.utils.data import DataLoader

In [94]:
# Build the dataset from the same metadata CSV path that was read into `df` earlier in the notebook.
train = WhaleDoDataset('data/metadata.csv')

In [101]:
# Iterate over the dataset in shuffled mini-batches of 64 samples.
train_loader = DataLoader(
    dataset=train,
    batch_size=64,
    shuffle=True,
)

In [103]:
# Pull only the first batch from the loader and show the shape of its inputs.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    x_batch, y_batch = first_batch
    print(x_batch.shape)

torch.Size([64, 3, 443, 291])
