In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from matplotlib.pyplot import imshow
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data

# Set paths (resolved against the current working directory)
train_path = os.path.abspath('./train')
test_path = os.path.abspath('./test')
csv_path = os.path.abspath('./train.csv')

# Read train.csv into Pandas dataframe
train_csv = pd.read_csv(csv_path)
print('{} unique whale IDs'.format(len(train_csv['Id'].unique())))
# Add absolute image path to make reading easier
train_csv['Path'] = [os.path.join(train_path, img) for img in train_csv['Image']]
print(train_csv.head())

# Explore some whale pics (a different random sample of 5 on every run)
random_whales = train_csv['Path'].sample(5)
for whale in random_whales:
    # The context manager closes the image file as soon as the figure is drawn.
    with Image.open(whale) as img:
        plt.imshow(img)
        # plt.show() renders the current figure; it does not take the image.
        # Passing `img` positionally made it be interpreted as a display flag.
        plt.show()

5005 unique whale IDs
           Image         Id                                               Path
0  0000e88ab.jpg  w_f48451c  /n/fs/pvl/dfan/kaggle/humpback-whale/train/000...
1  0001f9222.jpg  w_c3d896a  /n/fs/pvl/dfan/kaggle/humpback-whale/train/000...
2  00029d126.jpg  w_20df2c5  /n/fs/pvl/dfan/kaggle/humpback-whale/train/000...
3  00050a15a.jpg  new_whale  /n/fs/pvl/dfan/kaggle/humpback-whale/train/000...
4  0005c1ef8.jpg  new_whale  /n/fs/pvl/dfan/kaggle/humpback-whale/train/000...


<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

In [3]:
# Dataset subclass that reads whale images from disk lazily, one per __getitem__ call
class WhalesDataset(data.Dataset):
    """Map-style dataset yielding ``(index, whale_id, image)`` triples.

    Training mode (``is_train=True``) serves the rows of ``frame``, which
    defaults to the module-level ``train_csv`` dataframe.  Test mode
    (``is_train=False``) serves every regular file found in ``image_dir``,
    which defaults to the module-level ``test_path``; test items have no known
    label, so their ``whale_id`` is the empty string (a string placeholder
    keeps the item batchable by the default DataLoader collate function).

    Args:
        is_train: True to serve labelled training images, False for test images.
        transform: Optional callable applied to each PIL image; pass None (or
            any falsy value) to get the raw PIL image back.
        frame: Optional dataframe with 'Id' and 'Path' columns to serve instead
            of the defaults described above.
        image_dir: Optional directory to list in test mode when ``frame`` is
            not given.  Ignored otherwise.

    The 'Id' and 'Path' columns are copied into plain lists at construction
    time, so later edits to the source dataframe are not picked up.
    """

    def __init__(self, is_train, transform, frame=None, image_dir=None):
        self.is_train = is_train
        self.transform = transform
        if frame is None:
            if is_train:
                frame = train_csv
            else:
                frame = self._frame_from_dir(test_path if image_dir is None else image_dir)
        # Positional lists avoid label-based lookups on the dataframe index,
        # which would break if the frame had been filtered or re-indexed.
        self.whale_ids = frame['Id'].tolist()
        self.paths = frame['Path'].tolist()

    @staticmethod
    def _frame_from_dir(image_dir):
        """Build an unlabelled ('Id' == '') frame from the files in image_dir."""
        # Sorted so that a given index always maps to the same file.
        names = sorted(
            name for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )
        return pd.DataFrame({
            'Image': names,
            'Id': [''] * len(names),
            'Path': [os.path.join(image_dir, name) for name in names],
        })

    def __len__(self):
        """Number of images served in the selected mode."""
        return len(self.paths)

    def __getitem__(self, index):
        """Load one image and return ``(index, whale_id, image)``."""
        img = Image.open(self.paths[index])
        if self.transform:
            img = self.transform(img) # e.g. ToTensor converts (HxWxC) -> (CxHxW)
        return index, self.whale_ids[index], img

In [None]:
def _encode_labels(whale_ids, class_to_idx, device):
    """Convert a batch of whale ID strings into a LongTensor of class indices on `device`."""
    return torch.tensor([class_to_idx[w] for w in whale_ids], dtype=torch.long, device=device)


def _accuracy(model, device, loader, class_to_idx):
    """Return the top-1 accuracy of `model` over every batch in `loader` (0.0 if empty)."""
    was_training = model.training
    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for _, whale_ids, images in loader:
            labels = _encode_labels(whale_ids, class_to_idx, device)
            predictions = model(images.to(device)).argmax(dim=1)
            correct += (predictions == labels).sum().item()
            seen += labels.size(0)
    model.train(was_training)  # leave the model in the mode we found it
    return correct / seen if seen else 0.0


def train(num_epochs=1, learning_rate=0.001, batch_size=15, num_workers=5):
    """Fine-tune the final layer of a pretrained ResNet-50 to classify whale IDs.

    All pretrained layers are frozen; only a freshly created fully connected
    layer with one output per unique value of ``train_csv['Id']`` is trained,
    using cross-entropy loss and Adam.

    Args:
        num_epochs: Number of passes over the training set.
        learning_rate: Adam learning rate.
        batch_size: Number of images per training batch.
        num_workers: Number of DataLoader worker processes.

    Returns:
        dict with keys 'model' (the trained network), 'class_to_idx' (whale ID
        string -> class index, needed to decode predictions), 'iterations' and
        'losses' (batch loss sampled every 50 steps) and 'train_accuracy'.
    """
    # One class per distinct whale ID (not one per training row).
    classes = sorted(train_csv['Id'].unique())
    class_to_idx = {whale_id: idx for idx, whale_id in enumerate(classes)}
    num_classes = len(classes)

    train_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': num_workers}
    # Load Data
    preprocess_steps = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize(200),
        transforms.CenterCrop(200),
        transforms.ToTensor(),
        # The pretrained torchvision weights expect ImageNet-normalized input.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_set = WhalesDataset(is_train=True, transform=preprocess_steps)
    train_loader = data.DataLoader(train_set, **train_params)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = torchvision.models.resnet50(pretrained=True)
    # Freeze all pretrained layers
    for param in model.parameters():
        param.requires_grad = False
    # ImageNet has 1000 classes, so we need to change the last layer to
    # accommodate the number of classes we have. The new layer is trainable.
    imagenet_features = model.fc.in_features
    model.fc = nn.Linear(imagenet_features, num_classes)
    # Move to the device only after replacing fc, so the new layer moves too.
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Only the new head has trainable parameters.
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=learning_rate)

    # Train the network
    total_steps = len(train_loader)
    iterations = []
    losses = []
    model.train()
    for epoch in range(num_epochs):
        for i, (_, whale_ids, images) in enumerate(train_loader):
            # Send tensors to the device; labels arrive as strings and must be
            # mapped to class indices first.
            images = images.to(device)                                   # batch_size x 3 x 200 x 200
            labels = _encode_labels(whale_ids, class_to_idx, device)    # batch_size

            # Forward pass
            outputs = model(images)                                      # batch_size x num_classes
            loss = criterion(outputs, labels)

            # Backprop and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 50 == 0:
                iterations.append(epoch * total_steps + i)
                losses.append(loss.item())
                print('Epoch [{}/{}], Step [{}/{}], Batch Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i + 1, total_steps, loss.item()), flush=True)

    # Calculate accuracy over the entire training set instead of a single batch
    final_acc = _accuracy(model, device, train_loader, class_to_idx)
    print('Final training set accuracy: {}'.format(final_acc))

    # make_predictions is not defined in this cell. Call it only if the
    # notebook provides it, so a finished training run is not lost to a
    # NameError at the very end.
    predict_fn = globals().get('make_predictions')
    if callable(predict_fn):
        print('Making predictions on testing set:')
        test_set = WhalesDataset(is_train=False, transform=preprocess_steps)
        test_loader = data.DataLoader(test_set, batch_size=batch_size, shuffle=False,
                                      num_workers=num_workers)
        predict_fn(model, True, device, test_loader)
    else:
        print('make_predictions is not defined; skipping testing set predictions.')

    return {
        'model': model,
        'class_to_idx': class_to_idx,
        'iterations': iterations,
        'losses': losses,
        'train_accuracy': final_acc,
    }
    
# Entry point: start training only when this code is executed as the main
# program (a notebook cell or a script), not when it is imported as a module.
if __name__ == "__main__":
    train()