#### Import packages

In [1]:
# import torch.multiprocessing as mp
# mp.set_start_method('spawn') 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import shutil
import itertools
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import random
import cv2

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import torch.utils.data
import torch.utils.data as data_utils

# Seed the NumPy and PyTorch (CPU + all CUDA devices) generators with one value
# and ask cuDNN for deterministic kernels. Python's built-in `random` module is
# not seeded here.
seed = 1
torch.backends.cudnn.deterministic = True
for _seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)
# from utils_3 import *
from utils import *

#### Load and Prepare Data

In [2]:
# Read the training and test sets into DataFrames. Both paths are relative to
# the directory the notebook is run from.
data = pd.read_json('./Data/train.json')
test = pd.read_json('./Data/test.json')

In [3]:
def _as_image(flat_band):
    """Reshape one flattened band (75*75 values) into a 75x75 ndarray."""
    return np.array(flat_band).reshape(75, 75)


# Turn every flattened band into a 2-D image, for both data sets.
for _frame in (data, test):
    for _band in ('band_1', 'band_2'):
        _frame[_band] = _frame[_band].apply(_as_image)

# Angles that cannot be parsed as numbers are coerced to NaN and then replaced
# by 0.0. The test set gets the same treatment as the training set so that a
# missing angle can never reach the network as NaN at prediction time.
data['inc_angle'] = pd.to_numeric(data['inc_angle'], errors='coerce').fillna(0.0)
test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce').fillna(0.0)

# Hold out 20% of the labelled rows for validation (fixed seed for repeatability).
train, val = train_test_split(data, test_size=0.2, random_state=42)

In [4]:
def scale_range(input_, min_, max_):
    """Linearly rescale ``input_`` so that its values span ``[min_, max_]``.

    Args:
        input_: Array-like of numbers. It is never modified; the work is done
            on a floating-point copy (integer input is promoted to float).
        min_: Value the smallest element is mapped to.
        max_: Value the largest element is mapped to.

    Returns:
        A new floating-point ndarray with the same shape as ``input_``. If all
        elements are equal there is no range to stretch, so every element is
        mapped to ``min_``.

    Raises:
        ValueError: If ``input_`` is empty (raised by the min reduction).
    """
    values = np.asarray(input_)
    # astype always copies here, so the caller's array is left untouched;
    # result_type keeps float32/float64 as they are and promotes integers.
    values = values.astype(np.result_type(values.dtype, np.float32))
    values -= values.min()
    span = values.max()
    if span == 0:
        # Constant input: avoid the 0/0 division and return the lower bound.
        values += min_
        return values
    values /= span / (max_ - min_)
    values += min_
    return values

In [5]:
def _band_cube(frame, column):
    """Gather one band column of `frame` into a single (rows, 75, 75) array."""
    return np.concatenate(frame[column].tolist()).reshape(-1, 75, 75)


def _angle_column(frame):
    """Return the incidence angles of `frame` as a (rows, 1) array."""
    return np.array(frame['inc_angle']).reshape((len(frame), 1))


# Each X_* is a two-element list: [images stacked as (rows, 2, 75, 75), angles].
band_1_tr, band_2_tr = _band_cube(train, 'band_1'), _band_cube(train, 'band_2')
X_train = [np.stack((band_1_tr, band_2_tr), axis=1), _angle_column(train)]

band_1_val, band_2_val = _band_cube(val, 'band_1'), _band_cube(val, 'band_2')
X_val = [np.stack((band_1_val, band_2_val), axis=1), _angle_column(val)]

band_1_test, band_2_test = _band_cube(test, 'band_1'), _band_cube(test, 'band_2')
# band_3_test = scale_range(band_1_test/band_2_test, -1, 1)
rgb = np.stack((band_1_test, band_2_test), axis=1)
X_test = [rgb, _angle_column(test)]

# Targets as float32 arrays (the dataset class casts them to long later).
y_train = train['is_iceberg'].values.astype(np.float32)
y_val = val['is_iceberg'].values.astype(np.float32)

#### PyTorch Model

In [6]:
# Hyper Parameters
# Image geometry and convolution settings shared by the model definition below.
width = 75  # image width in pixels
height = 75  # image height in pixels
channels = 2  # input channels fed to the first BatchNorm2d / Conv2d of `net`
padding = 0  # `padding` argument passed to every Conv2d in `net`

In [7]:
# Data Loader (Input Pipeline)
class icebergDataset(data_utils.Dataset):
    """Iceberg-Ship dataset.

    Wraps pre-loaded arrays so a DataLoader can serve
    ``[image, angle]`` (unlabelled) or ``[image, angle, label]`` samples.

    Args:
        X: Two-element sequence ``[images, angles]``. ``images`` is indexed
            per sample and each item must be an ndarray; ``angles`` must be
            an ndarray accepted by ``torch.from_numpy``.
        y: Optional ndarray of labels (stored as a long tensor). When omitted
            the dataset yields unlabelled samples.
        transform: Truthy to apply the random augmentations on every access.
        u_ed, u_zo, u_noisy, u_shift: Forwarded as ``u`` to
            ``randomErodeDilate``, ``randomZoomOut``, ``randomNoisy`` and
            ``randomShift`` respectively. Those helpers are defined outside
            this class (presumably in ``utils``); ``u`` looks like the
            probability of applying each one -- confirm against the helpers.
    """

    def __init__(self, X, y=None, transform=None, u_ed=0.2, u_zo=0.5, u_noisy=0.4, u_shift=0.3):
        self.X_images = X[0]
        self.X_angles = torch.from_numpy(X[1]).float()
        # Identity test, not `!=`: comparing an ndarray with None is an
        # element-wise operation whose truth value is ambiguous.
        self.y = None if y is None else torch.from_numpy(y).long()
        self.transform = transform

        self.u_ed = u_ed
        self.u_zo = u_zo
        self.u_noisy = u_noisy
        self.u_shift = u_shift

    def __len__(self):
        """Return the number of samples (length of the image container)."""
        return len(self.X_images)

    def __getitem__(self, idx):
        """Return ``[image, angle]`` or ``[image, angle, label]`` for ``idx``.

        The image is converted to a float tensor after the optional
        augmentation chain has run.
        """
        im = self.X_images[idx]
        if self.transform:
            im = randomErodeDilate(im, u=self.u_ed)
            im = randomZoomOut(im, u=self.u_zo)
            im = randomNoisy(im, u=self.u_noisy)
            im = randomShift(im, u=self.u_shift)

        sample = [torch.from_numpy(im).float(), self.X_angles[idx]]
        # Explicit `is not None` replaces the former try/except around a
        # tensor `== None` comparison, which could hide real errors.
        if self.y is not None:
            sample.append(self.y[idx])
        return sample

In [8]:
def save_checkpoint(state, is_best, filename='./Models/checkpoint.pth.tar',
                    best_filename='./Models/model_best.pth.tar'):
    """Serialise ``state`` to ``filename`` and optionally keep it as the best model.

    Args:
        state: Object handed to ``torch.save`` (the notebook passes a dict with
            epoch, model/optimizer state dicts and the best score).
        is_best: When truthy, the file just written is also copied to
            ``best_filename``.
        filename: Destination of the checkpoint.
        best_filename: Destination of the "best so far" copy. The default keeps
            the historical location; pass a run-specific path so that separate
            runs or folds do not overwrite each other's best model.

    Missing parent directories of either path are created first.
    """
    import os  # local import: `os` is not imported explicitly by this notebook

    def _ensure_parent(path):
        # Create the directory that will contain `path`, if it has one.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    _ensure_parent(filename)
    torch.save(state, filename)
    if is_best:
        _ensure_parent(best_filename)
        shutil.copyfile(filename, best_filename)

In [9]:
# Only the training set is augmented; validation and test samples are served
# untouched, and the test set carries no labels.
train_dataset = icebergDataset(X=X_train, y=y_train, transform=True)
val_dataset = icebergDataset(X=X_val, y=y_val)
test_dataset = icebergDataset(X=X_test)



In [10]:
# One LeakyReLU instance shared by every layer of `net` (it holds no parameters).
act = nn.LeakyReLU()


class net(nn.Module):
    """CNN that scores an image together with a scalar angle.

    Four convolutional stages process the batch-normalised image; their
    flattened output is concatenated with the batch-normalised angle and
    passed through three fully connected layers that emit two logits.

    ``fc1`` expects ``1 + 3*3*144`` inputs, i.e. ``layer4`` must produce
    144 feature maps of 3x3. ``layer_shallow`` and ``sig`` are registered
    (so they appear in the state dict / repr) but are not used by
    ``forward``.
    """

    def __init__(self):
        super().__init__()

        def conv_unit(n_in, n_out, kernel):
            # Conv2d -> BatchNorm2d -> shared activation, in that order.
            return [
                nn.Conv2d(n_in, n_out, kernel_size=kernel, padding=padding),
                nn.BatchNorm2d(n_out),
                act,
            ]

        def pool_unit():
            # Every conv stage ends with a 2x2 max-pool and 20% dropout.
            return [nn.MaxPool2d(2), nn.Dropout(0.2)]

        def dense_unit(n_in, n_out):
            # Linear -> shared activation -> 50% dropout.
            return nn.Sequential(nn.Linear(n_in, n_out), act, nn.Dropout(0.5))

        # Input normalisation for the image and for the angle.
        self.batch = nn.BatchNorm2d(channels)
        self.batch1D = nn.BatchNorm1d(1)

        # Convolutional stages (attribute order and inner order kept stable so
        # state-dict keys such as "layer1.3.weight" do not change).
        self.layer1 = nn.Sequential(*conv_unit(channels, 9, 3), *conv_unit(9, 18, 3), *pool_unit())
        self.layer2 = nn.Sequential(*conv_unit(18, 24, 3), *conv_unit(24, 36, 3), *pool_unit())
        self.layer3 = nn.Sequential(*conv_unit(36, 72, 2), *pool_unit())
        self.layer4 = nn.Sequential(*conv_unit(72, 144, 2), *pool_unit())
        self.layer_shallow = nn.Sequential(*conv_unit(channels, 128, 3), *pool_unit())

        # Classifier head: flattened features plus one angle value.
        self.fc1 = dense_unit(1 + (3 * 3 * 144), 512)
        self.fc2 = dense_unit(512, 196)
        self.fc3 = nn.Linear(196, 2)
        self.sig = nn.Sigmoid()

    def forward(self, x_im, x_angle):
        """Return the two-class logits for a batch of images and angles."""
        feats = self.batch(x_im)
        angle = self.batch1D(x_angle)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        flat = feats.view(feats.size(0), -1)
        merged = torch.cat([flat, angle], dim=1)
        return self.fc3(self.fc2(self.fc1(merged)))

In [11]:
# k = 12
# padding = 1
# act = nn.LeakyReLU()
# class net(nn.Module):
#     def __init__(self):
#         super(net, self).__init__()
#         self.batch = nn.BatchNorm2d(channels)
#         self.batch1D = nn.BatchNorm1d(1)
#         self.init = nn.Sequential(
#             nn.Conv2d(channels, k*2, kernel_size=3, padding=padding),
#             nn.BatchNorm2d(k*2),
#             act,
#             nn.Dropout(0))
#         self.b1l1 = nn.Sequential(
#             nn.Conv2d(k*2, k, kernel_size=3, padding=padding),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.b1l2 = nn.Sequential(
#             nn.Conv2d(k*3, k, kernel_size=3, padding=padding),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.transition1 = nn.Sequential(
#             nn.Conv2d(k*4, k*4, kernel_size=1, padding=0),
#             nn.BatchNorm2d(k*4),
#             act,
#             nn.MaxPool2d(2),
#             nn.Dropout(0))
#         self.b2l1 = nn.Sequential(
#             nn.Conv2d(k*4, k, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.b2l2 = nn.Sequential(
#             nn.Conv2d(k*5, k, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.transition2 = nn.Sequential(
#             nn.Conv2d(k*6, k*6, kernel_size=1, padding=0),
#             nn.BatchNorm2d(k*6),
#             act,
#             nn.MaxPool2d(2),
#             nn.Dropout(0))
#         self.b3l1 = nn.Sequential(
#             nn.Conv2d(k*6, k, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.b3l2 = nn.Sequential(
#             nn.Conv2d(k*7, k, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k),
#             act,
#             nn.Dropout(0))
#         self.transition3 = nn.Sequential(
#             nn.Conv2d(k*8, k*8, kernel_size=1, padding=0),
#             nn.BatchNorm2d(k*8),
#             act,
#             nn.MaxPool2d(2),
#             nn.Dropout(0))
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(k*8, k*6, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k*6),
#             act,
#             nn.MaxPool2d(2),
#             nn.Dropout(0))
#         self.layer4 = nn.Sequential(
#             nn.Conv2d(k*6, k*4, kernel_size=2, padding=0),
#             nn.BatchNorm2d(k*4),
#             act,
#             nn.MaxPool2d(2),
#             nn.Dropout(0))
#         self.fc1 = nn.Sequential(
#             nn.Linear(1+(4*4*k*6), 512),
#             act,
#             nn.Dropout(0.5))
#         self.fc2 = nn.Sequential(
#             nn.Linear(512, 196),
#             act,
#             nn.Dropout(0.3))
#         self.fc3 = nn.Linear(196, 2)
#         self.sig = nn.Sigmoid()
        
        
#     def forward(self, x_im, x_angle):
#         x_im = self.batch(x_im)
#         x_angle = self.batch1D(x_angle)
#         out_prev = self.init(x_im)
#         out = self.b1l1(out_prev)
#         out_prev = torch.cat([out, out_prev], dim=1)
#         out = self.b1l2(out_prev)
#         out_prev = torch.cat([out, out_prev], dim=1)
#         out_prev = self.transition1(out_prev)
#         out = self.b2l1(out_prev)
#         out_prev = torch.cat([F.pad(out, (0,1,0,1), "constant", 0), out_prev], dim=1)
#         out = self.b2l2(out_prev)
#         out_prev = torch.cat([F.pad(out, (0,1,0,1), "constant", 0), out_prev], dim=1)
#         out_prev = self.transition2(out_prev)
#         out = self.b3l1(out_prev)
#         out_prev = torch.cat([F.pad(out, (0,1,0,1), "constant", 0), out_prev], dim=1)
#         out = self.b3l2(out_prev)
#         out_prev = torch.cat([F.pad(out, (0,1,0,1), "constant", 0), out_prev], dim=1)
#         out_prev = self.transition3(out_prev)
#         out = self.layer3(out_prev)
# #         out = self.layer4(out)
#         out = out.view(out.size(0), -1)
# #         out_shallow = self.layer_shallow(x_im)
# #         out_shallow  = F.max_pool2d(out_shallow, kernel_size=out_shallow.size()[2:])
# #         out_shallow = out_shallow.view(out_shallow.size(0), -1)
#         out = torch.cat([out, x_angle], dim=1)
#         out = self.fc1(out)
#         out = self.fc2(out)
#         out = self.fc3(out)
#         return out

In [12]:
def weight_init(m):
    """Initialise the weights of ``m`` according to its layer type.

    Intended for ``Module.apply``: Conv2d weights get Xavier-uniform values,
    Linear weights get an orthogonal matrix, any other module is left alone.
    Biases are not touched.
    """
    dispatch = (
        (nn.Conv2d, torch.nn.init.xavier_uniform),
        (nn.Linear, torch.nn.init.orthogonal),
    )
    for layer_type, initialise in dispatch:
        if isinstance(m, layer_type):
            initialise(m.weight.data)

In [13]:
# TensorBoard writer plus a freshly initialised model moved to the GPU.
writer = SummaryWriter()
iceNet = net()
iceNet.apply(weight_init)
iceNet.cuda()

net (
  (batch): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True)
  (batch1D): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(2, 9, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(9, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU (0.01)
    (3): Conv2d(9, 18, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True)
    (5): LeakyReLU (0.01)
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (7): Dropout (p = 0.2)
  )
  (layer2): Sequential (
    (0): Conv2d(18, 24, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU (0.01)
    (3): Conv2d(24, 36, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True)
    (5): LeakyReLU (0.01)
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (7): Dropout (p = 0.2)
  )
  (layer3): Sequential (
    (0): 

In [14]:
# Settings for the single train/validation run below.
num_epochs = 200  # passes over the training loader
batch_size = 32  # samples per mini-batch for both loaders
learning_rate = 0.001  # `lr` handed to the Adam optimizer

In [15]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    iceNet.parameters(),
    lr=learning_rate,
    weight_decay=0.0001,
)

# Mini-batch iterators: training batches are reshuffled every epoch,
# validation batches keep their order.
train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = data_utils.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

def accuracy(loader):
    """Evaluate the global ``iceNet`` with the global ``criterion`` on ``loader``.

    Args:
        loader: Iterable yielding ``(features, features_angle, labels)`` batches.

    Returns:
        ``(mean_loss, accuracy_percent)`` as Python floats. The loss is the
        per-sample mean over the whole loader: each batch loss is weighted by
        its batch size, so a short final batch does not count as much as a
        full one. This assumes ``criterion`` returns the batch mean, as
        ``nn.CrossEntropyLoss()`` does by default. Both values are NaN when
        the loader yields no samples.

    Side effects:
        Leaves ``iceNet`` in eval mode.
    """
    iceNet.eval()
    correct = 0
    total = 0
    loss_sum = 0.0
    for features, features_angle, labels in loader:
        features = Variable(features.cuda(), volatile=True)
        features_angle = Variable(features_angle.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)
        outputs = iceNet(features, features_angle)
        n_batch = labels.size(0)
        batch_loss = criterion(outputs, labels)
        # Reduce to a plain float immediately instead of collecting Variables;
        # view(-1)[0] works whether the loss is 0-dim or a 1-element tensor.
        loss_sum += float(batch_loss.data.cpu().view(-1)[0]) * n_batch
        _, predicted = torch.max(outputs.data, 1)
        total += n_batch
        correct += int((predicted == labels.data).sum())

    if total == 0:
        return float('nan'), float('nan')
    return loss_sum / total, 100.0 * correct / total

In [16]:
# Train the Model
# `best_prec1` tracks the lowest validation loss seen so far (lower is better).
# Starting from infinity means the first epoch always registers as the best.
best_prec1 = float('inf')
for epoch in range(num_epochs):
    epoch_train_loss = []
    # accuracy() leaves the model in eval mode, so switch back every epoch.
    iceNet.train()
    for i, (features, features_angle, labels) in enumerate(train_loader):
        features = Variable(features.cuda()).float()
        features_angle = Variable(features_angle).cuda()
        labels = Variable(labels.cuda()).long()
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = iceNet(features, features_angle)
        if epoch == 0 and i == 0:
            # The graph does not change between batches; log it once.
            writer.add_graph(iceNet, outputs)
#         writer.add_histogram('hist_fc1', iceNet.fc1[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc2', iceNet.fc2[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc3', iceNet.fc3.weight.data.cpu().numpy(), i)
        loss = criterion(outputs, labels)
        # Keep a plain float, not the Variable, so autograd state from every
        # batch is not retained until the end of the epoch.
        epoch_train_loss.append(float(loss.data.cpu().view(-1)[0]))
        loss.backward()
        optimizer.step()

    # Evaluate the validation set once and reuse the result for both the
    # model-selection score and the log line.
    val_loss, val_acc = accuracy(val_loader)
    prec1 = val_loss
    print('Epoch [%d/%d] \nTraining Loss: %.4f' % (epoch+1, num_epochs, np.mean(epoch_train_loss)))
    print('Validation Loss: %.4f, Accuracy: %.2f%%' % (val_loss, val_acc))

    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
#     save_checkpoint({
#         'epoch': epoch + 1,
#         'state_dict': iceNet.state_dict(),
#         'best_prec1': best_prec1,
#         'optimizer' : optimizer.state_dict(),
#     }, is_best)

print(best_prec1)
# Flush pending TensorBoard events and release the writer.
writer.close()

Epoch [1/200] 
Training Loss: 0.6991
Validation Loss: 0.6543, Accuracy: 64.80%
Epoch [2/200] 
Training Loss: 0.6406
Validation Loss: 0.6147, Accuracy: 70.72%
Epoch [3/200] 
Training Loss: 0.5921
Validation Loss: 0.5030, Accuracy: 70.09%
Epoch [4/200] 
Training Loss: 0.5500
Validation Loss: 0.5145, Accuracy: 75.08%
Epoch [5/200] 
Training Loss: 0.4971
Validation Loss: 0.3902, Accuracy: 80.69%
Epoch [6/200] 
Training Loss: 0.4976
Validation Loss: 0.3870, Accuracy: 81.31%
Epoch [7/200] 
Training Loss: 0.4698
Validation Loss: 0.3783, Accuracy: 83.49%
Epoch [8/200] 
Training Loss: 0.4198
Validation Loss: 0.3471, Accuracy: 84.11%
Epoch [9/200] 
Training Loss: 0.4284
Validation Loss: 0.3241, Accuracy: 85.67%
Epoch [10/200] 
Training Loss: 0.4338
Validation Loss: 0.2853, Accuracy: 86.92%
Epoch [11/200] 
Training Loss: 0.4502
Validation Loss: 0.2748, Accuracy: 87.85%
Epoch [12/200] 
Training Loss: 0.4213
Validation Loss: 0.2817, Accuracy: 87.85%
Epoch [13/200] 
Training Loss: 0.3677
Validation 

KeyboardInterrupt: 

In [16]:
# Train the Model
# `best_prec1` tracks the lowest validation loss seen so far (lower is better).
# Starting from infinity means the first epoch always registers as the best.
best_prec1 = float('inf')
for epoch in range(num_epochs):
    epoch_train_loss = []
    # accuracy() leaves the model in eval mode, so switch back every epoch.
    iceNet.train()
    for i, (features, features_angle, labels) in enumerate(train_loader):
        features = Variable(features.cuda()).float()
        features_angle = Variable(features_angle).cuda()
        labels = Variable(labels.cuda()).long()
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = iceNet(features, features_angle)
        if epoch == 0 and i == 0:
            # The graph does not change between batches; log it once.
            writer.add_graph(iceNet, outputs)
#         writer.add_histogram('hist_fc1', iceNet.fc1[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc2', iceNet.fc2[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc3', iceNet.fc3.weight.data.cpu().numpy(), i)
        loss = criterion(outputs, labels)
        # Keep a plain float, not the Variable, so autograd state from every
        # batch is not retained until the end of the epoch.
        epoch_train_loss.append(float(loss.data.cpu().view(-1)[0]))
        loss.backward()
        optimizer.step()

    # Evaluate the validation set once and reuse the result for both the
    # model-selection score and the log line.
    val_loss, val_acc = accuracy(val_loader)
    prec1 = val_loss
    print('Epoch [%d/%d] \nTraining Loss: %.4f' % (epoch+1, num_epochs, np.mean(epoch_train_loss)))
    print('Validation Loss: %.4f, Accuracy: %.2f%%' % (val_loss, val_acc))

    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
#     save_checkpoint({
#         'epoch': epoch + 1,
#         'state_dict': iceNet.state_dict(),
#         'best_prec1': best_prec1,
#         'optimizer' : optimizer.state_dict(),
#     }, is_best)

print(best_prec1)
# Flush pending TensorBoard events and release the writer.
writer.close()

Epoch [1/200] 
Training Loss: 0.6790
Validation Loss: 0.6388, Accuracy: 65.11%
Epoch [2/200] 
Training Loss: 0.6355
Validation Loss: 0.6136, Accuracy: 69.47%
Epoch [3/200] 
Training Loss: 0.5811
Validation Loss: 0.4759, Accuracy: 72.59%
Epoch [4/200] 
Training Loss: 0.5514
Validation Loss: 0.4475, Accuracy: 76.95%
Epoch [5/200] 
Training Loss: 0.5214
Validation Loss: 0.4208, Accuracy: 78.82%
Epoch [6/200] 
Training Loss: 0.4891
Validation Loss: 0.3607, Accuracy: 81.31%
Epoch [7/200] 
Training Loss: 0.4892
Validation Loss: 0.3700, Accuracy: 85.05%
Epoch [8/200] 
Training Loss: 0.4616
Validation Loss: 0.3697, Accuracy: 83.49%
Epoch [9/200] 
Training Loss: 0.4540
Validation Loss: 0.3349, Accuracy: 85.98%
Epoch [10/200] 
Training Loss: 0.4480
Validation Loss: 0.3157, Accuracy: 85.98%
Epoch [11/200] 
Training Loss: 0.4417
Validation Loss: 0.2621, Accuracy: 88.47%
Epoch [12/200] 
Training Loss: 0.4271
Validation Loss: 0.2540, Accuracy: 89.72%
Epoch [13/200] 
Training Loss: 0.3835
Validation 

Epoch [104/200] 
Training Loss: 0.2253
Validation Loss: 0.2214, Accuracy: 89.10%
Epoch [105/200] 
Training Loss: 0.2270
Validation Loss: 0.1969, Accuracy: 90.97%
Epoch [106/200] 
Training Loss: 0.2243
Validation Loss: 0.1922, Accuracy: 91.28%
Epoch [107/200] 
Training Loss: 0.2205
Validation Loss: 0.2045, Accuracy: 89.41%
Epoch [108/200] 
Training Loss: 0.2093
Validation Loss: 0.2181, Accuracy: 90.65%
Epoch [109/200] 
Training Loss: 0.2162
Validation Loss: 0.2180, Accuracy: 89.41%
Epoch [110/200] 
Training Loss: 0.2386
Validation Loss: 0.2187, Accuracy: 90.03%
Epoch [111/200] 
Training Loss: 0.2200
Validation Loss: 0.2048, Accuracy: 91.59%
Epoch [112/200] 
Training Loss: 0.2314
Validation Loss: 0.2174, Accuracy: 89.10%
Epoch [113/200] 
Training Loss: 0.2096
Validation Loss: 0.2209, Accuracy: 89.41%
Epoch [114/200] 
Training Loss: 0.2639
Validation Loss: 0.1913, Accuracy: 91.90%
Epoch [115/200] 
Training Loss: 0.2698
Validation Loss: 0.2058, Accuracy: 90.65%
Epoch [116/200] 
Training Lo

In [123]:
# Restore the best checkpoint written by save_checkpoint(): the stored
# validation score is printed, then the model weights and the optimizer state
# are loaded back into the live objects.
print("=> loading checkpoint")
best_model = torch.load('./Models/model_best.pth.tar')
print('best_prec1 = ', best_model['best_prec1'])
iceNet.load_state_dict(best_model['state_dict'])
optimizer.load_state_dict(best_model['optimizer'])

=> loading checkpoint
best_prec1 =  0.16314317286014557


In [124]:
# Training inputs wrapped without labels and without augmentation
# (y and transform are left at their None defaults).
temp_dataset = icebergDataset(X_train)

In [129]:
# Predict
# Collect, for every test sample, the softmax probability of class index 1.
# The model is put in eval mode once; nothing inside the loop changes it.
iceNet.eval()
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
results = []
for features, features_angle in test_loader:
    features = Variable(features, volatile=True).cuda()
    features_angle = Variable(features_angle, volatile=True).cuda()
    # dim=1 normalises across the two class logits of each sample; relying on
    # the implicit dimension is deprecated.
    outputs = F.softmax(iceNet(features, features_angle), dim=1)
    # Store plain Python floats (not tensors) so `results` can be compared
    # with NumPy and written to the submission; works for any batch size.
    results.extend(float(p) for p in outputs.data[:, 1].cpu())

In [126]:
# Fraction of training labels above 0.5.
np.mean(y_train > 0.5)

0.45908028059236167

In [130]:
# Fraction of test predictions above 0.5, for comparison with the training ratio.
np.mean(np.array(results) > 0.5)

0.30282526115859448

In [132]:
# Load the sample submission to reuse its row layout.
sub = pd.read_csv('./Data/sample_submission.csv')

In [133]:
# Overwrite the prediction column; `results` must have one entry per row of `sub`.
sub['is_iceberg'] = results

In [135]:
# Write the submission without the DataFrame index column.
sub.to_csv('./Submissions/sub_30Oct_val_1631.csv', index=False)

#### KFold

In [71]:
def scale_test_range(input_, train_min, train_max, min_, max_):
    """Rescale ``input_`` to ``[min_, max_]`` using statistics of the training data.

    Applies the same linear map that ``scale_range`` derives from the data
    itself, but with the supplied training extrema, so that ``train_min`` maps
    to ``min_`` and ``train_max`` maps to ``max_``. Values outside the training
    range land outside ``[min_, max_]``.

    NOTE(review): the previous version divided by ``train_max`` rather than by
    the span ``train_max - train_min``, which only matches ``scale_range`` when
    ``train_min`` is 0. This version assumes ``train_max`` is the raw (unshifted)
    training maximum -- confirm against any caller outside this notebook.

    Args:
        input_: Array-like of numbers; never modified (a float copy is used).
        train_min: Minimum of the training data.
        train_max: Maximum of the training data.
        min_: Lower bound of the target range.
        max_: Upper bound of the target range.

    Returns:
        A new floating-point ndarray with the same shape as ``input_``. If the
        training span is zero every element is mapped to ``min_``.
    """
    values = np.asarray(input_)
    # astype copies, so the caller's array is not mutated; integers become floats.
    values = values.astype(np.result_type(values.dtype, np.float32))
    train_span = train_max - train_min
    if train_span == 0:
        values.fill(min_)
        return values
    values -= train_min
    values /= train_span / (max_ - min_)
    values += min_
    return values

def std_test_range(input_, mean, sd):
    """Standardise ``input_`` with a given mean and standard deviation.

    Args:
        input_: Array-like of numbers; never modified (a float copy is used,
            so integer input is supported as well).
        mean: Value subtracted from every element.
        sd: Value every element is divided by afterwards.

    Returns:
        A new floating-point ndarray holding ``(input_ - mean) / sd``.
    """
    values = np.asarray(input_)
    # astype copies, so the caller's array is not mutated; integers become floats.
    values = values.astype(np.result_type(values.dtype, np.float32))
    values -= mean
    values /= sd
    return values

In [72]:
def weight_init(m):
    """Initialise the weights of ``m`` according to its layer type.

    Intended for ``Module.apply``: Conv2d weights get Xavier-uniform values,
    Linear weights get an orthogonal matrix, any other module is left alone.
    Biases are not touched.
    """
    dispatch = (
        (nn.Conv2d, torch.nn.init.xavier_uniform),
        (nn.Linear, torch.nn.init.orthogonal),
    )
    for layer_type, initialise in dispatch:
        if isinstance(m, layer_type):
            initialise(m.weight.data)

In [73]:
# Full labelled set (no hold-out) laid out like X_train: [images, angles].
band_1_KF, band_2_KF = (
    np.concatenate(data[band].tolist()).reshape(-1, 75, 75)
    for band in ('band_1', 'band_2')
)
# band_3_KF = scale_range(band_1_KF/band_2_KF, -1, 1)
rgb = np.stack([band_1_KF, band_2_KF], axis=1)
X_KF = [rgb, np.array(data['inc_angle']).reshape((len(data), 1))]

y_KF = data['is_iceberg'].values.astype(np.float32)

In [74]:
def accuracy(loader):
    """Evaluate the global ``iceNet`` with the global ``criterion`` on ``loader``.

    Args:
        loader: Iterable yielding ``(features, features_angle, labels)`` batches.

    Returns:
        ``(mean_loss, accuracy_percent)`` as Python floats. The loss is the
        per-sample mean over the whole loader: each batch loss is weighted by
        its batch size, so a short final batch does not count as much as a
        full one. This assumes ``criterion`` returns the batch mean, as
        ``nn.CrossEntropyLoss()`` does by default. Both values are NaN when
        the loader yields no samples.

    Side effects:
        Leaves ``iceNet`` in eval mode.
    """
    iceNet.eval()
    correct = 0
    total = 0
    loss_sum = 0.0
    for features, features_angle, labels in loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        labels = Variable(labels, volatile=True).cuda()
        outputs = iceNet(features, features_angle)
        n_batch = labels.size(0)
        batch_loss = criterion(outputs, labels)
        # Reduce to a plain float immediately instead of collecting Variables;
        # view(-1)[0] works whether the loss is 0-dim or a 1-element tensor.
        loss_sum += float(batch_loss.data.cpu().view(-1)[0]) * n_batch
        _, predicted = torch.max(outputs.data, 1)
        total += n_batch
        correct += int((predicted == labels.data).sum())

    if total == 0:
        return float('nan'), float('nan')
    return loss_sum / total, 100.0 * correct / total

In [79]:
# Settings for the K-fold run.
num_epochs = 300  # epochs of the final training in each fold
num_hyper_search_epochs = 20  # epochs spent on each candidate during the augmentation search
batch_size = 32
learning_rate = 0.001
# Candidate values for the four augmentation arguments of icebergDataset, in
# the order (u_ed, u_zo, u_noisy, u_shift); the search tries every combination.
aug_probs = [[0.1,0.2,0.3], [0.3,0.4,0.5,0.6], [0.2,0.3,0.4,0.5,0.6], [0.1,0.2,0.3,0.4]]

In [80]:
# K-fold training: for every fold, (1) grid-search the augmentation arguments
# with short trainings, (2) train a fresh model with the selected arguments,
# (3) reload that fold's best checkpoint and predict the test set.
# `results` ends up with one list of test probabilities per fold and
# `kfold_scores` with each fold's best validation loss.
kfold = 5
kfold_scores = []

test_dataset = icebergDataset(X_test)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)


def _train_one_epoch(model, loader, loss_fn, optim):
    """Run one optimisation pass of `model` over `loader` on the GPU."""
    model.train()
    for features, features_angle, labels in loader:
        features = Variable(features).cuda()
        features_angle = Variable(features_angle).cuda()
        labels = Variable(labels).cuda()
        # Forward + Backward + Optimize
        optim.zero_grad()
        loss = loss_fn(model(features, features_angle), labels)
        loss.backward()
        optim.step()


def _new_model_and_optimizer():
    """Return a freshly initialised GPU model and its Adam optimizer."""
    model = net().apply(weight_init).cuda()
    optim = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0001)
    return model, optim


results = []
# No random_state: without shuffle=True it has no effect on the folds, and
# current scikit-learn rejects that combination. Folds stay contiguous.
sss = KFold(n_splits=kfold)
for i, (train_index, test_index) in enumerate(sss.split(X_KF[0])):
    print('Fold [%d/%d]' % (i+1, kfold))

    X_train_KF = [X_KF[0][train_index], X_KF[1][train_index]]
    X_valid_KF = [X_KF[0][test_index], X_KF[1][test_index]]
    y_train_KF, y_valid_KF = y_KF[train_index], y_KF[test_index]

    val_dataset_KF = icebergDataset(X_valid_KF, y_valid_KF)
    val_loader_KF = torch.utils.data.DataLoader(dataset=val_dataset_KF, batch_size=batch_size, shuffle=False)

    # ------------------------- Hyperparameter search for image augmentation -------------------------- #
    # Starting from infinity / None guarantees that some candidate is always
    # selected, even if no validation loss ever drops below 1.
    best_prec_overall = float('inf')
    best_params = None
    for params in itertools.product(aug_probs[0], aug_probs[1], aug_probs[2], aug_probs[3]):
        # accuracy() reads the globals `iceNet` and `criterion`, so the
        # candidate model has to be bound to those names.
        iceNet, optimizer = _new_model_and_optimizer()
        criterion = nn.CrossEntropyLoss()

        # Data Loader
        train_dataset_KF = icebergDataset(X_train_KF, y_train_KF, True,
                                          params[0], params[1], params[2], params[3])
        train_loader_KF = torch.utils.data.DataLoader(dataset=train_dataset_KF, batch_size=batch_size, shuffle=True)

        # Short training; keep the lowest validation loss reached.
        best_prec1 = float('inf')
        for epoch in range(num_hyper_search_epochs):
            _train_one_epoch(iceNet, train_loader_KF, criterion, optimizer)
            prec1 = accuracy(val_loader_KF)[0]
            best_prec1 = min(prec1, best_prec1)

        if best_params is None or best_prec1 < best_prec_overall:
            best_prec_overall = best_prec1
            best_params = params

    print('Hyperparameter search complete')
    print('Selected hyperparameters : ', str(best_params))
    # ------------------------ Complete training using selected hyperparameters ------------------------- #

    # Data Loader
    train_dataset_KF = icebergDataset(X_train_KF, y_train_KF, True,
                                      best_params[0], best_params[1], best_params[2], best_params[3])
    train_loader_KF = torch.utils.data.DataLoader(dataset=train_dataset_KF, batch_size=batch_size, shuffle=True)

    # Define model, loss and optimizer
    iceNet, optimizer = _new_model_and_optimizer()
    criterion = nn.CrossEntropyLoss()

    # The rolling checkpoint is rewritten every epoch; the best epoch of this
    # fold is additionally kept in its own file so it can be reloaded below.
    checkpoint_path = './Models/v1Nov_v1/model_fold_'+str(i+1)+'.pth.tar'
    best_checkpoint_path = './Models/v1Nov_v1/model_fold_'+str(i+1)+'_best.pth.tar'

    # Train
    best_prec1 = float('inf')
    for epoch in range(num_epochs):
        _train_one_epoch(iceNet, train_loader_KF, criterion, optimizer)

        prec1 = accuracy(val_loader_KF)[0]

        # Save best model
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': iceNet.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filename=checkpoint_path)
        if is_best:
            shutil.copyfile(checkpoint_path, best_checkpoint_path)

    print('Val Score : %f' % (best_prec1))
    kfold_scores.append(best_prec1)
    # Load best model (the per-fold best copy, not the last epoch's checkpoint)
    best_model = torch.load(best_checkpoint_path)
    iceNet.load_state_dict(best_model['state_dict'])
    optimizer.load_state_dict(best_model['optimizer'])

    # Predict
    iceNet.eval()

    results_fold = []
    for features, features_angle in test_loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        # dim=1 normalises across the two class logits of each sample.
        outputs = F.softmax(iceNet(features, features_angle), dim=1)
        # Plain floats so the per-fold lists can be averaged with NumPy.
        results_fold.extend(float(p) for p in outputs.data[:, 1].cpu())

    results.append(results_fold)

Fold [1/5]




KeyboardInterrupt: 

In [104]:
# Mean of the per-fold best validation losses.
np.mean(kfold_scores)

0.14790893048048021

In [85]:
# Number of submission rows whose predicted value is below 0.5.
(sub['is_iceberg'] < 0.5).values.sum()

8424

In [79]:
# Build the K-fold submission: average the predictions along axis 0 and write
# them into the sample-submission layout.
# Assumes `results` holds one list of test predictions per fold, as built by
# the K-fold cell -- confirm it has not been reassigned in between.
sub = pd.read_csv('./Data/sample_submission.csv')
sub['is_iceberg'] = np.array(results).mean(axis=0)
sub.to_csv('./Submissions/Sub 12 - 5-fold _ Val-1395.csv', index=False)

In [108]:
# Fresh copy of the sample submission to hold the ensemble predictions.
sub = pd.read_csv('./Data/sample_submission.csv')

In [109]:
# Prediction columns of earlier submissions to be averaged. The commented-out
# lines (sub2, sub6) are excluded from the ensemble.
sub1 = pd.read_csv('./Submissions/Sub 3 - 5 fold _ Val - 0.1504.csv')['is_iceberg']
# sub2 = pd.read_csv('./Submissions/Sub 4 - 10-fold _ Val-1269.csv')['is_iceberg']
sub3 = pd.read_csv('./Submissions/Sub 5 - 5-fold _ Val-1538.csv')['is_iceberg']
sub4 = pd.read_csv('./Submissions/Sub 6 - 5-fold _ Val-1480.csv')['is_iceberg']
sub5 = pd.read_csv('./Submissions/Sub 8 - 5-fold _ Val-1479.csv')['is_iceberg']
# sub6 = pd.read_csv('./Submissions/Sub 9 - 5-fold _ Val-1480.csv')['is_iceberg']

In [110]:
# Unweighted row-wise average of the four selected submissions.
_ensemble_members = [np.array(member) for member in (sub1, sub3, sub4, sub5)]
results = np.mean(_ensemble_members, axis=0)
sub['is_iceberg'] = np.array(results)
sub.to_csv('./Submissions/Sub 13 - Ensemble_3_5_6_8.csv', index=False)