#### Import packages

In [1]:
# import torch.multiprocessing as mp
# mp.set_start_method('spawn') 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import itertools
import shutil
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import random
import cv2

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import torch.utils.data
import torch.utils.data as data_utils

seed = 0
# random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
np.random.seed(seed)
# from utils_3 import *
from utils import *

#### Load and Prepare Data

In [2]:
data = pd.read_json('./Data/train.json')
test = pd.read_json('./Data/test.json')

In [3]:
data['band_1'] = data['band_1'].apply(lambda x : np.array(x).reshape(75, 75))
data['band_2'] = data['band_2'].apply(lambda x : np.array(x).reshape(75, 75))

test['band_1'] = test['band_1'].apply(lambda x : np.array(x).reshape(75, 75))
test['band_2'] = test['band_2'].apply(lambda x : np.array(x).reshape(75, 75))

data['inc_angle'] = pd.to_numeric(data['inc_angle'], errors='coerce').fillna(0.0)
test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce')
test['not_machine_generated'] = test['inc_angle'].apply(lambda x: len(str(x))) <= 7

In [4]:
def scale_range (input_, min_, max_):
    input_ += -(np.min(input_))
    input_ /= np.max(input_) / (max_ - min_)
    input_ += min_
    return input_

In [5]:
band_1_tr = np.concatenate([im for im in data['band_1']]).reshape(-1, 75, 75)
band_2_tr = np.concatenate([im for im in data['band_2']]).reshape(-1, 75, 75)
X = np.stack((band_1_tr, band_2_tr), axis=1)
# X_Real_std = np.stack((band_1_tr, band_2_tr), axis=1)
X_ang = np.array(data['inc_angle']).reshape((len(data), 1))
y = data['is_iceberg'].values.astype(np.float32)

X_train, X_val, X_ang_train, X_ang_val, y_train, y_val = train_test_split(X, X_ang, y, test_size=0.2, random_state=42)

X_train = [X_train, X_ang_train]
X_val = [X_val, X_ang_val]

band_1_test = np.concatenate([im for im in test['band_1']]).reshape(-1, 75, 75)
band_2_test = np.concatenate([im for im in test['band_2']]).reshape(-1, 75, 75)
# band_3_test = scale_range(band_1_test/band_2_test, -1, 1)
X_test = np.stack((band_1_test, band_2_test), axis=1)

#### PyTorch Model

In [6]:
# Hyper Parameters
width = 75
height = 75
channels = 2
padding = 0

In [7]:
# Data Loader (Input Pipeline)
class icebergDataset(data_utils.Dataset):
    """Iceberg-Ship dataset."""

    def __init__(self, X, y=None, transform=None, u_ed=0.2, u_zo=0.5, u_noisy=0.4, u_shift=0.3):
        self.X_images = X[0]
        self.X_angles = torch.from_numpy(X[1]).float()
        if y!=None:
            self.y = torch.from_numpy(y).long()
        else:
            self.y=None
        self.transform = transform
        
        self.u_ed = u_ed
        self.u_zo = u_zo
        self.u_noisy = u_noisy
        self.u_shift = u_shift

    def __len__(self):
        return len(self.X_images)

    def __getitem__(self, idx):
        im = self.X_images[idx]
        if self.transform:
#             if np.random.random() < 0.2:
#                 im = cv2.blur(im, (2,2))
            im = randomErodeDilate(im, u=self.u_ed)
            im = randomZoomOut(im, u=self.u_zo)
            im = randomNoisy(im, u=self.u_noisy)
            im = randomShift(im, u=self.u_shift)
        
        try:
            if self.y==None:
                return [torch.from_numpy(im).float(), self.X_angles[idx]]
        except:
            pass
            
        return [torch.from_numpy(im).float(), self.X_angles[idx], self.y[idx]]

In [8]:
def save_checkpoint(state, is_best, filename='./Models/checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, './Models/model_best.pth.tar')

In [9]:
train_dataset = icebergDataset(X_train, y_train, transform=True)
val_dataset = icebergDataset(X_val, y_val)
test_dataset = icebergDataset(X_test)



In [10]:
act = nn.LeakyReLU()
class net(nn.Module):
    def __init__(self):
        super(net, self).__init__()
        self.batch = nn.BatchNorm2d(channels)
        self.batch1D = nn.BatchNorm1d(1)
        self.layer1 = nn.Sequential(
            nn.Conv2d(channels, 9, kernel_size=3, padding=padding),
            nn.BatchNorm2d(9),
            act,
            nn.Conv2d(9, 18, kernel_size=3, padding=padding),
            nn.BatchNorm2d(18),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(18, 24, kernel_size=3, padding=padding),
            nn.BatchNorm2d(24),
            act,
            nn.Conv2d(24, 36, kernel_size=3, padding=padding),
            nn.BatchNorm2d(36),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(36, 72, kernel_size=2, padding=padding),
            nn.BatchNorm2d(72),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.layer4 = nn.Sequential(
            nn.Conv2d(72, 144, kernel_size=2, padding=padding),
            nn.BatchNorm2d(144),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.layer_shallow = nn.Sequential(
            nn.Conv2d(channels, 128, kernel_size=3, padding=padding),
            nn.BatchNorm2d(128),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.fc1 = nn.Sequential(
            nn.Linear(1+(3*3*144), 512),
            act,
            nn.Dropout(0.5))
        self.fc2 = nn.Sequential(
            nn.Linear(512, 196),
            act,
            nn.Dropout(0.5))
        self.fc3 = nn.Linear(196, 2)
        self.sig = nn.Sigmoid()
        
        
    def forward(self, x_im, x_angle):
        x_im = self.batch(x_im)
        x_angle = self.batch1D(x_angle)
        out = self.layer1(x_im)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = out.view(out.size(0), -1)
#         out_shallow = self.layer_shallow(x_im)
#         out_shallow  = F.max_pool2d(out_shallow, kernel_size=out_shallow.size()[2:])
#         out_shallow = out_shallow.view(out_shallow.size(0), -1)
        out = torch.cat([out, x_angle], dim=1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [30]:
def weight_init(m): 
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform(m.weight.data)
    if isinstance(m, nn.Linear):
        torch.nn.init.orthogonal(m.weight.data)

In [422]:
writer = SummaryWriter()
iceNet = net()
iceNet.apply(weight_init).cuda()

net (
  (batch): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True)
  (batch1D): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(2, 9, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(9, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU (0.01)
    (3): Conv2d(9, 18, kernel_size=(5, 5), stride=(1, 1))
    (4): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True)
    (5): LeakyReLU (0.01)
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (7): Dropout (p = 0.2)
  )
  (layer_skip): Sequential (
    (0): Conv2d(2, 5, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(5, eps=1e-05, momentum=0.1, affine=True)
    (2): LeakyReLU (0.01)
    (3): Dropout (p = 0.2)
    (4): Conv2d(5, 10, kernel_size=(4, 4), stride=(2, 2))
    (5): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True)
    (6): LeakyReLU (0.01)
    (7): Dropout (p = 0.2)
  )
  (layer2): Sequential (
    (0): Conv2d(18, 24, kernel_size=(5, 5), str

In [423]:
num_epochs = 200
batch_size = 32
learning_rate = 0.001

In [424]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(iceNet.parameters(), lr=learning_rate, weight_decay=0.0001)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

def accuracy(loader):
    iceNet.eval()
    correct = 0
    total = 0
    loss = []
    is_iceberg_tot = 0
    is_iceberg_correct = 0
    for features, features_angle, labels in loader:
        features = features.cuda()
        labels = labels.cuda()
        features_angle = features_angle.cuda()
        features = Variable(features, volatile=True)
        features_angle = Variable(features_angle, volatile=True)
        labels = Variable(labels, volatile=True)
        outputs = iceNet(features, features_angle)
        _loss = criterion(outputs, labels)
        loss.append(_loss)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        
    return np.mean(loss).data[0], (100 * correct / total)

In [425]:
# Train the Model
best_prec1 = 1
for epoch in range(num_epochs):
    epoch_train_loss = []
    for i, (features, features_angle, labels) in enumerate(train_loader):
        iceNet.train()
        features = features.cuda()
        labels = labels.cuda()
        features = Variable(features).float()
        features_angle = Variable(features_angle).cuda()
        labels = Variable(labels).long()
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = iceNet(features, features_angle)
        writer.add_graph(iceNet, outputs)
#         writer.add_histogram('hist_l1_1', iceNet.layer1[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_l1_2', iceNet.layer1[3].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_l2_1', iceNet.layer2[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_l2_2', iceNet.layer2[3].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_l3_1', iceNet.layer3[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_l3_2', iceNet.layer3[3].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc1', iceNet.fc1[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc2', iceNet.fc2[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc3', iceNet.fc3.weight.data.cpu().numpy(), i)
        loss = criterion(outputs, labels)
        epoch_train_loss.append(loss)
        loss.backward()
        optimizer.step()
        
    prec1 = accuracy(val_loader)[0]
    print ('Epoch [%d/%d] \nTraining Loss: %.4f' % (epoch+1, num_epochs, np.mean(epoch_train_loss).data[0]))
    print('Validation Loss: %.4f, Accuracy: %.2f%%, Ice Berg Accuracy: %.2f%%' % accuracy(val_loader))
    
    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
#     save_checkpoint({
#         'epoch': epoch + 1,
#         'state_dict': iceNet.state_dict(),
#         'best_prec1': best_prec1,
#         'optimizer' : optimizer.state_dict(),
#     }, is_best)

print(best_prec1)
# export scalar data to JSON for external processing
writer.close()

Epoch [1/200] 
Training Loss: 0.6475
Validation Loss: 0.6377, Accuracy: 68.54%, Ice Berg Accuracy: 99.39%
Epoch [2/200] 
Training Loss: 0.5652
Validation Loss: 0.7135, Accuracy: 63.55%, Ice Berg Accuracy: 42.07%
Epoch [3/200] 
Training Loss: 0.5129
Validation Loss: 0.4428, Accuracy: 80.06%, Ice Berg Accuracy: 95.73%
Epoch [4/200] 
Training Loss: 0.5060
Validation Loss: 0.4430, Accuracy: 80.69%, Ice Berg Accuracy: 87.80%
Epoch [5/200] 
Training Loss: 0.4727
Validation Loss: 0.4242, Accuracy: 80.69%, Ice Berg Accuracy: 98.78%
Epoch [6/200] 
Training Loss: 0.4639
Validation Loss: 0.3626, Accuracy: 82.87%, Ice Berg Accuracy: 84.76%
Epoch [7/200] 
Training Loss: 0.4385
Validation Loss: 0.3642, Accuracy: 83.80%, Ice Berg Accuracy: 93.29%
Epoch [8/200] 
Training Loss: 0.3975
Validation Loss: 0.3255, Accuracy: 86.60%, Ice Berg Accuracy: 87.20%
Epoch [9/200] 
Training Loss: 0.3916
Validation Loss: 0.3080, Accuracy: 85.98%, Ice Berg Accuracy: 92.68%
Epoch [10/200] 
Training Loss: 0.4030
Validati

Epoch [78/200] 
Training Loss: 0.2271
Validation Loss: 0.2177, Accuracy: 92.52%, Ice Berg Accuracy: 91.46%
Epoch [79/200] 
Training Loss: 0.2264
Validation Loss: 0.2344, Accuracy: 89.72%, Ice Berg Accuracy: 93.29%
Epoch [80/200] 
Training Loss: 0.2170
Validation Loss: 0.2080, Accuracy: 91.28%, Ice Berg Accuracy: 91.46%
Epoch [81/200] 
Training Loss: 0.2198
Validation Loss: 0.2776, Accuracy: 88.47%, Ice Berg Accuracy: 95.73%
Epoch [82/200] 
Training Loss: 0.2876
Validation Loss: 0.2659, Accuracy: 90.65%, Ice Berg Accuracy: 88.41%
Epoch [83/200] 
Training Loss: 0.2880
Validation Loss: 0.2415, Accuracy: 90.97%, Ice Berg Accuracy: 88.41%
Epoch [84/200] 
Training Loss: 0.2114
Validation Loss: 0.2404, Accuracy: 90.97%, Ice Berg Accuracy: 89.02%
Epoch [85/200] 
Training Loss: 0.2387
Validation Loss: 0.2322, Accuracy: 91.90%, Ice Berg Accuracy: 90.24%
Epoch [86/200] 
Training Loss: 0.2588
Validation Loss: 0.2345, Accuracy: 90.97%, Ice Berg Accuracy: 87.80%
Epoch [87/200] 
Training Loss: 0.2658

KeyboardInterrupt: 

In [212]:
print("=> loading checkpoint")
best_model = torch.load('./Models/model_best.pth.tar')
print('best_prec1 = ', best_model['best_prec1'])
iceNet.load_state_dict(best_model['state_dict'])
optimizer.load_state_dict(best_model['optimizer'])

=> loading checkpoint
best_prec1 =  0.11030842363834381
While copying the parameter named layer_shallow.0.weight, whose dimensions in the model are torch.Size([128, 2, 4, 4]) and whose dimensions in the checkpoint are torch.Size([128, 2, 3, 3]), ...


RuntimeError: invalid argument 2: sizes do not match at /opt/conda/conda-bld/pytorch_1503970438496/work/torch/lib/THC/THCTensorCopy.cu:31

In [124]:
temp_dataset = icebergDataset(X_train)

In [129]:
# Predict
iceNet.eval()
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
results = []
for features, features_angle in test_loader:
    iceNet.eval()
    features = Variable(features, volatile=True).cuda()
    features_angle = Variable(features_angle, volatile=True).cuda()
    outputs = F.softmax(iceNet(features, features_angle))
#     outputs = iceNet(features, features_angle)

    results.append(outputs.data[0][1])

In [126]:
sum(y_train>0.5)/len(y_train)

0.45908028059236167

In [130]:
sum(np.array(results)>0.5)/len(results)

0.30282526115859448

In [132]:
sub = pd.read_csv('./Data/sample_submission.csv')

In [133]:
sub['is_iceberg'] = results

In [135]:
sub.to_csv('./Submissions/sub_30Oct_val_1631.csv', index=False)

#### KFold

In [11]:
def weight_init(m): 
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform(m.weight.data)
    if isinstance(m, nn.Linear):
        torch.nn.init.orthogonal(m.weight.data)

def scale_range (input_, min_, max_):
    tr_min = np.min(input_)
    input_ += -(np.min(input_))
    tr_max = np.max(input_)
    input_ /= np.max(input_) / (max_ - min_)
    input_ += min_
    return input_, tr_min, tr_max

def scale_test_range (input_, train_min, train_max, min_, max_):
    input_ += -(train_min)
    input_ /= train_max / (max_ - min_)
    input_ += min_
    return input_

In [12]:
band_1_KF = np.concatenate([im for im in data['band_1']]).reshape(-1, 75, 75)
band_2_KF = np.concatenate([im for im in data['band_2']]).reshape(-1, 75, 75)
X_KF = np.stack((band_1_KF, band_2_KF), axis=1)
# temp = np.concatenate((X_KF, X_test[test['not_machine_generated']]), axis=0)
# X_KF, train_min, train_max = scale_range(X_KF, -1, 1)
# X_KF = scale_test_range(X_KF, train_min, train_max, -1, 1)
X_KF = [X_KF, np.array(data['inc_angle']).reshape((len(data), 1))]

# X_test[test['not_machine_generated']] = X_all[len(data):]
X_test = [X_test, np.array(test['inc_angle']).reshape((len(test), 1))]

# X_test = [scale_test_range(X_test, train_min, train_max, -1, 1), np.array(test['inc_angle']).reshape((len(test), 1))]
y_KF = data['is_iceberg'].values.astype(np.float32)

In [13]:
def accuracy(loader):
    iceNet.eval()
    correct = 0
    total = 0
    loss = []
    for features, features_angle, labels in loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        labels = Variable(labels, volatile=True).cuda()
        outputs = iceNet(features, features_angle)
        _loss = criterion(outputs, labels)
        loss.append(_loss)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        
    return np.mean(loss).data[0], (100 * correct / total)

In [14]:
num_epochs = 300
batch_size = 32
learning_rate = 0.001

In [None]:
kfold = 5
kfold_scores = []

test_dataset = icebergDataset(X_test)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

results = []
sss = KFold(n_splits=kfold, random_state=0)
for i, (train_index, test_index) in enumerate(sss.split(X_KF[0], X_KF[1], y_KF)):
    X_train_KF, X_valid_KF = [X_KF[0][train_index], X_KF[1][train_index]], [X_KF[0][test_index], X_KF[1][test_index]]
    y_train_KF, y_valid_KF = y_KF[train_index], y_KF[test_index]
    
    seed = 0
    # random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    
    # Define model
    iceNet = net().apply(weight_init).cuda()
    
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(iceNet.parameters(), lr=learning_rate, weight_decay=0.0001)
    
    # Data Loader
    train_dataset_KF = icebergDataset(X_train_KF, y_train_KF, transform=True)
    val_dataset_KF = icebergDataset(X_valid_KF, y_valid_KF)

    train_loader_KF = torch.utils.data.DataLoader(dataset=train_dataset_KF, batch_size=batch_size, shuffle=True)
    val_loader_KF = torch.utils.data.DataLoader(dataset=val_dataset_KF, batch_size=batch_size, shuffle=False)
    
    print('Fold [%d/%d]' % (i+1, kfold))
    # Train
    best_prec1 = 1
    for epoch in range(num_epochs):
        epoch_train_loss = []
        for idx, (features, features_angle, labels) in enumerate(train_loader_KF):
            iceNet.train()
            features = Variable(features).cuda()
            features_angle = Variable(features_angle).cuda()
            labels = Variable(labels).cuda()
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = iceNet(features, features_angle)
            loss = criterion(outputs, labels)
            epoch_train_loss.append(loss)
            loss.backward()
            optimizer.step()

        prec1 = accuracy(val_loader_KF)[0]
        
        # Save best model
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': iceNet.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filename='./Models/v1Nov_v1/model_fold_'+str(i+1)+'.pth.tar')
        
    print('Val Score : %f' % (best_prec1))
    kfold_scores.append(best_prec1)
    # Load best model
    best_model = torch.load('./Models/v1Nov_v1/model_fold_'+str(i+1)+'.pth.tar')
    iceNet.load_state_dict(best_model['state_dict'])
    optimizer.load_state_dict(best_model['optimizer'])
    
    # Predict
    iceNet.eval()
    
    results_fold = []
    for features, features_angle in test_loader:
        iceNet.eval()
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        outputs = F.softmax(iceNet(features, features_angle))
    #     outputs = iceNet(features, features_angle)

        results_fold.append(outputs.data[0][1])
    
    results.append(results_fold)



Fold [1/5]
Val Score : 0.175512
Fold [2/5]


In [32]:
np.array(kfold_scores).mean()

0.14218716472387313

In [33]:
sub = pd.read_csv('./Data/sample_submission.csv')
sub['is_iceberg'] = np.array(results).mean(axis=0)
sub.to_csv('./Submissions/Sub 16 - 5-fold _ Val-1422.csv', index=False)

In [34]:
np.array(sub['is_iceberg']>0.5).sum()

3334

In [109]:
sub1 = pd.read_csv('./Submissions/Sub 3 - 5 fold _ Val - 0.1504.csv')['is_iceberg']
# sub2 = pd.read_csv('./Submissions/Sub 4 - 10-fold _ Val-1269.csv')['is_iceberg']
sub3 = pd.read_csv('./Submissions/Sub 5 - 5-fold _ Val-1538.csv')['is_iceberg']
sub4 = pd.read_csv('./Submissions/Sub 6 - 5-fold _ Val-1480.csv')['is_iceberg']
sub5 = pd.read_csv('./Submissions/Sub 8 - 5-fold _ Val-1479.csv')['is_iceberg']
# sub6 = pd.read_csv('./Submissions/Sub 9 - 5-fold _ Val-1480.csv')['is_iceberg']

In [110]:
results = np.mean((np.array(sub1), np.array(sub3),
                   np.array(sub4), np.array(sub5)), axis=0)
sub['is_iceberg'] = np.array(results)
sub.to_csv('./Submissions/Sub 13 - Ensemble_3_5_6_8.csv', index=False)