#### Import packages

In [1]:
# import torch.multiprocessing as mp
# mp.set_start_method('spawn') 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import shutil
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import random
import cv2

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import torch.utils.data
import torch.utils.data as data_utils

# Fix the RNG seeds used by PyTorch (CPU and, when available, CUDA) and NumPy.
# Python's built-in `random` module is deliberately left unseeded (call commented out).
seed = 0
# random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
np.random.seed(seed)
from utils_diff_ch import *

#### Load and Prepare Data

In [2]:
# Read the training and test sets from JSON into DataFrames.
data = pd.read_json('./Data/train.json')
test = pd.read_json('./Data/test.json')

In [3]:
# Convert every flattened band into a 75x75 array, for both frames and both bands.
for _frame in (data, test):
    for _band in ('band_1', 'band_2'):
        _frame[_band] = _frame[_band].apply(lambda x : np.array(x).reshape(75, 75))

# Incidence angle: non-numeric entries become NaN; only the training frame
# has its NaNs replaced by 0.0, the test frame keeps them.
_train_angle = pd.to_numeric(data['inc_angle'], errors='coerce')
data['inc_angle'] = _train_angle.fillna(0.0)
test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce')

In [4]:
def scale_range (input_, min_, max_):
    """Linearly rescale ``input_`` so that its values span ``[min_, max_]``.

    The work is done with augmented assignment, so a NumPy array argument is
    modified in place and the same object is returned.

    Parameters
    ----------
    input_ : array to rescale (floating point, since it is divided in place).
    min_, max_ : bounds of the target range.

    Returns
    -------
    The rescaled ``input_``. A constant input (zero value range) is mapped
    entirely to ``min_`` instead of producing NaNs from a 0/0 division.
    """
    # Shift so the smallest value sits at 0; the largest value is then the range.
    input_ += -(np.min(input_))
    span = np.max(input_)
    if span == 0:
        # Every element equals the minimum: nothing to stretch, just move to min_.
        input_ += min_
        return input_
    input_ /= span / (max_ - min_)
    input_ += min_
    return input_

In [5]:
# Collect the per-sample 75x75 images of each band into (n_samples, 75, 75) arrays.
band_1_tr = np.stack(list(data['band_1']))
band_2_tr = np.stack(list(data['band_2']))
# Put the two bands on axis 1: (n_samples, 2, 75, 75).
X = np.stack([band_1_tr, band_2_tr], axis=1)
# Independent copy of the real images; gan_samples takes its mean/std from it.
X_Real_std = X.copy()
X_ang = np.array(data['inc_angle']).reshape(-1, 1)
y = data['is_iceberg'].values.astype(np.float32)

# 80/20 train/validation split of images, angles and labels.
X_train, X_val, X_ang_train, X_ang_val, y_train, y_val = train_test_split(
    X, X_ang, y,
    test_size=0.2,
    random_state=42,
)

# X_train = [X_train, X_ang_train]
# X_val = [X_val, X_ang_val]

band_1_test = np.stack(list(test['band_1']))
band_2_test = np.stack(list(test['band_2']))
# band_3_test = scale_range(band_1_test/band_2_test, -1, 1)
X_test = np.stack([band_1_test, band_2_test], axis=1)
# X_test = [X_test, np.array(test['inc_angle']).reshape((len(test), 1))]

#### Load GAN Models and Data

In [6]:
class iceberg(nn.Module):
    """Fully connected generator mapping a 100-d noise vector to a 2x75x75 image.

    The layer stack lives in ``self.model`` (an ``nn.Sequential``) and ends in
    ``Tanh``, so every output value lies in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        hidden_sizes = [100, 256, 512, 1024]
        stages = []
        # Linear + LeakyReLU pairs for the hidden layers.
        for n_in, n_out in zip(hidden_sizes[:-1], hidden_sizes[1:]):
            stages.append(nn.Linear(n_in, n_out))
            stages.append(nn.LeakyReLU(0.2, inplace=True))
        # Output layer: one value per pixel of both channels.
        stages.append(nn.Linear(1024, 75*75*2))
        stages.append(nn.Tanh())
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Flatten ``x`` to (batch, 100), run the MLP, reshape to (batch, 2, 75, 75)."""
        batch = x.size(0)
        flat = self.model(x.view(batch, 100))
        return flat.view(batch, 2, 75, 75)
    
act = nn.LeakyReLU(0.2, inplace=True)
class ship(nn.Module):
    """Generator mapping a 100-d noise vector to a 2x75x75 image in [-1, 1].

    Three linear layers expand the noise to a 256x4x4 feature map, which a chain
    of transposed convolutions upsamples 4 -> 6 -> 10 -> 19 -> 39 -> 77; the final
    3x3 convolution trims it to 75 before ``Tanh``. No activation is applied
    between the layers (the module-level ``act`` is not used here).
    """
    def __init__(self):
        # The original called super(Generator, self), but no `Generator` class exists.
        super().__init__()
        # Padding of the two size-defining convolutions. It is fixed here rather than
        # read from a notebook global defined in a later cell; 0 is the value that
        # makes the output exactly 75x75.
        padding = 0
        self.fc1 = nn.Sequential(
            nn.Linear(100, 1024))
        self.fc2 = nn.Sequential(
            nn.Linear(1024, 2048))
        self.fc3 = nn.Sequential(
            nn.Linear(2048, 4*4*2*128))
        self.layer1 = nn.Sequential(
            nn.BatchNorm2d(256),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2, padding=1),
            nn.BatchNorm2d(128))
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2, padding=1),
            nn.BatchNorm2d(64))
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=padding),
            nn.BatchNorm2d(16))
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(8),
            nn.Conv2d(8, 2, kernel_size=3, padding=padding),
            nn.Tanh())
        
    
    def forward(self, x):
        """Return generated images of shape (batch, 2, 75, 75) for noise ``x``."""
        x = x.view(x.size(0), 100)
        out = self.fc1(x)
        out = self.fc2(out)
        out = self.fc3(out)
        # 4*4*2*128 = 256*4*4 features -> 256-channel 4x4 map
        out = out.view(out.size(0), 256, 4, 4)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        return out

In [7]:
# GAN Models
# Instantiate both generators on the GPU, then restore their trained weights.
gan_iceberg_gen, gan_ship_gen = iceberg().cuda(), ship().cuda() 
# Each checkpoint is a dict; its 'state_dict' entry holds the generator parameters.
gan_iceberg_model = torch.load('./Models/GAN_iceberg_model_best.pth.tar')
gan_iceberg_gen.load_state_dict(gan_iceberg_model['state_dict'])
gan_ship_model = torch.load('./Models/GAN_ship_model_best.pth.tar')
gan_ship_gen.load_state_dict(gan_ship_model['state_dict'])
# NOTE(review): neither generator is switched to eval() here, so the BatchNorm layers
# of `ship` presumably run in training mode while sampling — confirm this is intended.

In [8]:
def gan_samples(num_test_samples=600):
    """Generate synthetic ship and iceberg images with the two GAN generators.

    Both generators receive the same batch of ``num_test_samples`` noise vectors.
    Their outputs are multiplied by the per-pixel standard deviation of
    ``X_Real_std`` and shifted by its per-pixel mean.

    Returns
    -------
    NumPy array with the ship images first and the iceberg images second
    (``2 * num_test_samples`` images in total).
    """
    # Draw samples from the input distribution
    noise = Variable(torch.randn(num_test_samples, 100).cuda())

    # Per-pixel statistics of the real images, computed once for both classes.
    real_mean = X_Real_std.mean(axis=0)
    real_std = X_Real_std.std(axis=0)

    def _to_real_scale(generated):
        # Detach from the graph, move to host memory and rescale.
        return (generated.data.cpu().numpy()*real_std)+real_mean

    ship_images = _to_real_scale(gan_ship_gen(noise))
    iceberg_images = _to_real_scale(gan_iceberg_gen(noise))

    return np.concatenate((ship_images, iceberg_images))

#### PyTorch Model

In [9]:
# Hyper Parameters
# Image geometry: two 75x75 bands per sample.
width = 75
height = 75
# Number of input channels of the classifier's first layers.
channels = 2
# Padding passed to the convolution layers.
padding = 0

In [10]:
# Data Loader (Input Pipeline)
class icebergDataset(data_utils.Dataset):
    """Iceberg-Ship dataset.

    Parameters
    ----------
    X : indexable collection of image arrays; ``X[idx]`` is one sample.
    y : optional NumPy array of labels. It is stored as a float tensor of
        shape ``(len(y), 1)``. ``None`` (the default) means an unlabelled set.
    transform : when truthy, the random augmentations are applied to each
        image on access.

    Items are lists: ``[image]`` for an unlabelled set, ``[image, label]``
    otherwise, with ``image`` converted to a float tensor.
    """

    def __init__(self, X, y=None, transform=None):
        self.X_images = X
        # Identity test: `y != None` on a NumPy array is an element-wise comparison
        # whose result cannot be used as a single truth value.
        if y is not None:
            self.y = torch.from_numpy(y.reshape((len(y),1))).float()
        else:
            self.y = None
        self.transform = transform

    def __len__(self):
        return len(self.X_images)

    def __getitem__(self, idx):
        im = self.X_images[idx]
        if self.transform:
#             if np.random.random() < 0.2:
#                 im = cv2.blur(im, (2,2))
            # Random augmentations; `u` is the argument each helper receives
            # (presumably the probability of applying it — confirm in utils_diff_ch).
            im = randomErodeDilate(im, u=0.2)
            im = randomZoomOut(im, u=0.4)
            im = randomNoisy(im, u=0.2)
            im = randomShift(im, u=0.2)
            im = randomRotation(im, u=0.2)

        image = torch.from_numpy(im).float()
        # Explicit check instead of comparing a tensor with None inside a bare except.
        if self.y is None:
            return [image]

        return [image, self.y[idx]]

In [11]:
def save_checkpoint(state, is_best, filename='./Models/checkpoint.pth.tar',
                    best_filename='./Models/model_best.pth.tar'):
    """Serialise ``state`` to ``filename`` and optionally keep it as the best model.

    Parameters
    ----------
    state : object to store with ``torch.save`` (the callers pass a dict).
    is_best : when true, the file just written is also copied to ``best_filename``.
    filename : path of the checkpoint file; overwritten on every call.
    best_filename : destination of the "best" copy. Defaults to the previously
        hard-coded path, so existing callers behave exactly as before; pass a
        distinct path to keep separate best models (e.g. one per fold).
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

In [12]:
# Wrap the split arrays as datasets; only the training set enables the random augmentations.
train_dataset = icebergDataset(X_train, y_train, transform=True)
val_dataset = icebergDataset(X_val, y_val)
# No labels for the test set, so its items contain the image only.
test_dataset = icebergDataset(X_test)



In [13]:
# Module-level activation kept so that any code referring to `act` still works;
# `net` builds its own instance in __init__.
act = nn.PReLU()
class net(nn.Module):
    """Convolutional classifier returning one raw logit per image.

    The input passes through a BatchNorm, four conv blocks (``layer1`` ..
    ``layer4``) and three fully connected layers. With 75x75 inputs and
    ``padding = 0`` the feature map after ``layer4`` is 144 x 3 x 3, which is
    the input size of ``fc1``. Reads the notebook globals ``channels`` and
    ``padding``.

    ``batch1D``, ``layer_shallow`` and ``sig`` are registered but not used by
    ``forward``; they are kept so that saved state dicts keep the same keys.
    """
    def __init__(self):
        super(net, self).__init__()
        # One PReLU per model instance, reused at every activation site exactly as
        # before. Creating it here (instead of reusing the module-level object)
        # prevents the learned slope from being shared between separately
        # constructed models, e.g. across cross-validation folds. State-dict keys
        # are unchanged.
        act = nn.PReLU()
        self.batch = nn.BatchNorm2d(channels)
        self.batch1D = nn.BatchNorm1d(1)
        self.layer1 = nn.Sequential(
            nn.Conv2d(channels, 18, kernel_size=1, padding=padding),
            nn.BatchNorm2d(18),
            act,
            nn.Conv2d(18, 18, kernel_size=3, padding=padding),
            nn.BatchNorm2d(18),
            act,
            nn.Conv2d(18, 18, kernel_size=3, padding=padding),
            nn.BatchNorm2d(18),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25))
        self.layer2 = nn.Sequential(
            nn.Conv2d(18, 36, kernel_size=3, padding=padding),
            nn.BatchNorm2d(36),
            act,
            nn.Conv2d(36, 54, kernel_size=3, padding=padding),
            nn.BatchNorm2d(54),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25))
        self.layer3 = nn.Sequential(
            nn.Conv2d(54, 72, kernel_size=2, padding=padding),
            nn.BatchNorm2d(72),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.2))
        self.layer4 = nn.Sequential(
            nn.Conv2d(72, 144, kernel_size=2, padding=padding),
            nn.BatchNorm2d(144),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25))
        self.layer_shallow = nn.Sequential(
            nn.Conv2d(channels, 128, kernel_size=3, padding=padding),
            nn.BatchNorm2d(128),
            act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25))
        self.fc1 = nn.Sequential(
            nn.Linear((3*3*144), 512),
            act,
            nn.Dropout(0.5))
        self.fc2 = nn.Sequential(
            nn.Linear(512, 196),
            act,
            nn.Dropout(0.5))
        self.fc3 = nn.Linear(196, 1)
        self.sig = nn.Sigmoid()
        
        
    def forward(self, x_im):
        """Return logits of shape (batch, 1); no sigmoid is applied here."""
        x_im = self.batch(x_im)
        out = self.layer1(x_im)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # flatten the feature map for the fully connected head
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [14]:
def weight_init(m):
    """Initialise the weights of ``m`` according to its layer type.

    Conv2d weights get Xavier-uniform values, Linear weights an orthogonal
    matrix; every other module is left untouched. Intended for use with
    ``Module.apply``.
    """
    if isinstance(m, nn.Conv2d):
        initialise = torch.nn.init.xavier_uniform
    elif isinstance(m, nn.Linear):
        initialise = torch.nn.init.orthogonal
    else:
        return
    initialise(m.weight.data)

In [15]:
# Writer for TensorBoard logging.
writer = SummaryWriter()
# Build the classifier, run weight_init on every submodule, and move it to the GPU.
iceNet = net()
iceNet.apply(weight_init).cuda()

net (
  (batch): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True)
  (batch1D): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True)
  (layer1): Sequential (
    (0): Conv2d(2, 18, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True)
    (2): PReLU (1)
    (3): Conv2d(18, 18, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True)
    (5): PReLU (1)
    (6): Conv2d(18, 18, kernel_size=(3, 3), stride=(1, 1))
    (7): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True)
    (8): PReLU (1)
    (9): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (10): Dropout (p = 0.25)
  )
  (layer2): Sequential (
    (0): Conv2d(18, 36, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True)
    (2): PReLU (1)
    (3): Conv2d(36, 54, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(54, eps=1e-05, momentum=0.1, affine=True)
    (5): PReLU (1)
    (6): Max

In [16]:
# Training settings.
num_epochs = 200
batch_size = 32
learning_rate = 0.001
# Number of GAN images requested per class each time the training set is rebuilt.
num_test_samples = 1200

In [17]:
# Loss and Optimizer
# BCEWithLogitsLoss takes raw logits, matching the network's un-activated output.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(iceNet.parameters(), lr=learning_rate, weight_decay=0.0001)

# Extend the real training split with GAN images. gan_samples returns the ship images
# first and the iceberg images second, hence the zeros-then-ones label block.
X_train_in = np.concatenate((X_train, gan_samples(num_test_samples)))
y_train_in = np.concatenate((y_train, np.zeros((num_test_samples), np.float32), np.ones((num_test_samples), np.float32)))
train_dataset = icebergDataset(X_train_in, y_train_in, transform=True)

# Only the training loader shuffles; validation order is kept fixed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

def accuracy(loader):
    """Evaluate the global ``iceNet`` on ``loader``.

    Puts the model in eval mode, runs every (features, labels) batch through
    it and scores it with the global ``criterion``.

    Returns
    -------
    (mean of the per-batch losses, percentage of correctly classified samples)
    """
    iceNet.eval()
    n_correct, n_seen = 0, 0
    batch_losses = []
    for features, labels in loader:
        # volatile Variables: inference only, no graph is kept
        features = Variable(features.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True).float()
        logits = iceNet(features)
        batch_losses.append(criterion(logits, labels))
        # hard decision: sigmoid probability above 0.5 counts as class 1
        predicted = (F.sigmoid(logits).data>0.5)
        n_seen += labels.size(0)
        n_correct += (predicted == labels.byte().data).sum()

    mean_loss = np.mean(batch_losses).data[0]
    return mean_loss, (100 * n_correct / n_seen)



In [18]:
# Train the Model
# `best_prec1` tracks the lowest validation loss seen so far (lower is better).
best_prec1 = 1
for epoch in range(num_epochs):
    # Plain floats are stored, not loss Variables, so the autograd graph of each
    # batch can be freed as soon as its optimisation step is done.
    epoch_train_loss = []
    # accuracy() leaves the model in eval mode, so switch back once per epoch.
    iceNet.train()
    for i, (features, labels) in enumerate(train_loader):
        features = Variable(features.cuda()).float()
        labels = Variable(labels.cuda()).float()
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = iceNet(features)
        # The model graph is the same for every batch: log it once, not per step.
        if epoch == 0 and i == 0:
            writer.add_graph(iceNet, outputs)
#         writer.add_histogram('hist_fc1', iceNet.fc1[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc2', iceNet.fc2[0].weight.data.cpu().numpy(), i)
#         writer.add_histogram('hist_fc3', iceNet.fc3.weight.data.cpu().numpy(), i)
        loss = criterion(outputs, labels)
        epoch_train_loss.append(loss.data[0])
        loss.backward()
        optimizer.step()
    
    # Generate new GAN samples
    X_train_in = np.concatenate((X_train, gan_samples(num_test_samples)))
    y_train_in = np.concatenate((y_train, np.zeros((num_test_samples), np.float32), 
                                 np.ones((num_test_samples), np.float32)))
    train_dataset = icebergDataset(X_train_in, y_train_in, transform=True)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        
    # Evaluate once per epoch and reuse the result for both model selection and logging.
    val_loss, val_acc = accuracy(val_loader)
    prec1 = val_loss
    print ('Epoch [%d/%d] \nTraining Loss: %.4f' % (epoch+1, num_epochs, np.mean(epoch_train_loss)))
    print('Validation Loss: %.4f, Accuracy: %.2f%%' % (val_loss, val_acc))
    
    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
#     save_checkpoint({
#         'epoch': epoch + 1,
#         'state_dict': iceNet.state_dict(),
#         'best_prec1': best_prec1,
#         'optimizer' : optimizer.state_dict(),
#     }, is_best)

print(best_prec1)
# export scalar data to JSON for external processing
writer.close()



Epoch [1/200] 
Training Loss: 0.6234
Validation Loss: 0.6434, Accuracy: 65.42%
Epoch [2/200] 
Training Loss: 0.4899
Validation Loss: 0.6179, Accuracy: 62.62%
Epoch [3/200] 
Training Loss: 0.4510
Validation Loss: 0.6930, Accuracy: 56.07%
Epoch [4/200] 
Training Loss: 0.4169
Validation Loss: 0.6131, Accuracy: 65.73%
Epoch [5/200] 
Training Loss: 0.3883
Validation Loss: 0.7402, Accuracy: 66.67%
Epoch [6/200] 
Training Loss: 0.3941
Validation Loss: 0.4275, Accuracy: 80.37%
Epoch [7/200] 
Training Loss: 0.3545
Validation Loss: 0.4846, Accuracy: 75.70%
Epoch [8/200] 
Training Loss: 0.3362
Validation Loss: 0.5127, Accuracy: 74.77%
Epoch [9/200] 
Training Loss: 0.3473
Validation Loss: 0.4651, Accuracy: 75.70%
Epoch [10/200] 
Training Loss: 0.3515
Validation Loss: 0.4413, Accuracy: 79.13%
Epoch [11/200] 
Training Loss: 0.3324
Validation Loss: 0.5174, Accuracy: 74.45%


KeyboardInterrupt: 

In [35]:
# Restore the model and optimizer state from the "best" checkpoint file.
# NOTE(review): the save_checkpoint call in the training loop above is commented out,
# so this file presumably comes from an earlier run — confirm it matches `iceNet`.
print("=> loading checkpoint")
best_model = torch.load('./Models/model_best.pth.tar')
print('best_prec1 = ', best_model['best_prec1'])
iceNet.load_state_dict(best_model['state_dict'])
optimizer.load_state_dict(best_model['optimizer'])

=> loading checkpoint
best_prec1 =  0.20251186192035675


In [124]:
# Unlabelled, un-augmented dataset over the training images.
temp_dataset = icebergDataset(X_train)

In [129]:
# Predict
# The unlabelled dataset yields one-element items ([image]) and `iceNet` takes a single
# image batch and returns one logit per sample (it is trained with BCEWithLogitsLoss).
# The earlier version of this cell unpacked an extra angle tensor, called the model with
# two arguments and took a softmax over a two-class output, none of which match the
# dataset and model defined in this notebook.
iceNet.eval()
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
results = []
for (features,) in test_loader:
    features = Variable(features, volatile=True).cuda()
    # sigmoid of the logit = predicted probability of the positive (iceberg) class
    outputs = F.sigmoid(iceNet(features))

    # batch_size is 1, so [0][0] is the single probability of this sample
    results.append(outputs.data[0][0])

In [126]:
# Fraction of training labels above 0.5, i.e. the share of icebergs in the training split.
sum(y_train>0.5)/len(y_train)

0.45908028059236167

In [130]:
# Fraction of test predictions above the 0.5 threshold, for comparison with the training share.
sum(np.array(results)>0.5)/len(results)

0.30282526115859448

In [131]:
# Inspect the first 50 predicted probabilities.
results[:50]

[0.02592930756509304,
 0.3247637450695038,
 1.0454600713242189e-14,
 0.9999004602432251,
 0.40994328260421753,
 0.6011435389518738,
 0.024099018424749374,
 0.9999616146087646,
 2.8529889561923483e-10,
 2.0306337167319555e-10,
 1.0756996557609658e-15,
 0.30658578872680664,
 1.2682083252002485e-05,
 0.29034730792045593,
 4.115712783914205e-07,
 0.0020702641922980547,
 0.0006665511173196137,
 0.001083467504940927,
 0.003712332108989358,
 0.9074830412864685,
 0.02027943916618824,
 0.18286937475204468,
 0.23720575869083405,
 0.12182788550853729,
 3.2119743469767245e-14,
 7.707850357974166e-09,
 0.007208712864667177,
 0.4834314286708832,
 0.1449306458234787,
 0.9999291896820068,
 1.087272768563255e-13,
 0.6034151315689087,
 0.4970799684524536,
 0.016889972612261772,
 0.7639496326446533,
 0.9922479391098022,
 0.0013621319085359573,
 0.2865104377269745,
 0.28295210003852844,
 0.8613411784172058,
 3.21758954014231e-15,
 0.5548590421676636,
 0.8960628509521484,
 2.0677757675002795e-06,
 0.616992

In [132]:
# Load the sample submission file as the frame to fill in.
sub = pd.read_csv('./Data/sample_submission.csv')

In [133]:
# Replace the prediction column with the probabilities collected in `results`.
sub['is_iceberg'] = results

In [135]:
# Write the submission file without the DataFrame index.
sub.to_csv('./Submissions/sub_30Oct_val_1631.csv', index=False)

#### KFold

In [74]:
def scale_test_range (input_, train_min, train_max, min_, max_):
    """Rescale ``input_`` with a fixed source range instead of its own min/max.

    Maps ``train_min`` to ``min_`` and ``train_max`` to ``max_`` linearly, so
    several arrays (e.g. train and test) can share one scaling. Values outside
    ``[train_min, train_max]`` land outside ``[min_, max_]``.

    The work is done with augmented assignment, so a NumPy array argument is
    modified in place and the same object is returned.
    """
    input_ += -(train_min)
    # After the shift the source range is (train_max - train_min) wide. Dividing by
    # train_max alone, as before, was only correct when train_min was 0.
    input_ /= (train_max - train_min) / (max_ - min_)
    input_ += min_
    return input_

def std_test_range (input_, mean, sd):
    """Standardise ``input_``: subtract ``mean``, then divide by ``sd``.

    Both steps use augmented assignment, so a NumPy array argument is modified
    in place; the resulting ``input_`` is returned.
    """
    input_ -= mean
    input_ /= sd
    return input_

In [106]:
def weight_init(m):
    """Layer-type dependent weight initialisation, for use with ``Module.apply``.

    Xavier-uniform for Conv2d weights, orthogonal for Linear weights, no-op for
    anything else. This repeats the definition given earlier in the notebook.
    """
    if isinstance(m, nn.Conv2d):
        initialise = torch.nn.init.xavier_uniform
    elif isinstance(m, nn.Linear):
        initialise = torch.nn.init.orthogonal
    else:
        return
    initialise(m.weight.data)

In [102]:
# Rebuild the full training set for cross-validation, this time with a third channel.
band_1_KF = np.concatenate([im for im in data['band_1']]).reshape(-1, 75, 75)
band_2_KF = np.concatenate([im for im in data['band_2']]).reshape(-1, 75, 75)
# Third channel: element-wise band_1 / band_2 ratio, rescaled to [-1, 1] by scale_range.
band_3_KF = scale_range(band_1_KF/band_2_KF, -1, 1)
rgb = np.stack((band_1_KF, band_2_KF, band_3_KF), axis=1)
# X_KF is a two-element list: [image array, incidence-angle column].
X_KF = [rgb, np.array(data['inc_angle']).reshape((len(data), 1))]

y_KF = data['is_iceberg'].values.astype(np.float32)
# NOTE(review): `net` as defined earlier builds its first layers with `channels` (= 2)
# inputs, while `rgb` has 3 channels — confirm which model version this section targets.

In [83]:
# band_1_KF = np.concatenate([im for im in data['band_1']]).reshape(-1, 75, 75)
# band_2_KF = np.concatenate([im for im in data['band_2']]).reshape(-1, 75, 75)
# band_3_KF = band_1_KF/band_2_KF

# band_1_test = np.concatenate([im for im in test['band_1']]).reshape(-1, 75, 75)
# band_2_test = np.concatenate([im for im in test['band_2']]).reshape(-1, 75, 75)
# band_3_test = band_1_test/band_2_test

# min_b1_KF, max_b1_KF = np.min(np.concatenate((band_1_KF, band_1_test), axis=0)), np.max(np.concatenate((band_1_KF, band_1_test), axis=0))
# min_b2_KF, max_b2_KF = np.min(np.concatenate((band_2_KF, band_2_test), axis=0)), np.max(np.concatenate((band_2_KF, band_2_test), axis=0))
# min_b3_KF, max_b3_KF = np.min(np.concatenate((band_3_KF, band_3_test), axis=0)), np.max(np.concatenate((band_3_KF, band_3_test), axis=0))

# band_1_KF = scale_test_range(band_1_KF, min_b1_KF, max_b1_KF, -1, 1)
# band_2_KF = scale_test_range(band_2_KF, min_b2_KF, max_b2_KF, -1, 1)
# band_1_test = scale_test_range(band_1_test, min_b1_KF, max_b1_KF, -1, 1)
# band_2_test = scale_test_range(band_2_test, min_b2_KF, max_b2_KF, -1, 1)
# band_3_KF = scale_test_range(band_3_KF, min_b3_KF, max_b3_KF, -1, 1)
# band_3_test = scale_test_range(band_3_test, min_b3_KF, max_b3_KF, -1, 1)
                          
# rgb = np.stack((band_1_KF, band_2_KF, band_3_KF), axis=1)
# X_KF = [rgb, np.array(data['inc_angle']).reshape((len(data), 1))]

# rgb = np.stack((band_1_test, band_2_test, band_3_test), axis=1)
# X_test = [rgb, np.array(test['inc_angle']).reshape((len(test), 1))]

# y_KF = data['is_iceberg'].values.astype(np.float32)

In [84]:
def accuracy(loader):
    """Evaluate the global ``iceNet`` on batches of (image, angle, label).

    This redefinition replaces the earlier two-item ``accuracy``: it calls the
    model with two inputs and takes the arg-max over dim 1 of the output as the
    predicted class.

    Returns
    -------
    (mean of the per-batch losses, percentage of correctly classified samples)
    """
    iceNet.eval()
    n_correct = 0
    n_seen = 0
    batch_losses = []
    for features, features_angle, labels in loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        labels = Variable(labels, volatile=True).cuda()
        scores = iceNet(features, features_angle)
        batch_losses.append(criterion(scores, labels))
        # torch.max returns (values, indices); the indices are the predicted classes
        predicted = torch.max(scores.data, 1)[1]
        n_seen += labels.size(0)
        n_correct += (predicted == labels.data).sum()

    mean_loss = np.mean(batch_losses).data[0]
    return mean_loss, (100 * n_correct / n_seen)

In [85]:
# Training settings for the cross-validation runs (these overwrite the earlier values).
num_epochs = 300
batch_size = 32
learning_rate = 0.001

In [107]:
# K-fold cross-validation: train a fresh model per fold, keep the checkpoint with the
# lowest validation loss, and predict the test set with it.
# NOTE(review): this cell expects datasets yielding (image, angle, label) batches and a
# two-input `iceNet(features, features_angle)` with a class-score output; the dataset and
# `net` definitions visible earlier in this notebook are single-input — confirm which
# versions are meant to be in scope before running.
kfold = 5
kfold_scores = []

test_dataset = icebergDataset(X_test)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

results = []
# Folds are contiguous (no shuffling). `random_state` has no effect without shuffle=True
# and recent scikit-learn versions reject that combination, so it is not passed.
sss = KFold(n_splits=kfold)
for i, (train_index, test_index) in enumerate(sss.split(X_KF[0], X_KF[1], y_KF)):
    X_train_KF, X_valid_KF = [X_KF[0][train_index], X_KF[1][train_index]], [X_KF[0][test_index], X_KF[1][test_index]]
    y_train_KF, y_valid_KF = y_KF[train_index], y_KF[test_index]
    
    # Per-fold checkpoint file; it always holds the best model of this fold.
    fold_checkpoint = './Models/v1Nov_v1/model_fold_'+str(i+1)+'.pth.tar'
    
    # Define model
    iceNet = net().apply(weight_init).cuda()
    
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(iceNet.parameters(), lr=learning_rate, weight_decay=0.0001)
    
    # Data Loader
    train_dataset_KF = icebergDataset(X_train_KF, y_train_KF, transform=True)
    val_dataset_KF = icebergDataset(X_valid_KF, y_valid_KF)

    train_loader_KF = torch.utils.data.DataLoader(dataset=train_dataset_KF, batch_size=batch_size, shuffle=True)
    val_loader_KF = torch.utils.data.DataLoader(dataset=val_dataset_KF, batch_size=batch_size, shuffle=False)
    
    print('Fold [%d/%d]' % (i+1, kfold))
    # Train
    # Start from +inf so the first epoch always produces a checkpoint for this fold.
    best_prec1 = float('inf')
    for epoch in range(num_epochs):
        # accuracy() switches to eval mode, so re-enable training mode every epoch.
        iceNet.train()
        for features, features_angle, labels in train_loader_KF:
            features = Variable(features).cuda()
            features_angle = Variable(features_angle).cuda()
            labels = Variable(labels).cuda()
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = iceNet(features, features_angle)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation loss of this epoch (lower is better).
        prec1 = accuracy(val_loader_KF)[0]
        
        # Save best model
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        # Write the fold file only on improvement. Saving every epoch, as before, left
        # the LAST epoch in this file, and that is what was then reloaded as "best".
        if is_best:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': iceNet.state_dict(),
                'best_prec1': best_prec1,
                'optimizer' : optimizer.state_dict(),
            }, is_best, filename=fold_checkpoint)
        
    print('Val Score : %f' % (best_prec1))
    kfold_scores.append(best_prec1)
    # Load best model
    best_model = torch.load(fold_checkpoint)
    iceNet.load_state_dict(best_model['state_dict'])
    optimizer.load_state_dict(best_model['optimizer'])
    
    # Predict
    iceNet.eval()
    
    results_fold = []
    for features, features_angle in test_loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        outputs = F.softmax(iceNet(features, features_angle))
    #     outputs = iceNet(features, features_angle)

        # batch_size is 1: take the class-1 probability of the single sample
        results_fold.append(outputs.data[0][1])
    
    results.append(results_fold)



Fold [1/5]
Val Score : 0.167804
Fold [2/5]
Val Score : 0.154238
Fold [3/5]


KeyboardInterrupt: 

In [104]:
# Mean of the per-fold best validation losses collected so far.
np.array(kfold_scores).mean()

0.14790893048048021

In [77]:
# K-fold cross-validation: train a fresh model per fold, keep the checkpoint with the
# lowest validation loss, and predict the test set with it.
# NOTE(review): this cell expects datasets yielding (image, angle, label) batches and a
# two-input `iceNet(features, features_angle)` with a class-score output; the dataset and
# `net` definitions visible earlier in this notebook are single-input — confirm which
# versions are meant to be in scope before running.
kfold = 5
kfold_scores = []

test_dataset = icebergDataset(X_test)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

results = []
# Folds are contiguous (no shuffling). `random_state` has no effect without shuffle=True
# and recent scikit-learn versions reject that combination, so it is not passed.
sss = KFold(n_splits=kfold)
for i, (train_index, test_index) in enumerate(sss.split(X_KF[0], X_KF[1], y_KF)):
    X_train_KF, X_valid_KF = [X_KF[0][train_index], X_KF[1][train_index]], [X_KF[0][test_index], X_KF[1][test_index]]
    y_train_KF, y_valid_KF = y_KF[train_index], y_KF[test_index]
    
    # Per-fold checkpoint file; it always holds the best model of this fold.
    fold_checkpoint = './Models/v1Nov_v1/model_fold_'+str(i+1)+'.pth.tar'
    
    # Define model
    iceNet = net().apply(weight_init).cuda()
    
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(iceNet.parameters(), lr=learning_rate, weight_decay=0.0001)
    
    # Data Loader
    train_dataset_KF = icebergDataset(X_train_KF, y_train_KF, transform=True)
    val_dataset_KF = icebergDataset(X_valid_KF, y_valid_KF)

    train_loader_KF = torch.utils.data.DataLoader(dataset=train_dataset_KF, batch_size=batch_size, shuffle=True)
    val_loader_KF = torch.utils.data.DataLoader(dataset=val_dataset_KF, batch_size=batch_size, shuffle=False)
    
    print('Fold [%d/%d]' % (i+1, kfold))
    # Train
    # Start from +inf so the first epoch always produces a checkpoint for this fold.
    best_prec1 = float('inf')
    for epoch in range(num_epochs):
        # accuracy() switches to eval mode, so re-enable training mode every epoch.
        iceNet.train()
        for features, features_angle, labels in train_loader_KF:
            features = Variable(features).cuda()
            features_angle = Variable(features_angle).cuda()
            labels = Variable(labels).cuda()
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = iceNet(features, features_angle)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation loss of this epoch (lower is better).
        prec1 = accuracy(val_loader_KF)[0]
        
        # Save best model
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        # Write the fold file only on improvement. Saving every epoch, as before, left
        # the LAST epoch in this file, and that is what was then reloaded as "best".
        if is_best:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': iceNet.state_dict(),
                'best_prec1': best_prec1,
                'optimizer' : optimizer.state_dict(),
            }, is_best, filename=fold_checkpoint)
        
    print('Val Score : %f' % (best_prec1))
    kfold_scores.append(best_prec1)
    # Load best model
    best_model = torch.load(fold_checkpoint)
    iceNet.load_state_dict(best_model['state_dict'])
    optimizer.load_state_dict(best_model['optimizer'])
    
    # Predict
    iceNet.eval()
    
    results_fold = []
    for features, features_angle in test_loader:
        features = Variable(features, volatile=True).cuda()
        features_angle = Variable(features_angle, volatile=True).cuda()
        outputs = F.softmax(iceNet(features, features_angle))
    #     outputs = iceNet(features, features_angle)

        # batch_size is 1: take the class-1 probability of the single sample
        results_fold.append(outputs.data[0][1])
    
    results.append(results_fold)



Fold [1/5]
Val Score : 0.139978
Fold [2/5]
Val Score : 0.147510
Fold [3/5]
Val Score : 0.188782
Fold [4/5]
Val Score : 0.123161
Fold [5/5]
Val Score : 0.098183


In [78]:
# Mean of the per-fold best validation losses.
np.array(kfold_scores).mean()

0.13952273428440093

In [85]:
# Number of submission rows whose predicted probability is below 0.5.
np.array(sub['is_iceberg']<0.5).sum()

8424

In [79]:
# Average the per-fold test predictions (axis 0 runs over folds) and write the submission.
sub = pd.read_csv('./Data/sample_submission.csv')
sub['is_iceberg'] = np.array(results).mean(axis=0)
sub.to_csv('./Submissions/Sub 12 - 5-fold _ Val-1395.csv', index=False)

In [108]:
# Reload the sample submission file as the frame for the ensemble.
sub = pd.read_csv('./Data/sample_submission.csv')

In [109]:
# Prediction columns of earlier submissions to be ensembled; two candidates are disabled.
sub1 = pd.read_csv('./Submissions/Sub 3 - 5 fold _ Val - 0.1504.csv')['is_iceberg']
# sub2 = pd.read_csv('./Submissions/Sub 4 - 10-fold _ Val-1269.csv')['is_iceberg']
sub3 = pd.read_csv('./Submissions/Sub 5 - 5-fold _ Val-1538.csv')['is_iceberg']
sub4 = pd.read_csv('./Submissions/Sub 6 - 5-fold _ Val-1480.csv')['is_iceberg']
sub5 = pd.read_csv('./Submissions/Sub 8 - 5-fold _ Val-1479.csv')['is_iceberg']
# sub6 = pd.read_csv('./Submissions/Sub 9 - 5-fold _ Val-1480.csv')['is_iceberg']

In [110]:
# Ensemble: unweighted element-wise mean of the four loaded prediction columns.
results = np.mean((np.array(sub1), np.array(sub3),
                   np.array(sub4), np.array(sub5)), axis=0)
sub['is_iceberg'] = np.array(results)
sub.to_csv('./Submissions/Sub 13 - Ensemble_3_5_6_8.csv', index=False)