In [1]:
import os
import numpy as np
import pandas as pd
from skimage.util.montage import montage2d
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
base_path = os.path.join('data')

### Data Initialization

In [2]:
#Load data
train = pd.read_json("../data/train.json")
test = pd.read_json("../data/test.json")
train.inc_angle = train.inc_angle.replace('na', 0)
train.inc_angle = train.inc_angle.astype(float).fillna(0.0)
print("done!")

done!


In [194]:
# Train data
x_band1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
x_band2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
X_train = np.concatenate([x_band1[:, :, :, np.newaxis]
                          ,x_band2[:, :, :, np.newaxis]
                         ,((x_band1+x_band2)/2)[:, :, :, np.newaxis]], axis=-1)
X_angle_train = np.array(train.inc_angle)
y_train = np.array(train["is_iceberg"])

# Test data
x_band1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
x_band2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
X_test = np.concatenate([x_band1[:, :, :, np.newaxis]
                          , x_band2[:, :, :, np.newaxis]
                         , ((x_band1+x_band2)/2)[:, :, :, np.newaxis]], axis=-1)
X_angle_test = np.array(test.inc_angle)

### Augmentation

##### Need to do: Augmentors should be functionalized, in order to keep X, y the same size

In [137]:
mean = nd.array([-20.655821,-26.320704,-23.488279])
std = nd.array([5.200841,3.3955173,3.8151529])
normalizer = image.ColorNormalizeAug(mean, std)
flip = image.HorizontalFlipAug(1)

In [182]:
X_train_new = [normalizer(nd.array(X_train[i])) for i in range(1604)]    #normalize
X_train_new.extend([flip(X_train_new[i]) for i in range(1604)])     #flip
y_train_new = np.append(y_train, y_train)  # y_train
X_test_new = [normalizer(nd.array(X_test[i])) for i in range(8424)] #X_test

In [222]:
#resize: 
X_train_new = [nd.transpose(X_train_new[i],(2,0,1)) for i in range(len(X_train_new))]
X_test_new = [nd.transpose(X_test_new[i],(2,0,1)) for i in range(len(X_test_new))]

### Try GPU

In [204]:
import mxnet as mx
try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except:
    ctx = mx.cpu()
ctx

cpu(0)

### Model: ResNet 18

In [244]:
from mxnet.gluon import nn
from mxnet import init


class Residual(nn.HybridBlock):
    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1, strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                self.conv3 = nn.Conv2D(channels, kernel_size=1, strides=strides)

    def hybrid_forward(self, F, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(out + x)


class ResNet(nn.HybridBlock):
    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # block 1
            net.add(nn.Conv2D(channels=32, kernel_size=3, strides=1, padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            # block 2
            for _ in range(3):
                net.add(Residual(channels=32))
            # block 3
            net.add(Residual(channels=64, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=64))
            # block 4
            net.add(Residual(channels=128, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=128))
            # block 5
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes, activation='sigmoid'))

    def hybrid_forward(self, F, x):
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s'%(i+1, out.shape))
        return out


def get_net(ctx):
    num_outputs = 2
    net = ResNet(num_outputs)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net

In [224]:
#train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_train_new, y_train_new, test_size=0.2, random_state=66)

In [88]:
# for next use of angle:
X_train, X_valid, X_angle_train, X_angle_valid, y_train, y_valid = train_test_split(X_train_all
                    ,X_angle_train, y_train, random_state=66, train_size=0.75)



In [225]:
train_ds = [(X_train[i],y_train[i]) for i in range(len(y_train))]
valid_ds = [(X_test[i],y_test[i]) for i in range(len(y_test))]
train_valid_ds = [(X_train_new[i], y_train_new[i]) for i in range(len(y_train_new))]
test_ds = [(X_test_new[i], 0) for i in range(len(X_test_new))]

In [226]:
from mxnet import gluon
batch_size = 128
loader = gluon.data.DataLoader
train_data = loader(train_ds, batch_size, shuffle=True, last_batch='keep')
valid_data = loader(valid_ds, batch_size, shuffle=True, last_batch='keep')
train_valid_data = loader(train_valid_ds, batch_size, shuffle=True, last_batch='keep')
test_data = loader(test_ds, batch_size, shuffle = False, last_batch='keep')

In [227]:
import datetime
import sys
sys.path.append('..')

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period, lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += accuracy(output, label)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

In [228]:
def accuracy(output, label):
    return np.mean(output.asnumpy().argmax(axis=1)==label.asnumpy())

def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc = nd.array([0])
    n = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            acc += nd.array([np.sum(net(X).asnumpy().argmax(axis=1)==y.asnumpy())]).copyto(mx.cpu())
        acc.wait_to_read() # don't push too many operators into backend
        n += batch_size
    return acc.asscalar() / n

def _get_batch(batch, ctx):
    """return data and label on ctx"""
    if isinstance(batch, mx.io.DataBatch):
        data = batch.data[0]
        label = batch.label[0]
    else:
        data, label = batch
    return (gluon.utils.split_and_load(data, ctx),
            gluon.utils.split_and_load(label, ctx),
            data.shape[0])


In [229]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()


In [239]:
num_epochs = 30
learning_rate = 0.05
weight_decay = 1e-4
lr_period = 25
lr_decay = 0.1

net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data, num_epochs, learning_rate, weight_decay, ctx, lr_period, lr_decay)

Epoch 0. Loss: 1.736137, Train acc 0.576141, Valid acc 0.552960, Time 00:05:41, lr 0.05
Epoch 1. Loss: 0.581291, Train acc 0.683036, Valid acc 0.641745, Time 00:05:55, lr 0.05
Epoch 2. Loss: 0.640282, Train acc 0.699281, Valid acc 0.714953, Time 00:05:54, lr 0.05
Epoch 3. Loss: 0.719692, Train acc 0.715650, Valid acc 0.735202, Time 00:05:55, lr 0.05
Epoch 4. Loss: 0.556444, Train acc 0.760665, Valid acc 0.693146, Time 00:05:55, lr 0.05
Epoch 5. Loss: 0.597260, Train acc 0.765625, Valid acc 0.738318, Time 00:05:55, lr 0.05
Epoch 6. Loss: 0.429417, Train acc 0.786582, Valid acc 0.641745, Time 00:05:54, lr 0.05
Epoch 7. Loss: 0.376101, Train acc 0.813864, Valid acc 0.794393, Time 00:05:56, lr 0.05
Epoch 8. Loss: 0.559733, Train acc 0.796007, Valid acc 0.806854, Time 00:05:54, lr 0.05
Epoch 9. Loss: 0.371577, Train acc 0.821305, Valid acc 0.850467, Time 00:05:54, lr 0.05
Epoch 10. Loss: 0.322743, Train acc 0.861979, Valid acc 0.644860, Time 00:05:54, lr 0.05
Epoch 11. Loss: 0.307963, Train

In [247]:
num_epochs = 25
learning_rate = 0.05
weight_decay = 1e-4
lr_period = 15
lr_decay = 0.1

net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None, num_epochs, learning_rate, weight_decay, ctx, lr_period, lr_decay)

Epoch 0. Loss: 0.595225, Train acc 0.691707, Time 00:07:08, lr 0.05
Epoch 1. Loss: 0.472683, Train acc 0.839844, Time 00:07:00, lr 0.05
Epoch 2. Loss: 0.481922, Train acc 0.828125, Time 00:06:56, lr 0.05
Epoch 3. Loss: 0.455753, Train acc 0.855168, Time 00:07:19, lr 0.05
Epoch 4. Loss: 0.432268, Train acc 0.877704, Time 00:07:04, lr 0.05
Epoch 5. Loss: 0.435757, Train acc 0.872296, Time 00:07:15, lr 0.05
Epoch 6. Loss: 0.412084, Train acc 0.898438, Time 00:07:09, lr 0.05
Epoch 7. Loss: 0.415136, Train acc 0.895132, Time 00:07:09, lr 0.05
Epoch 8. Loss: 0.409925, Train acc 0.896635, Time 00:07:08, lr 0.05
Epoch 9. Loss: 0.399246, Train acc 0.911959, Time 00:07:08, lr 0.05
Epoch 10. Loss: 0.395775, Train acc 0.913161, Time 00:07:06, lr 0.05
Epoch 11. Loss: 0.395223, Train acc 0.917668, Time 00:07:57, lr 0.05
Epoch 12. Loss: 0.407336, Train acc 0.901743, Time 00:08:00, lr 0.05
Epoch 13. Loss: 0.385708, Train acc 0.925180, Time 00:07:57, lr 0.05
Epoch 14. Loss: 0.369409, Train acc 0.943510

In [260]:
preds = []
for data, label in test_data:
    output = net(data.as_in_context(ctx))
    preds.extend(output[:,1].asnumpy())
    
df = pd.DataFrame({'id': test['id'], 'is_iceberg':preds})
df['id'] =df['id'].astype(str)

In [261]:
'06565646' in list(df['id'])

True

In [262]:
df.to_csv('../submit/submission11050016.csv',index=False)

In [242]:
mask = df.is_iceberg == 0
column_name = 'is_iceberg'
df.loc[mask, column_name] = 0.1
mask = df.is_iceberg == 1
df.loc[mask, column_name] = 0.9