In [1]:
import pickle 
import torch
from torch.autograd import Variable
from sub import subMNIST       # testing the subclass of MNIST dataset

Files already downloaded
3000
750


# Train Model

In [2]:
trainset_import = pickle.load(open("train_labeled.p", "rb"))
validset_import = pickle.load(open("validation.p", "rb"))
semiset_import = pickle.load(open('train_unlabeled.p', "rb"))
train_loader = torch.utils.data.DataLoader(trainset_import, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(validset_import, batch_size=64, shuffle=True)
semi_loader = torch.utils.data.DataLoader(semiset_import, batch_size=64, shuffle=True)

In [3]:
semiset_import.train_labels = torch.LongTensor(len(semi_loader.dataset)).zero_()

In [4]:
semiset_import.train_labels


 0
 0
 0
⋮ 
 0
 0
 0
[torch.LongTensor of size 47000]

In [5]:
import random

def data_transformer(loader, rotation=True):
    result = []
    for data, target in loader:
        size = data.size(0)
        data = data.numpy().reshape(size, 784)
        if rotation:
            data_3D = data.reshape(size, 28, 28) #64*28*28
            for i in range(9):
                data_new = np.empty([0,28,28])
                for graph in data_3D:
                    b = scipy.misc.imrotate(graph, random.randint(-15, 15)).reshape(1,28,28)
                    data_new = np.concatenate((data_new, b), axis=0)    
                max_val = np.max(data_new)
                min_val = np.min(data_new)
                data_new_norm = (data_new-min_val) / (max_val-min_val) #64*784
                Y = torch.from_numpy(data_new_norm.reshape(size, 784))
                Y = torch.FloatTensor(size,784).copy_(Y)
                #target = torch.FloatTensor(size).copy_(target)
                pair = (Y, target)
                result.append(pair)
        
        max_val = np.max(data)
        min_val = np.min(data)
        data_norm = (data-min_val) / (max_val-min_val) #64*784
            
        X = torch.from_numpy(data_norm)
        pair = (X, target)
        result.append(pair)
    return result

In [6]:
import numpy as np
import scipy.misc
train_trans = data_transformer(train_loader, rotation=True)
valid_trans = data_transformer(valid_loader, rotation=False)
semi_trans = data_transformer(semi_loader, rotation=False)

In [7]:
import torch.nn.functional as F
import torch.optim as optim
#import argparse
#from torchvision import datasets, transforms
#import torch.autograd as autograd
#from __future__ import print_function

mb_size = 64
Z_dim = 100
X_dim = 784
h_dim = 128
y_dim = 10
lr = 1e-3

def xavier_init(size):
    in_dim = size[0]
    xavier_stddev = 1. / np.sqrt(in_dim / 2.)
    return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True)

In [8]:
# =============================== Q(z|X) ======================================

Wxh = xavier_init(size=[X_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

Whz_mu = xavier_init(size=[h_dim, Z_dim])
bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True)

Whz_var = xavier_init(size=[h_dim, Z_dim])
bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True)

Whz_y = xavier_init(size=[h_dim, y_dim])
bhz_y = Variable(torch.zeros(y_dim), requires_grad=True)


def Q(X):
    h_initial = F.relu(X @ Wxh + bxh.repeat(X.size(0), 1))
    dropout = torch.nn.Dropout(p = 0.0)
    h = dropout(h_initial)
    z_mu = h @ Whz_mu + bhz_mu.repeat(h.size(0), 1)
    z_var = h @ Whz_var + bhz_var.repeat(h.size(0), 1)
    recon_y = h @ Whz_y + bhz_y.repeat(h.size(0), 1)
    prop_y = F.log_softmax(recon_y)
    return z_mu, z_var, prop_y


def sample_z(mu, log_var):
    size = mu.size(0)
    eps = Variable(torch.randn(size, Z_dim))
    return mu + torch.exp(log_var / 2) * eps


# =============================== P(X|z) ======================================

# Wzh = xavier_init(size=[Z_dim, h_dim])
# bzh = Variable(torch.zeros(h_dim), requires_grad=True)

# Whx = xavier_init(size=[h_dim, X_dim])
# bhx = Variable(torch.zeros(X_dim), requires_grad=True)


# def P(z):
#     h = F.relu(z @ Wzh + bzh.repeat(z.size(0), 1))
#     X = F.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1))
#     return X
Wzh = xavier_init(size=[Z_dim + y_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def P(z, c):
    inputs = torch.cat([z, c], 1)
    h = F.relu(inputs @ Wzh + bzh.repeat(inputs.size(0), 1))
    X = F.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1))
    return X


# =============================== TRAINING ====================================

params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var, Whz_y, bhz_y,
          Wzh, bzh, Whx, bhx]

solver = optim.Adam(params, lr=lr)
Train_loss, Train_label_loss, Test_loss, Test_label_loss = [], [], [], []

In [9]:
import os
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
def train(epoch, train_trans = train_trans, lamba=0):
    train_loss = 0
    label_loss = 0
    correct = 0
    for data, target in train_trans:
        X, target = Variable(data), Variable(target)
        size = len(target)
        z_mu, z_var, recon_y = Q(X)
        z = sample_z(z_mu, z_var)
        X_sample = P(z, recon_y)
#         X_sample = P(z)

    # Loss
        recon_loss = F.binary_cross_entropy(X_sample, X, size_average=False) /size
        kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. - z_var, 1))
#         print(target)
        predict_loss = F.nll_loss(recon_y, target)
        loss = recon_loss + kl_loss + lamba* predict_loss
        train_loss += recon_loss.data[0]
        label_loss += predict_loss.data[0]
        pred = recon_y.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    # Backward
        loss.backward()

    # Update
        solver.step()
#         print(pred)

    # Housekeeping
        for p in params:
            p.grad.data.zero_()

    # Print and plot every now and then
    train_loss /= len(train_trans)  # loss function already averages over batch size
    label_loss /= len(train_trans) 
    Train_loss.append(train_loss)
    Train_label_loss.append(label_loss)
    print('Train set: Train loss: {:.4f}, label loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    train_loss, label_loss, correct, len(train_trans)*64,
    100. * correct / (len(train_trans)*64)))

    samples = P(z, recon_y).data.numpy()[:16]
#     samples = P(z).data.numpy()[:16]

    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    if not os.path.exists('out/'):
        os.makedirs('out/')

    plt.savefig('out/{}.png'.format(str(epoch).zfill(3)), bbox_inches='tight')
    plt.close(fig)

def test(epoch):
    test_loss = 0
    label_loss = 0
    correct = 0
    for data, target in valid_trans:
        X, target = Variable(data), Variable(target)
        z_mu, z_var, recon_y = Q(X)
        z = sample_z(z_mu, z_var)
        X_sample = P(z, recon_y)
#         X_sample = P(z)

    # Loss
        recon_loss = F.binary_cross_entropy(X_sample, X, size_average=False) / mb_size
        kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. - z_var, 1))
        predict_loss = F.nll_loss(recon_y, target)
        loss = recon_loss + kl_loss + predict_loss
        test_loss += recon_loss.data[0]
        label_loss += predict_loss.data[0]
        pred = recon_y.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(valid_loader) # loss function already averages over batch size
    label_loss /= len(valid_loader)
    Test_loss.append(test_loss)
    Test_label_loss.append(label_loss)
    print('Valid set: Valid loss: {:.4f}, label loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, label_loss, correct, len(valid_loader.dataset),
        100. * correct / len(valid_loader.dataset)))

def semi():    
    for index, (data, target) in enumerate(semi_trans):
        size = len(target)
        X, target = Variable(data), Variable(target)
        z_mu, z_var, recon_y = Q(X)
        z = sample_z(z_mu, z_var)
        X_sample = P(z, recon_y)
#         X_sample = P(z)
        pred = recon_y.data.max(1)[1] # get the index of the max log-probability
        new = torch.LongTensor(size).copy_(pred)
#         print(new)
        semi_trans[index][1].copy_(new)
#     print(semi_trans[-1][1])
    
        

In [10]:
new_train_trans = train_trans+semi_trans[:]
for epoch in range(50):
    print('epoch: {:}'.format(epoch))
    if epoch<0:
        Train_trans = new_train_trans
        Lamba = 0
    else:
        Train_trans = train_trans
        Lamba = 5000
    train(epoch, Train_trans, Lamba)
    test(epoch) 

epoch: 0
Train set: Train loss: 199.7100, label loss: 0.4429, Accuracy: 26228/30080 (87%)
Valid set: Valid loss: 144.8090, label loss: 0.3421, Accuracy: 8922/10000 (89%)
epoch: 1
Train set: Train loss: 141.8414, label loss: 0.1822, Accuracy: 28453/30080 (95%)
Valid set: Valid loss: 130.2178, label loss: 0.2738, Accuracy: 9194/10000 (92%)
epoch: 2
Train set: Train loss: 132.6433, label loss: 0.1028, Accuracy: 29199/30080 (97%)
Valid set: Valid loss: 123.9047, label loss: 0.2454, Accuracy: 9303/10000 (93%)
epoch: 3
Train set: Train loss: 127.7139, label loss: 0.0636, Accuracy: 29532/30080 (98%)
Valid set: Valid loss: 119.6071, label loss: 0.2426, Accuracy: 9333/10000 (93%)
epoch: 4
Train set: Train loss: 124.3730, label loss: 0.0416, Accuracy: 29728/30080 (99%)
Valid set: Valid loss: 116.7172, label loss: 0.2491, Accuracy: 9350/10000 (94%)
epoch: 5
Train set: Train loss: 121.8324, label loss: 0.0272, Accuracy: 29842/30080 (99%)
Valid set: Valid loss: 114.5529, label loss: 0.2627, Accurac

KeyboardInterrupt: 

In [11]:
semi()
new_train_trans = train_trans+semi_trans[:]
for epoch in range(40):
    print('epoch: {:}'.format(epoch))
    train(epoch, new_train_trans, lamba=5000)
    test(epoch) 

epoch: 0
Train set: Train loss: 97.9662, label loss: 0.0474, Accuracy: 75829/77120 (98%)
Valid set: Valid loss: 93.0450, label loss: 0.3804, Accuracy: 9514/10000 (95%)
epoch: 1
Train set: Train loss: 97.1852, label loss: 0.0163, Accuracy: 76516/77120 (99%)
Valid set: Valid loss: 91.9468, label loss: 0.3938, Accuracy: 9504/10000 (95%)
epoch: 2
Train set: Train loss: 96.7133, label loss: 0.0096, Accuracy: 76744/77120 (100%)
Valid set: Valid loss: 91.7368, label loss: 0.3917, Accuracy: 9517/10000 (95%)
epoch: 3
Train set: Train loss: 96.5887, label loss: 0.0078, Accuracy: 76798/77120 (100%)
Valid set: Valid loss: 91.6890, label loss: 0.3997, Accuracy: 9511/10000 (95%)
epoch: 4
Train set: Train loss: 96.6079, label loss: 0.0086, Accuracy: 76781/77120 (100%)
Valid set: Valid loss: 91.3914, label loss: 0.4188, Accuracy: 9510/10000 (95%)
epoch: 5
Train set: Train loss: 96.5050, label loss: 0.0071, Accuracy: 76813/77120 (100%)
Valid set: Valid loss: 91.8424, label loss: 0.4333, Accuracy: 9509/

KeyboardInterrupt: 















# Create Sample Submission

In [None]:
pickle.dump(testset, open("test.p", "wb" ))

In [None]:
test_loader = torch.utils.data.DataLoader(testset,batch_size=64, shuffle=False)

## Test Accuuracy

In [None]:
test(1, test_loader)

In [None]:
label_predict = np.array([])
model.eval()
for data, target in test_loader:
    data, target = Variable(data, volatile=True), Variable(target)
    output = model(data)
    temp = output.data.max(1)[1].numpy().reshape(-1)
    label_predict = np.concatenate((label_predict, temp))

In [None]:
label_predict

In [None]:
label_true = test_loader.dataset.test_labels.numpy()

In [None]:
diff_array = label_true - label_predict

In [None]:
len(np.where(diff_array != 0)[0])

In [None]:
import pandas as pd
true_label = pd.DataFrame(label_true, columns=['label'])
true_label.reset_index(inplace=True)
true_label.rename(columns={'index': 'ID'}, inplace=True)

In [None]:
true_label.head()

In [None]:
predict_label = pd.DataFrame(label_predict, columns=['label'], dtype=int)
predict_label.reset_index(inplace=True)
predict_label.rename(columns={'index': 'ID'}, inplace=True)

In [None]:
predict_label.head()

In [None]:
predict_label.to_csv('sample_submission.csv', index=False)
true_label.to_csv('true_label.csv', index=False)