In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2

# Read the four training shards and concatenate them row-wise, in shard order.
shard_paths = [
    '../input/train-1.npy',
    '../input/train-2.npy',
    '../input/train-3.npy',
    '../input/train-4.npy',
]
data = np.load(shard_paths[0])
for shard_path in shard_paths[1:]:
    data = np.vstack((data, np.load(shard_path)))

In [None]:
width, height = 8, 8

# Preview the first width * height samples in a grid; each panel is titled
# with the tag stored next to the image.
fig = plt.figure(figsize=(16, 20))
for cell, (image, tag) in enumerate(data[:width * height], start=1):
    plt.subplot(height, width, cell)
    plt.title(tag)
    plt.imshow(image, cmap='gray')
plt.show()

In [None]:
import torch
from torch import utils
from torchvision import datasets, transforms
import matplotlib
import matplotlib.pyplot as plt
from torch.autograd import Variable
import sys
sys.path.append('../')
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn import datasets

%matplotlib inline
        
def plot_graphs(log, epochs, title, tpe='loss'):
    """Plot one curve per series in ``log`` against ``epochs``.

    Args:
        log: mapping of series name -> list of ``(loss, accuracy)`` tuples,
            one tuple per epoch.
        epochs: x values shared by every series.
        title: prefix of the figure title.
        tpe: ``'loss'`` plots the first element of every tuple,
            ``'accuracy'`` the second. Any other value draws nothing.
    """
    # (index inside each log tuple, title suffix, y-axis label) per plot type.
    layouts = {
        'loss': (0, ', errors', 'error'),
        'accuracy': (1, ', accuracy', 'accuracy'),
    }
    if tpe not in layouts:
        return
    column, suffix, ylabel = layouts[tpe]

    handles = []
    for name, records in log.items():
        if not records:
            # Nothing recorded for this series yet; skip it instead of failing.
            continue
        series = list(zip(*records))[column]
        # plt.plot returns a list of Line2D objects (one per plotted curve).
        handles.extend(plt.plot(epochs, series, label=name))

    plt.title(title + suffix)
    plt.xlabel('epoch')
    plt.ylabel(ylabel)
    if handles:
        plt.legend(handles=handles)
    plt.show()

In [None]:
data_s = data.shape[0]
test_s = data_s // 5           # hold out one fifth of the samples
train_s = data_s - test_s

# Fixed seed so the split (and every number reported further down) can be
# reproduced after Restart & Run All.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    data[:, 0], data[:, 1], test_size=test_s, random_state=SPLIT_SEED)
print(x_train[1].shape, y_train.shape, x_test.shape, y_test.shape)

In [None]:
# np.long was a deprecated alias that NumPy 1.24 removed (AttributeError);
# np.int64 states the intended integer width explicitly.
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)

# np.unique already returns the distinct labels sorted in ascending order.
lables = np.unique(np.concatenate([y_test, y_train]))

# Two-way mapping between raw label values and contiguous class indices.
# Built from the labels actually present instead of assuming exactly 1000.
uni_to_class = {label: index for index, label in enumerate(lables)}
class_to_uni = {index: label for index, label in enumerate(lables)}

# Class-index targets aligned with x_train / x_test.
target_train = torch.tensor([uni_to_class[label] for label in y_train], dtype=torch.long)
target_test = torch.tensor([uni_to_class[label] for label in y_test], dtype=torch.long)

In [None]:
new_size = 48

def resize(img, size=None):
    """Pad a 2-D image to a square with value 255, then scale it to size x size.

    Args:
        img: 2-D array (height, width).
        size: edge length of the result; defaults to the module-level
            ``new_size`` looked up at call time.

    Returns:
        A ``torch.float32`` tensor of shape ``(size, size)``.
    """
    if size is None:
        size = new_size
    h, w = img.shape
    diff = abs(h - w)
    # Split the padding so both sides together add exactly `diff` pixels; the
    # extra pixel of an odd difference goes to the right / bottom. This keeps
    # the canvas exactly square before scaling.
    before = diff // 2
    after = diff - before
    # NOTE(review): 255 presumably matches a white background - confirm.
    if h > w:
        img = np.hstack((np.full((h, before), 255.0), img, np.full((h, after), 255.0)))
    else:
        img = np.vstack((np.full((before, w), 255.0), img, np.full((after, w), 255.0)))
    img = cv2.resize(img, (size, size))
    return torch.from_numpy(img.astype(np.float32))


def regularize(x):
    """Standardize ``x`` in place and return the statistics that were used.

    The mean is taken along dimension 0 (one value per position), while the
    standard deviation is a single scalar over the whole tensor.

    Returns:
        ``(mean, std)`` so the same shift and scale can be applied to other data.
    """
    mean = x.mean(0)
    std = x.std()
    x.sub_(mean).div_(std)
    return mean, std
    
# Sanity check: show the first test image together with its shape.
first_test_image = x_test[0]
print(first_test_image, first_test_image.shape)

In [None]:
# Replace every image, in place, by its padded and rescaled tensor version.
for images, count in ((x_train, train_s), (x_test, test_s)):
    for i in range(count):
        images[i] = resize(images[i])

In [None]:
class CNN2(torch.nn.Module):
    """Four-convolution classifier for 48x48 single-channel images.

    Shapes after each stage (N = batch size):
        input                        N x 1  x 48 x 48
        conv1 (5x5) + leaky_relu     N x 16 x 48 x 48
        max-pool 2x2                 N x 16 x 24 x 24
        conv2..conv4 (3x3)           N x 64 x 24 x 24
        max-pool 2x2                 N x 64 x 12 x 12
        fc1 + relu                   N x 2000
        fc2                          N x num_classes (raw scores)

    Args:
        num_classes: width of the output layer. Defaults to 1000, the value
            that used to be hard-coded.
    """
    def __init__(self, num_classes=1000):
        super(CNN2, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        # One pooling module is reused after conv1 and after conv4.
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = torch.nn.Conv2d(32, 48, kernel_size=3, stride=1, padding=1)
        self.conv4 = torch.nn.Conv2d(48, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(64 * 12 * 12, 2000)
        self.fc2 = torch.nn.Linear(2000, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 48x48 images."""
        # Insert the channel dimension: (N, 48, 48) -> (N, 1, 48, 48).
        x = x.view(-1, 1, 48, 48)
        x = F.leaky_relu(self.conv1(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv2(x))
        x = F.leaky_relu(self.conv3(x))
        x = F.leaky_relu(self.conv4(x))
        x = self.pool(x)
        x = x.view(-1, 64 * 12 * 12)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
class CNN3(torch.nn.Module):
    """Three-convolution classifier for 48x48 single-channel images.

    Shapes after each stage (N = batch size):
        input                        N x 1  x 48 x 48
        conv1 (7x7) + leaky_relu     N x 16 x 48 x 48
        max-pool 2x2                 N x 16 x 24 x 24
        conv2 (5x5) + leaky_relu     N x 32 x 24 x 24
        max-pool 2x2                 N x 32 x 12 x 12
        conv3 (3x3) + leaky_relu     N x 64 x 12 x 12
        max-pool 2x2                 N x 64 x 6  x 6
        fc1 + relu                   N x 2000
        fc2                          N x num_classes (raw scores)

    Args:
        num_classes: width of the output layer. Defaults to 1000, the value
            that used to be hard-coded.
    """
    def __init__(self, num_classes=1000):
        super(CNN3, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=7, stride=1, padding=3)
        # One pooling module is reused after every convolution.
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(64 * 6 * 6, 2000)
        self.fc2 = torch.nn.Linear(2000, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 48x48 images."""
        # Insert the channel dimension: (N, 48, 48) -> (N, 1, 48, 48).
        x = x.view(-1, 1, 48, 48)
        x = F.leaky_relu(self.conv1(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv2(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(-1, 64 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
class CNN4(torch.nn.Module):
    """Five-convolution classifier for 48x48 single-channel images.

    Shapes after each stage (N = batch size):
        input                        N x 1   x 48 x 48
        conv1 (7x7) + leaky_relu     N x 16  x 48 x 48
        max-pool 2x2                 N x 16  x 24 x 24
        conv2 (5x5) + leaky_relu     N x 32  x 24 x 24
        max-pool 2x2                 N x 32  x 12 x 12
        conv3..conv5 (3x3)           N x 128 x 12 x 12
        max-pool 2x2                 N x 128 x 6  x 6
        fc1 + relu                   N x 2000
        fc2                          N x num_classes (raw scores)

    Args:
        num_classes: width of the output layer. Defaults to 1000, the value
            that used to be hard-coded.
    """
    def __init__(self, num_classes=1000):
        super(CNN4, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=7, stride=1, padding=3)
        # One pooling module is reused after conv1, conv2 and conv5.
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = torch.nn.Conv2d(64, 96, kernel_size=3, stride=1, padding=1)
        self.conv5 = torch.nn.Conv2d(96, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(128 * 6 * 6, 2000)
        self.fc2 = torch.nn.Linear(2000, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 48x48 images."""
        # Insert the channel dimension: (N, 48, 48) -> (N, 1, 48, 48).
        x = x.view(-1, 1, 48, 48)
        x = F.leaky_relu(self.conv1(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv2(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv3(x))
        x = F.leaky_relu(self.conv4(x))
        x = F.leaky_relu(self.conv5(x))
        x = self.pool(x)
        x = x.view(-1, 128 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
class CNN5(torch.nn.Module):
    """Five-convolution classifier without a final pooling stage.

    Shapes after each stage (N = batch size):
        input                        N x 1   x 48 x 48
        conv1 (7x7) + leaky_relu     N x 16  x 48 x 48
        max-pool 2x2                 N x 16  x 24 x 24
        conv2 (5x5) + leaky_relu     N x 32  x 24 x 24
        max-pool 2x2                 N x 32  x 12 x 12
        conv3..conv5 (3x3)           N x 128 x 12 x 12
        fc1 + relu                   N x 2000
        fc2                          N x num_classes (raw scores)

    Args:
        num_classes: width of the output layer. Defaults to 1000, the value
            that used to be hard-coded.
    """
    def __init__(self, num_classes=1000):
        super(CNN5, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=7, stride=1, padding=3)
        # One pooling module is reused after conv1 and conv2 only.
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = torch.nn.Conv2d(64, 96, kernel_size=3, stride=1, padding=1)
        self.conv5 = torch.nn.Conv2d(96, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(128 * 12 * 12, 2000)
        self.fc2 = torch.nn.Linear(2000, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 48x48 images."""
        # Insert the channel dimension: (N, 48, 48) -> (N, 1, 48, 48).
        x = x.view(-1, 1, 48, 48)
        x = F.leaky_relu(self.conv1(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv2(x))
        x = self.pool(x)
        x = F.leaky_relu(self.conv3(x))
        x = F.leaky_relu(self.conv4(x))
        x = F.leaky_relu(self.conv5(x))
        x = x.view(-1, 128 * 12 * 12)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# Collapse each array of per-image tensors into a single batch tensor.
x_train = torch.stack(list(x_train))
x_test = torch.stack(list(x_test))

In [None]:
# Standardize the training set in place, then apply the very same training
# statistics to the held-out set.
mean, std = regularize(x_train)
x_test.sub_(mean).div_(std)
print(x_train.size())

In [None]:
# Coefficients of the L1 and L2 weight penalties added to the training loss.
# NOTE(review): these values were set while the penalty was detached from
# autograd (it had no effect on the gradients). Now that it is live they
# presumably need retuning - confirm with a short run.
l1_lmbd = 0.15
l2_lmbd = 0.25

def li_loss(layer, i):
    """Return the p-norm (p = ``i``) of ``layer.weight`` over all its entries.

    The norm is computed on the parameter itself rather than on ``.data``, so
    the result stays in the autograd graph and backpropagation through it
    produces a gradient for the weights.
    """
    return torch.norm(layer.weight, p=i)

In [None]:
batch_s = 64
def train(model, epoch):
    """Run one training pass over ``x_train`` / ``target_train``.

    Samples are visited in stored order in consecutive batches of ``batch_s``;
    a trailing partial batch is not used. The loss is cross-entropy plus
    L1 and L2 penalties on ``model.fc1`` and ``model.fc2``. The module-level
    ``optimizer`` performs the updates.

    Args:
        model: network to optimise; must expose ``fc1`` and ``fc2``.
        epoch: epoch number, used only in the progress message.
    """
    # Follow the model's own device instead of assuming CUDA, so the CPU
    # fallback chosen when the model was created keeps working.
    device = next(model.parameters()).device
    model.train()
    for i in range(train_s // batch_s):
        start = i * batch_s
        stop = start + batch_s
        inputs = x_train[start:stop].to(device).float()
        target = target_train[start:stop].to(device).long()

        optimizer.zero_grad()
        output = model(inputs)
        loss = F.cross_entropy(output, target) + l1_lmbd * li_loss(model.fc1, 1) \
            + l1_lmbd * li_loss(model.fc2, 1) + l2_lmbd * li_loss(model.fc1, 2) \
            + l2_lmbd * li_loss(model.fc2, 2)
        loss.backward()
        optimizer.step()

        if i % 200 == 0:
            # `stop` is the number of samples consumed so far in this epoch.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, stop, len(x_train),
                100. * stop / len(x_train), loss.item()))

        
def _evaluate(model, inputs, targets, device):
    """Return (mean cross-entropy, number correct, accuracy in %) over all samples."""
    total = len(inputs)
    if total == 0:
        return 0.0, 0, 0.0
    loss_sum = 0.0
    correct = 0
    # Step through every sample, including the final partial batch, so the
    # sums match the `total` they are divided by.
    for start in range(0, total, batch_s):
        data = inputs[start:start + batch_s].to(device).float()
        target = targets[start:start + batch_s].to(device).long()
        output = model(data)
        loss_sum += F.cross_entropy(output, target, reduction='sum').item()
        pred = output.max(1)[1]
        # .item() turns the count into a plain int so nothing tensor-typed
        # (or GPU-resident) ends up in the printed text or in the log.
        correct += pred.eq(target).sum().item()
    return loss_sum / total, correct, 100. * correct / total


def test(model, log=None):
    """Evaluate ``model`` on the held-out and training sets and report both.

    Prints average loss and accuracy for each set and, when ``log`` is given,
    appends ``(loss, accuracy_percent)`` to ``log['test']`` and ``log['train']``.

    Args:
        model: network to evaluate.
        log: optional dict with list values under the keys ``'test'`` and
            ``'train'``.
    """
    device = next(model.parameters()).device
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            test_loss, test_correct, test_correct_percent = _evaluate(
                model, x_test, target_test, device)
            train_loss, train_correct, train_correct_percent = _evaluate(
                model, x_train, target_train, device)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    print('\nTest set:  Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, test_correct, test_s, test_correct_percent))
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        train_loss, train_correct, train_s, train_correct_percent))

    if log is not None:
        log['test'].append((test_loss, test_correct_percent))
        log['train'].append((train_loss, train_correct_percent))

In [None]:
def weights_init(model):
    """Re-initialise a module if it is exactly an ``nn.Linear``.

    The weight gets Xavier-normal values with gain 1.2 and every bias entry
    is set to 0.01. Modules of any other type are left untouched.
    """
    if type(model) is not nn.Linear:
        return
    nn.init.xavier_normal_(model.weight, gain=1.2)
    model.bias.data.fill_(0.01)

        
def train_n_test(model, title, epoc_num, train=train):
    """Train ``model`` for ``epoc_num`` epochs, evaluating after each one.

    After the last epoch the recorded loss and accuracy curves are plotted.

    Args:
        model: network to train.
        title: title prefix for the two plots.
        epoc_num: number of epochs to run.
        train: callable ``train(model, epoch)`` performing one training pass.
    """
    # Optional: model.apply(weights_init) re-initialises the linear layers (disabled).
    err_log = {'test': [], 'train': []}
    # Epochs are numbered 1..epoc_num inclusive; the upper bound used to be
    # exclusive, which ran one epoch fewer than requested.
    epochs = range(1, epoc_num + 1)
    for epoch in epochs:
        train(model, epoch)
        print("epochs: ", epoch)
        test(model, err_log)
    plot_graphs(err_log, epochs, title, 'loss')
    plot_graphs(err_log, epochs, title, 'accuracy')

In [None]:
# Pick the first GPU when available, otherwise the CPU, and train CNN4 with Adam.
# `optimizer` stays module-level on purpose: train() reads it as a global.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model4 = CNN4()
model4 = model4.to(device)
optimizer = optim.Adam(params=model4.parameters(), lr=0.0005)
train_n_test(model4, title="Adam", epoc_num=12)

In [None]:
# Pick the first GPU when available, otherwise the CPU, and train CNN5 with Adam.
# `optimizer` stays module-level on purpose: train() reads it as a global.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model5 = CNN5()
model5 = model5.to(device)
optimizer = optim.Adam(params=model5.parameters(), lr=0.0005)
train_n_test(model5, title="Adam", epoc_num=12)

In [None]:
test_data = np.load('../input/test.npy')
test_data_s = len(test_data)
print(test_data[0].shape)

# Pad and rescale every submission image, then batch them into one tensor.
test_data = torch.stack([resize(image) for image in test_data])

# Apply the statistics obtained from the training set.
test_data.sub_(mean).div_(std)
print(test_data.shape)

In [None]:
from IPython.display import HTML
import pandas as pd
import numpy as np
import base64

def create_download_link(df, title = "Download CSV file", filename = "data.csv"):
    """Build a text link that downloads ``df`` as a CSV file.

    The dataframe is written as CSV without its index, base64-encoded and
    embedded in a ``data:`` URI, so this will only work for files < 2MB or so.

    Args:
        df: dataframe to export.
        title: visible text of the link.
        filename: name suggested to the browser for the downloaded file.

    Returns:
        An ``IPython.display.HTML`` object wrapping the anchor tag.
    """
    encoded = base64.b64encode(df.to_csv(index=False).encode()).decode()
    template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(template.format(filename=filename, title=title, payload=encoded))

In [None]:
import csv

def out(model, num):
    """Write the model's predictions for ``test_data`` to ``out<num>.csv``.

    The file has the columns ``Id`` (1-based position of the sample in
    ``test_data``) and ``Category`` (the predicted class index translated
    back to the original label through ``class_to_uni``).

    Args:
        model: trained network used for prediction.
        num: string inserted into the output file name.
    """
    # Follow the model's own device instead of assuming CUDA.
    device = next(model.parameters()).device
    # newline='' is what the csv module expects; otherwise platforms that
    # translate line endings produce blank rows.
    with open('out' + num + '.csv', 'w', newline='') as out_file:
        wr = csv.DictWriter(out_file, fieldnames=['Id', 'Category'])
        wr.writeheader()
        with torch.no_grad():
            # A single stepped range covers full batches and the shorter tail,
            # and never yields an empty batch when the sample count is an
            # exact multiple of batch_s.
            for start in range(0, test_data_s, batch_s):
                batch = test_data[start:start + batch_s].to(device)
                pred = model(batch).max(1)[1]
                for offset, cls in enumerate(pred.tolist(), start=1):
                    wr.writerow({'Id': start + offset, 'Category': class_to_uni[cls]})

In [None]:
# Export CNN4's predictions to out4.csv.
out(model=model4, num='4')

In [None]:
# Export CNN5's predictions to out5.csv.
out(model=model5, num='5')