In [1]:
import numpy as np
import cv2
import os

import random
import torch
from torch.autograd import Variable
output_path = "mnist_train_images/"
train_path = "mnist_train.csv"
test_path = "mnist_test.csv"

In [278]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [325]:
def convert_dataset(train_path):
    dataset = np.genfromtxt(train_path, delimiter=',', skip_header=1)
    labels = dataset[:, 0].astype(np.uint8)
    values = dataset[:, 1:].astype(np.uint8)
    images = np.reshape(values, (-1, 28, 28))
    return images, labels

In [326]:
# dataset = np.genfromtxt("mnist_train.csv", delimiter=',', skip_header=1)
# labels = dataset[:, 0].astype(np.uint8)
# values = dataset[:,1:].astype(np.uint8)
# images = np.reshape(values, (-1,28,28))
# images.shape

images, labels = convert_dataset(train_path)

In [327]:
test_images, test_labels = convert_dataset(test_path)

In [328]:
learning_rate = 0.01
batch_size = 200
epochs = 10
log_interval = 10

In [313]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [314]:
class Net(nn.Module):
    def __init__(self, num_conv1, num_conv2, fc_output):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,num_conv1,kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(num_conv1, num_conv2,kernel_size=5,stride=1, padding=2)
        self.maxpool = nn.MaxPool2d(2,2)
        self.fc1 = nn.Linear(6*6*num_conv2,fc_output)
        self.fc2 = nn.Linear(fc_output, 10)
        # self.fc1 = nn.Linear(28*28, 200)
        # self.fc2 = nn.Linear(200, 200)
        # self.fc3 = nn.Linear(200, 10)
    def forward(self, x):
        out = self.conv1(x)
        out = F.relu(out)
        out = self.maxpool(out)
        out = self.conv2(out)
        out = F.relu(out)
        out = self.maxpool(out)
        out = out.reshape(x.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        # x = F.relu(self.fc1(x))
        # x = F.relu(self.fc2(x))
        # x = self.fc3(x)
        return out

In [315]:
net = Net(num_conv1=32,num_conv2=64,fc_output=1000)
net.to(device)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2304, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=10, bias=True)
)

In [329]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [330]:
def rotate(image, label):
        degree = random.randint(1,180)
        if (label == 6 or label == 9) and degree == 90:
            while degree != 90:
                degree = random.randint(1, 180)
        rot_img = np.uint8(np.zeros(image.shape))
        height, width = rot_img.shape
        midx,midy = (width//2, height//2)
        for i in range(rot_img.shape[0]):
            for j in range(rot_img.shape[1]):
                x= (i-midx)*np.cos(degree)+(j-midy)*np.sin(degree)
                y= -(i-midx)*np.sin(degree)+(j-midy)*np.cos(degree)
                x=round(x)+midx
                y=round(y)+midy
                if (x>=0 and y>=0 and x<image.shape[0] and  y<image.shape[1]):
                    rot_img[i,j] = image[x,y]
        return rot_img

In [331]:
def noise(im):
    noize = np.zeros(im.shape,np.uint8)
    cv2.randn(noize,0,50)
    n_im = cv2.add(im,noize)
    return n_im

In [332]:
def normalize(im):
    imin = float(im.min())
    imax = float(im.max())
    return (im - imin)/(imax - imin)

In [333]:
def conf(pred, target):
    confusion_vector = pred/target
    tp = torch.sum(confusion_vector==1).item()
    fp = torch.sum(confusion_vector == float('inf')).item()
    tn = torch.sum(torch.isnan(confusion_vector)).item()
    fn = torch.sum(confusion_vector == 0).item()
    return tp,fp,tn,fn

In [334]:
def saveModel(x):
    path = './' + x + '.pth'
    torch.save(net.state_dict(),path)

In [335]:
average_loss = 0.0
for epoch in range(epochs - 2):
    for i in range(len(images) // batch_size):
        batch = batch_size * i
        i_batch = images[batch:batch + batch_size]
        l_batch = labels[batch:batch + batch_size]
        for j in range(len(i_batch)):
            i_batch[j] = noise(i_batch[j])
            # i_batch[j] = rotate(i_batch[j], l_batch[j])
        data = torch.from_numpy(normalize(i_batch)).float()
        target = torch.from_numpy(l_batch)
        data = data.unsqueeze(1)
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        net_out = net(data)
        loss = criterion(net_out, target)
        loss.backward()
        optimizer.step()
    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, i * len(data), len(images),
               100. * batch / len(images), loss.data))
    average_loss += loss.data
average_loss = average_loss / epochs
print(average_loss)
saveModel(str(float(average_loss)))

tensor(0.1856)


In [336]:
norm_test_values = normalize(test_images)

In [337]:
test_loss = 0
correct = 0
tp= fp = tn = fn = 0
for i in range(len(norm_test_values)//batch_size):
    batch = batch_size * i
    im_batch = torch.from_numpy(norm_test_values[batch:batch + batch_size]).float()
    lb_batch = torch.from_numpy(test_labels[batch:batch + batch_size])
    data, target = Variable(im_batch, volatile=True), Variable(lb_batch)
    data = data.unsqueeze(1)
    net_out = net(data)
    test_loss += criterion(net_out, target).data
    pred = net_out.data.max(1)[1]
    correct += pred.eq(target.data).sum()
    tp1,fp1,tn1,fn1 = conf(pred,target.data)
    tp += tp1
    fp += fp1
    tn += tn1
    fn += fn1
test_loss /= len(test_images)
precision = tp/(tp + fp)
recall = tp/(tp + fn)
f1 = 2*precision * recall/(precision + recall)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_images),
    100. * correct / len(test_images)))
print("Precision %.4f "%precision,'Recall %.4f '%recall, "F1 %.4f "%f1 )

  data, target = Variable(im_batch, volatile=True), Variable(lb_batch)



Test set: Average loss: 0.0023, Accuracy: 8860/10000 (89%)

Precision 0.9804  Recall 0.9978  F1 0.9890 
