In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # for plotting beautiful graphs

# train test split from sklearn
from sklearn.model_selection import train_test_split

# Import Torch 
import torch
import torch.nn as nn
from torchvision import transforms, models
# from torch.utils.data import SubsetRandomSampler
from torch.autograd import Variable
from torch import nn, optim
import torch.nn.functional as F

# What's in the current directory?
import os

AttributeError: module 'torch' has no attribute '_utils_internal'

In [2]:
import pickle

In [3]:
!ls ../input

digit-recognizer


In [4]:
# Read the three CSV files from the working directory. The training and
# submission tables are parsed entirely as float32; the sample submission keeps
# whatever dtypes pandas infers.
train, final_test = (
    pd.read_csv(csv_name, dtype=np.float32) for csv_name in ("train.csv", "test.csv")
)
sample_sub = pd.read_csv("sample_submission.csv")

# Show the first few labels.
train["label"].head()

0    1.0
1    0.0
2    1.0
3    4.0
4    0.0
Name: label, dtype: float32

In [5]:
# Separate the label column (targets) from the remaining columns (features).
targets_np = train["label"].to_numpy()
# Divide by 255 to rescale the pixel columns (presumably 0-255 intensities).
features_np = train.drop(columns="label").to_numpy() / 255

# Hold out 20% of the labelled rows for evaluation; the fixed seed keeps the
# split reproducible.
features_train, features_test, target_train, target_test = train_test_split(
    features_np,
    targets_np,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
# Everything downstream moves tensors with `.to(device)`, so the rest of the
# notebook stays device agnostic.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

print(device)

cpu


In [7]:
# Convert the NumPy splits into tensors. Features keep the dtype of the source
# arrays; the class labels are cast to int64 (long).
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(target_train).long()

# Same conversion for the held-out split.
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(target_test).long()

In [8]:
batch_size = 256

# Wrap the tensors as datasets. Dedicated names are used so that the `train`
# DataFrame loaded earlier is not overwritten; rebinding `train` here would make
# the label/feature extraction cell fail if it were ever re-run.
train_dataset = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test_dataset = torch.utils.data.TensorDataset(featuresTest, targetsTest)

# Data loaders. Only the training batches are shuffled: the evaluation metrics
# are sums over the whole held-out split, so shuffling it adds nothing and only
# makes evaluation order non-deterministic.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
class Classifier(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10.

    Each of the four hidden layers is followed by ReLU and dropout (p=0.2).
    The output layer is passed through log-softmax over dim 1, so the network
    returns log-probabilities rather than raw logits.
    """

    def __init__(self):
        super().__init__()
        # Four hidden layers followed by a 10-unit output layer.
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

        # A single dropout module (p=0.2) reused after every hidden layer.
        self.dropout = nn.Dropout(p=0.2)
        # Functional log-softmax, applied to the output layer in forward().
        self.log_softmax = F.log_softmax

    def forward(self, x):
        """Return log-probabilities for a batch whose last dimension is 28*28."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = self.dropout(F.relu(layer(hidden)))

        return self.log_softmax(self.fc5(hidden), dim=1)

In [10]:
# Surrogate loss used for training. Classifier.forward already returns
# log-probabilities (it ends in log_softmax), so the matching criterion is
# NLLLoss. CrossEntropyLoss would apply a second log-softmax on top, which is
# redundant (it yields the same value on inputs that are already normalised).
loss_fn = nn.NLLLoss()
# Summed rather than averaged per batch, so evaluation can divide by the exact
# number of samples seen.
test_loss_fn = nn.NLLLoss(reduction='sum')

# Spot to save learning curves, and potentially model checkpoints.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
savedir = 'results'
os.makedirs(savedir, exist_ok=True)

In [11]:
def train(model, train_loader, optimizer, epoch ):
    """Perform one epoch of training.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: DataLoader yielding (inputs, target) batches.
        optimizer: optimizer that updates the model's parameters.
        epoch: epoch number, used only in the progress log.

    Relies on the module-level `device` and `loss_fn`. Progress is printed
    every 10th batch.
    """
    model.train()

    n_batches = len(train_loader)
    # Report progress against the real dataset size. Multiplying the batch
    # count by the current batch length over-counts whenever the final batch
    # is shorter than the others.
    n_samples = len(train_loader.dataset)
    seen = 0  # samples processed before the current batch

    for batch_idx, (inputs, target) in enumerate(train_loader):
        inputs, target = inputs.to(device), target.to(device)

        # Standard optimisation step: reset gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, n_samples,
                100. * batch_idx / n_batches, loss.item()))

        seen += len(inputs)
            

In [12]:
def test(model, test_loader):
    """Evaluate the model by doing one pass over a dataset"""
    model.eval()
    
    test_loss = 0
    correct = 0
    test_size = 0
    
    with torch.no_grad():
        for inputs, target in test_loader:
            inputs, target = inputs.to(device), target.to(device)
            
            # TODO: code the evaluation loop
            output = model(inputs)
            test_size += len(inputs)
            test_loss += test_loss_fn(output, target).item() # sum up batch loss
            # output = batch size * n_classes
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            # pred = output.max(1, keepdim=True)
            # pred = pred[1] # get the index of the max log-probability

            # correct += pred.eq(target.view_as(pred)).sum().item()
            # #

    test_loss /= test_size
    accuracy = correct / test_size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, test_size,
        100. * accuracy))
    
    return test_loss, accuracy

In [13]:
# Build the network on the selected device and optimise it with Adam using the
# optimiser's default hyper-parameters (an explicit lr of 0.0005 was tried and
# left disabled).
model = Classifier().to(device)
optimizer = optim.Adam(model.parameters())

# Per-epoch evaluation history, pickled to <savedir>/<name>.pkl.
results = {'name':'basic', 'loss': [], 'accuracy':[]}
savefile = os.path.join(savedir, results['name']+'.pkl' )

for epoch in range(1, 200):  # epoch numbers 1 through 199
    train(model, train_loader, optimizer, epoch)
    loss, acc = test(model, test_loader)

    # Record this epoch's metrics, then rewrite the pickle so an interrupted
    # run still leaves the history up to the last finished epoch on disk.
    results['loss'].append(loss)
    results['accuracy'].append(acc)
    with open(savefile, 'wb') as fout:
        fout.write(pickle.dumps(results))


Test set: Average loss: 0.2489, Accuracy: 7746/8400 (92.21%)


Test set: Average loss: 0.1580, Accuracy: 7993/8400 (95.15%)


Test set: Average loss: 0.1317, Accuracy: 8064/8400 (96.00%)


Test set: Average loss: 0.1133, Accuracy: 8127/8400 (96.75%)


Test set: Average loss: 0.1103, Accuracy: 8141/8400 (96.92%)


Test set: Average loss: 0.1015, Accuracy: 8143/8400 (96.94%)


Test set: Average loss: 0.1109, Accuracy: 8138/8400 (96.88%)


Test set: Average loss: 0.0886, Accuracy: 8190/8400 (97.50%)


Test set: Average loss: 0.0998, Accuracy: 8180/8400 (97.38%)


Test set: Average loss: 0.0988, Accuracy: 8182/8400 (97.40%)


Test set: Average loss: 0.0931, Accuracy: 8189/8400 (97.49%)


Test set: Average loss: 0.0871, Accuracy: 8212/8400 (97.76%)


Test set: Average loss: 0.0881, Accuracy: 8198/8400 (97.60%)


Test set: Average loss: 0.1020, Accuracy: 8189/8400 (97.49%)


Test set: Average loss: 0.0968, Accuracy: 8202/8400 (97.64%)


Test set: Average loss: 0.1016, Accuracy: 8192/8400 (9

In [14]:
# Rescale the submission pixels by 255, matching the scaling applied to the
# training features, and wrap the array as a tensor.
final_test_np = final_test.to_numpy() / 255
test_tn = torch.as_tensor(final_test_np)

In [15]:
# Placeholder labels for the unlabelled submission rows. They only need to have
# one entry per row (TensorDataset requires matching first dimensions), so a
# 1-D tensor is enough; a zeros array shaped like the whole feature matrix
# would allocate rows x columns float64 values for no benefit.
fake_labels = torch.zeros(len(final_test_np), dtype=torch.long)

In [16]:
# NOTE(review): this cell rebuilds the placeholder labels that the preceding
# cell already creates; it is redundant and can be deleted.
# One placeholder entry per submission row is all TensorDataset needs (matching
# first dimensions), so a 1-D tensor replaces the feature-matrix-sized array.
fake_labels = torch.zeros(len(final_test_np), dtype=torch.long)

In [17]:
# Pair the submission features with the placeholder labels, then iterate them
# one sample at a time without shuffling so predictions stay in file order.
submission_tn_data = torch.utils.data.TensorDataset(test_tn, fake_labels)
submission_loader = torch.utils.data.DataLoader(
    dataset=submission_tn_data,
    batch_size=1,
    shuffle=False,
)

In [18]:
# Put the model in eval mode explicitly so dropout is disabled. Without this the
# cell depends on hidden state: if the training cell was interrupted inside
# train(), the model is still in training mode and predictions become random.
model.eval()

outputs = []
with torch.no_grad():
    for inputs, target in submission_loader:
        # Only the features are needed; the placeholder labels are ignored.
        inputs = inputs.to(device)

        output = model(inputs)
        pred = output.argmax(dim=1)
        # extend + tolist handles any batch size (pred.item() only works for
        # single-sample batches) and appends plain Python ints in order.
        outputs.extend(pred.tolist())

In [19]:
# Sanity check: how many predictions were collected.
len(outputs)

28000

In [20]:
# Write the submission CSV: a header row followed by one "ImageId,Label" line
# per prediction, with ImageId counting from 1. The context manager guarantees
# the file is flushed and closed even if a write fails; plain write mode is
# sufficient because the file is never read back here.
with open('mnist_sample_sub2.csv', 'w') as sub:
    sub.write('ImageId,Label\n')
    for image_id, prediction in enumerate(outputs, start=1):
        sub.write(str(image_id) + ',' + str(prediction) + '\n')