In [1]:
import torch
import numpy as np 
import pandas as pd 

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is not available.  Training on CPU ...


In [2]:
data = pd.read_csv('train.csv')

labels = data['label'] 
features = data.drop('label', axis = 1)

In [3]:
features = torch.from_numpy(features.values.reshape(-1,1,28,28))
features = features.type(torch.float32)

labels = torch.from_numpy(labels.values)
labels = labels.type(torch.long)

In [4]:
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset

train_data = TensorDataset(features, labels)

idx = list(range(len(train_data)))
mid = int(0.9*len(train_data))

# split and suffle indexes
train_idx = SubsetRandomSampler(idx[:mid])
test_idx = SubsetRandomSampler(idx[mid:])

# data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=20, sampler=train_idx)
test_loader = torch.utils.data.DataLoader(train_data, batch_size=20, sampler=test_idx)


In [5]:
print(len(train_loader)*20)
print(len(test_loader)*20)

37800
4200


## 1st Model

In [6]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_1(nn.Module):
    def __init__(self):
        super(Net_1, self).__init__()        
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)   
        x = self.fc3(x)
        return x
model_1 = Net_1()

if train_on_gpu:
    model_1.cuda()
    
print(model_1)

Net_1(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


## 2nd Model

In [7]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_2(nn.Module):
    def __init__(self):
        super(Net_2, self).__init__()        
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)   
        x = self.fc2(x)
        return x
model_2 = Net_2()

if train_on_gpu:
    model_2.cuda()
    
print(model_2)

Net_2(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


## 3rd Model(Selector)

In [8]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_3(nn.Module):
    def __init__(self):
        super(Net_3, self).__init__()        
        self.fc1 = nn.Linear(20,10)
        self.fc2 = nn.Linear(10,10)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 20)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)           
        x = F.log_softmax(self.fc2(x), dim=1)
        return x
model_3 = Net_3()

if train_on_gpu:
    model_3.cuda()
    
print(model_3)

Net_3(
  (fc1): Linear(in_features=20, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


In [9]:
# loss function
criterion_1 = nn.CrossEntropyLoss()
criterion_2 = nn.CrossEntropyLoss()
criterion_3 = nn.CrossEntropyLoss()

# optimizer
optimizer_1 = torch.optim.Adagrad(model_1.parameters(), lr=0.1)
optimizer_2 = torch.optim.Adagrad(model_2.parameters(), lr=0.1)
optimizer_3 = torch.optim.Adagrad(model_3.parameters(), lr=0.1)

In [11]:
# number of epochs 
epochs = 10

model_1.train() 
model_2.train()
model_3.train()

for epoch in range(epochs):
    
    train_loss_1 = 0.0
    train_loss_2 = 0.0
    train_loss_3 = 0.0
    
    total = 0.0
    
    for data, target in train_loader:
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        optimizer_1.zero_grad()
        optimizer_2.zero_grad()
        optimizer_3.zero_grad()
        
        output_1 = model_1(data)
        output_2 = model_2(data)
        
        output = torch.cat((output_1, output_2), 1)
        #output = (output_1+output_2)/2
        
        output_3 = model_3(output)       
        
        loss_3 = criterion_3(output_3, target)
        
        """
        back_to_MLP1_MLP2 = output-loss_3*model_3.state_dict()['fc1.weight'].mean(0)
        back_to_MLP1 = back_to_MLP1_MLP2[:,:10]
        back_to_MLP2 = back_to_MLP1_MLP2[:,10:] 
        """
       
        loss_2 = criterion_2(output_2, target)
        loss_1 = criterion_1(output_1, target)
        loss_3.backward(retain_graph=True)
        loss_2.backward(retain_graph=True)
        loss_1.backward()
        
        optimizer_1.step()
        optimizer_2.step()
        optimizer_3.step()
        train_loss_1 += loss_1.item()*data.size(0)
        train_loss_2 += loss_2.item()*data.size(0)
        train_loss_3 += loss_3.item()*data.size(0)
        
        total += len(data) 
        
    # print training statistics 
    # calculate average loss over an epoch
    train_loss_1 = train_loss_1/total    
    train_loss_2 = train_loss_2/total
    train_loss_3 = train_loss_3/total
    
    print('Epoch: {} \tTraining Loss_1: {:.6f} \tTraining Loss_2: {:.6f} \tTraining Loss_3: {:.6f}'\
          .format(epoch+1,train_loss_1, train_loss_2, train_loss_3)) 
   

Epoch: 1 	Training Loss_1: 1.728619 	Training Loss_2: 0.790370 	Training Loss_3: 0.858299
Epoch: 2 	Training Loss_1: 1.643722 	Training Loss_2: 0.779511 	Training Loss_3: 0.840399
Epoch: 3 	Training Loss_1: 1.538113 	Training Loss_2: 0.769190 	Training Loss_3: 0.815331
Epoch: 4 	Training Loss_1: 1.473631 	Training Loss_2: 0.753189 	Training Loss_3: 0.796022
Epoch: 5 	Training Loss_1: 1.397166 	Training Loss_2: 0.754880 	Training Loss_3: 0.792981
Epoch: 6 	Training Loss_1: 1.350845 	Training Loss_2: 0.740636 	Training Loss_3: 0.764003
Epoch: 7 	Training Loss_1: 1.281630 	Training Loss_2: 0.718438 	Training Loss_3: 0.744955
Epoch: 8 	Training Loss_1: 1.264511 	Training Loss_2: 0.716505 	Training Loss_3: 0.740776
Epoch: 9 	Training Loss_1: 1.243565 	Training Loss_2: 0.708146 	Training Loss_3: 0.721750
Epoch: 10 	Training Loss_1: 1.218782 	Training Loss_2: 0.703261 	Training Loss_3: 0.705842


In [14]:
correct_1 = 0.0
correct_2 = 0.0
correct_3 = 0.0
test_loss_1 = 0.0
test_loss_2 = 0.0
test_loss_3 = 0.0
total= 0.0

model_1.eval()
model_2.eval()
model_3.eval()

for data, target in test_loader:    
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
    output_1 = model_1(data)
    output_2 = model_2(data)
    output = torch.cat((output_1, output_2), 1)
    #output = (output_1+output_2)/2
    output_3 = model_3(output)        
    loss_3 = criterion_3(output_3, target)
    loss_2 = criterion_2(output_2, target)
    loss_1 = criterion_1(output_1, target)
    test_loss_1 += loss_1.item()*data.size(0)
    test_loss_2 += loss_2.item()*data.size(0)
    test_loss_3 += loss_3.item()*data.size(0)
    _, pred_1 = torch.max(output_1, 1)
    _, pred_2 = torch.max(output_2, 1)
    _, pred_3 = torch.max(output_3, 1)
    
    correct_1 += np.squeeze(pred_1.eq(target.data.view_as(pred_1))).sum()
    correct_2 += np.squeeze(pred_2.eq(target.data.view_as(pred_2))).sum()
    correct_3 += np.squeeze(pred_3.eq(target.data.view_as(pred_3))).sum()
    total += len(data)
    
# calculate and print average test loss
test_loss_1 = test_loss_1/total
test_loss_2 = test_loss_3/total
test_loss_3 = test_loss_3/total
print('Test Loss_1: {:.6f}\tTest Loss_2: {:.6f}\tTest Loss_3: {:.6f}'.format(test_loss_1,test_loss_2,test_loss_3)) 

# calculate and print accuracy
print('Test Accuracy_1: {:.2f}%\tTest Accuracy_2: {:.2f}%\tTest Accuracy_3: {:.2f}%\t ({}/{})' \
      .format(100. * np.float(correct_1) / total, 100. * np.float(correct_2) / total, 100. * np.float(correct_3) / total, correct_3, total))
        


Test Loss_1: 1.169330	Test Loss_2: 0.548858	Test Loss_3: 0.548858
Test Accuracy_1: 57.76%	Test Accuracy_2: 82.90%	Test Accuracy_3: 83.76%	 (3518/4200.0)


In [18]:
test_data = pd.read_csv('test.csv')
test_data = torch.from_numpy(test_data.values.reshape(-1,1,28,28))
test_data = test_data.type(torch.float)

In [19]:
model_1.eval()
model_2.eval()
model_3.eval()

result = []

for data in test_data:    
    if train_on_gpu:
        data = data.cuda()
    output_1 = model_1(data)
    output_2 = model_2(data)
    output = torch.cat((output_1, output_2), 1)
    #output = (output_1+output_2)/2
    output_3 = model_3(output)        
    _, pred = torch.max(output_3, 1)
    pred = pred.numpy()
    result += pred.tolist()

results = pd.Series(result,name="Label")
Imageld = pd.Series(range(1,len(result)+1),name="Imageld")

submission = pd.concat([Imageld,results],axis = 1)
submission.to_csv("submission.csv",index=False)

In [20]:
x = pd.read_csv('submission.csv')
print(x[:10])

   Imageld  Label
0        1      2
1        2      0
2        3      7
3        4      4
4        5      8
5        6      7
6        7      0
7        8      3
8        9      0
9       10      3
