In [1]:
import torch
import numpy as np 
import pandas as pd 

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is not available.  Training on CPU ...


In [2]:
data = pd.read_csv('train.csv')

labels = data['label'] 
features = data.drop('label', axis = 1)

In [3]:
features = torch.from_numpy(features.values.reshape(-1,1,28,28))
features = features.type(torch.float32)

labels = torch.from_numpy(labels.values)
labels = labels.type(torch.long)

In [4]:
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset

train_data = TensorDataset(features, labels)

idx = list(range(len(train_data)))
mid = int(0.9*len(train_data))

# split and suffle indexes
train_idx = SubsetRandomSampler(idx[:mid])
test_idx = SubsetRandomSampler(idx[mid:])

# data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=20, sampler=train_idx)
test_loader = torch.utils.data.DataLoader(train_data, batch_size=20, sampler=test_idx)


In [5]:
print(len(train_loader)*20)
print(len(test_loader)*20)

37800
4200


## 1st Model

In [27]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_1(nn.Module):
    def __init__(self):
        super(Net_1, self).__init__()        
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 256)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)   
        x = F.log_softmax(self.fc3(x), dim=1)
        return x
model_1 = Net_1()
print(model_1)

Net_1(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


## 2nd Model

In [28]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_2(nn.Module):
    def __init__(self):
        super(Net_2, self).__init__()        
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)   
        x = F.log_softmax(self.fc2(x), dim=1)
        return x
model_2 = Net_2()
print(model_2)

Net_2(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


## 3rd Model(Selector)

In [29]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class Net_3(nn.Module):
    def __init__(self):
        super(Net_3, self).__init__()        
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.linear(128,10)
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        # flatten image input
        x = x.view(-1, 256)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)           
        x = F.log_softmax(self.fc2(x), dim=1)
        return x
model_3 = Net_3()
print(model_3)

Net_3(
  (fc1): Linear(in_features=10, out_features=10, bias=True)
  (dropout): Dropout(p=0.2)
)


In [35]:
# loss function
criterion_1 = nn.MSELoss()
criterion_2 = nn.MSELoss()
criterion_3 = nn.CrossEntropyLoss()

# optimizer
optimizer_1 = torch.optim.Adagrad(model_1.parameters(), lr=0.1)
optimizer_2 = torch.optim.Adagrad(model_2.parameters(), lr=0.1)
optimizer_3 = torch.optim.Adagrad(model_3.parameters(), lr=0.1)

In [42]:
# number of epochs 
epochs = 20

model_1.train() 
model_2.train()
model_3.train()

for epoch in range(epochs):
    
    train_loss_1 = 0.0
    train_loss_2 = 0.0
    train_loss_3 = 0.0
    
    total = 0.0
    
    for data, target in train_loader:
        optimizer_1.zero_grad()
        optimizer_2.zero_grad()
        optimizer_3.zero_grad()
        
        output_1 = model_1(data)
        output_2 = model_2(data)
        
        #output = torch.cat((output_1, output_2), 1)
        output = (output_1+output_2)/2
        
        output_3 = model_3(output)       
        
        loss_3 = criterion_3(output_3, target)
        
        #back_to_MLP1_MLP2 = output-loss_3/model_3.state_dict()['fc1.weight'].mean(0)
        #back_to_MLP1 = back_to_MLP1_MLP2[:,:10]
        #back_to_MLP2 = back_to_MLP1_MLP2[:,10:] 
        
       
        #loss_2 = criterion_2(output_2, back_to_MLP2)
        #loss_1 = criterion_1(output_1, back_to_MLP1)
        
        loss_3.backward(retain_graph=True)
        #loss_2.backward(retain_graph=True)
        #loss_1.backward()
        
        optimizer_1.step()
        optimizer_2.step()
        optimizer_3.step()
        
        #train_loss_1 += loss_1.item()*data.size(0)
        #train_loss_2 += loss_2.item()*data.size(0)
        train_loss_3 += loss_3.item()*data.size(0)
        
        total += len(data) 
        
    # print training statistics 
    # calculate average loss over an epoch
    #train_loss_1 = train_loss_1/total
    #train_loss_2 = train_loss_2/total
    train_loss_3 = train_loss_3/total
    
    #print('Epoch: {} \tTraining Loss_1: {:.6f}'.format(epoch+1,train_loss_1)) 
    #print('          \tTraining Loss_2: {:.6f}'.format(train_loss_2))
    print('Epoch: {} \tTraining Loss_3: {:.6f}'.format(epoch+1,train_loss_3))

NameError: name 'loss_2' is not defined

In [41]:
model_3.state_dict()['fc1.weight'].mean(1)

tensor([-0.1186, -0.0740, -0.0740, -0.1310, -0.1347, -0.1122, -0.0912, -0.1203,
        -0.1531, -0.0996])

In [37]:
correct = 0.0
test_loss = 0.0
total= 0.0

model_1.eval()
model_2.eval()
model_3.eval()

for data, target in test_loader:
    output_1 = model_1(data)
    output_2 = model_2(data)
    #output = torch.cat((output_1, output_2), 1)
    output = (output_1+output_2)/2
    output_3 = model_3(output)        
    loss = criterion_3(output_3, target)  
    test_loss += loss.item()*data.size(0)
    _, pred = torch.max(output, 1)
    correct += np.squeeze(pred.eq(target.data.view_as(pred))).sum()
    total += len(data)
    
# calculate and print average test loss
test_loss = test_loss/total
print('Test Loss: {:.6f}'.format(test_loss)) 

# calculate and print accuracy
print('Test Accuracy: {:.2f}% ({}/{})' .format(100. * np.float(correct) / total, 
                                           correct, total))
        


Test Loss: 0.556282
Test Accuracy: 14.31% (601/4200.0)


In [38]:
correct = 0.0
test_loss = 0.0
total= 0.0

model.eval() # prep model for evaluation

for data, target in test_loader:
    output = model(data)
    loss = criterion(output, target)  
    test_loss += loss.item()*data.size(0)
    _, pred = torch.max(output, 1)
    correct += np.squeeze(pred.eq(target.data.view_as(pred))).sum()
    total += len(data)
    
# calculate and print average test loss
test_loss = test_loss/total
print('Test Loss: {:.6f}'.format(test_loss)) 

# calculate and print accuracy
print('Test Accuracy: {:.2f}% ({}/{})' .format(100. * np.float(correct) / total, 
                                           correct, total))
        


NameError: name 'model' is not defined

In [None]:
test_data = pd.read_csv('../input/test.csv')
test_data = torch.from_numpy(test_data.values.reshape(-1,1,28,28))
test_data = test_data.type(torch.float)

In [None]:
model.eval() 
result = []

for data in test_data:
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    _, pred = torch.max(output, 1)
    pred = pred.numpy()
    result += pred.tolist()

results = pd.Series(result,name="Label")
Imageld = pd.Series(range(1,len(result)+1),name="Imageld")

submission = pd.concat([Imageld,results],axis = 1)
submission.to_csv("submission.csv",index=False)

In [None]:
x = pd.read_csv('submission.csv')
print(x[:10])