In [1]:
from sklearn.datasets import fetch_openml
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import IterableDataset, DataLoader,TensorDataset
from torch import optim
from torch.autograd import Variable

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
mnist = fetch_openml('mnist_784')
data = np.array(mnist.data)
labels = np.array(mnist.target)

In [8]:
X_train = data[:60000]
y_train = labels[:60000]
train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float, device=device),\
                           torch.tensor([int(y) for y in y_train], dtype=torch.int64, device=device))

X_val = data[60000:61000]
y_val = labels[60000:61000]
val_data = TensorDataset(torch.tensor(X_val, dtype=torch.float, device=device), \
                          torch.tensor([int(y) for y in y_val], dtype=torch.int64, device=device))

X_test = data[61000:]
y_test = labels[61000:]
test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float, device=device), \
                          torch.tensor([int(y) for y in y_test], dtype=torch.int64, device=device))

train_loader = DataLoader(train_data, num_workers=1, batch_size=32)
val_loader = DataLoader(val_data, num_workers=1, batch_size=32)
test_loader = DataLoader(test_data, num_workers=1, batch_size=32)

In [27]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2,),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
        self.conv2 = nn.Sequential(         
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        )
        self.out = nn.Linear(32 * 7 * 7, 10)
        
    def forward(self, x):
        batch_size = x.size()[0]
        x = x.view(batch_size, 1, 28, 28)
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)       
        output = self.out(x)
        return output


In [40]:
def save_model(model, path='./latest_model.pt'):
    model_dict = model.state_dict()
    state_dict = {'model': model_dict}
    torch.save(state_dict, path)
    
def load_model(path='./latest_model.pt'):
    model = CNN()
    model.load_state_dict(torch.load(path)["model"])
    model.to(device)
    return model

In [48]:
model = CNN()
optimizer = optim.Adam(model.parameters(), lr = 0.0001)   
loss_func = nn.CrossEntropyLoss()

In [49]:
def train(num_epochs):
    
    model.train()
    patient = 0
    best_loss = 1<<30
    iter_loss = 0
    training_loss = 0
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        training_loss = 0
        for i, (images, labels) in enumerate(train_loader):
            output = model(images)             
            loss = loss_func(output, labels)
            optimizer.zero_grad()           
            loss.backward()    
            optimizer.step()   
            iter_loss += loss.item()
            training_loss += loss.item()
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch + 1, num_epochs, i + 1, total_step, iter_loss/100.0))
                iter_loss = 0.0
        print("Training Loss is {:.4f}.".format(training_loss / total_step))
             
        val_loss, val_acc = evaluate(val_loader)
        if val_loss < best_loss:
            save_model(model, path='./latest_model.pt')
            best_loss = val_loss
            patient = 0
        else:
            patient += 1
            if patient >=5:
                break

In [50]:
def evaluate(data_loader):
    model.eval()
    correct = 0
    total = 0
    total_loss = 0
    with torch.no_grad():
        for images, labels in data_loader:
            output= model(images)
            loss = loss_func(output, labels)
            pred_y = torch.argmax(output, 1)
            correct += (pred_y == labels).sum().item()
            total += len(labels)
            total_loss += loss.item()
    accuracy = correct / total
    total_loss /= len(data_loader)
    print('Accuracy of the model on the evaluate images: %.2f' % accuracy)
    print('Loss of the model on the evaluate images: %.2f' % total_loss)
    
    model.train()
    return total_loss, accuracy


In [None]:
train(10)

Epoch [1/10], Step [100/1875], Loss: 3.4139
Epoch [1/10], Step [200/1875], Loss: 0.8555
Epoch [1/10], Step [300/1875], Loss: 0.7098
Epoch [1/10], Step [400/1875], Loss: 0.5216
Epoch [1/10], Step [500/1875], Loss: 0.5235
Epoch [1/10], Step [600/1875], Loss: 0.3527
Epoch [1/10], Step [700/1875], Loss: 0.2994
Epoch [1/10], Step [800/1875], Loss: 0.2588
Epoch [1/10], Step [900/1875], Loss: 0.2992
Epoch [1/10], Step [1000/1875], Loss: 0.2630
Epoch [1/10], Step [1100/1875], Loss: 0.2120
Epoch [1/10], Step [1200/1875], Loss: 0.2603
Epoch [1/10], Step [1300/1875], Loss: 0.2171
Epoch [1/10], Step [1400/1875], Loss: 0.2056
Epoch [1/10], Step [1500/1875], Loss: 0.2363
Epoch [1/10], Step [1600/1875], Loss: 0.1890
Epoch [1/10], Step [1700/1875], Loss: 0.1797
Epoch [1/10], Step [1800/1875], Loss: 0.1350
Training Loss is 0.4918.
Accuracy of the model on the evaluate images: 0.94
Loss of the model on the evaluate images: 0.72
Epoch [2/10], Step [100/1875], Loss: 0.2307
Epoch [2/10], Step [200/1875], L