<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#From-np.ndarray-to-torch.Tensor" data-toc-modified-id="From-np.ndarray-to-torch.Tensor-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>From <code>np.ndarray</code> to <code>torch.Tensor</code></a></span></li><li><span><a href="#Training-a-model-manually-defining-batches" data-toc-modified-id="Training-a-model-manually-defining-batches-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Training a model manually defining batches</a></span></li><li><span><a href="#Defining-TensorDatset-and-DataLoader" data-toc-modified-id="Defining-TensorDatset-and-DataLoader-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Defining <code>TensorDatset</code> and <code>DataLoader</code></a></span></li><li><span><a href="#Training-with-a-DataLoader-instance" data-toc-modified-id="Training-with-a-DataLoader-instance-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Training with a <code>DataLoader</code> instance</a></span></li></ul></div>

In [1]:
import numpy as np 
import pandas as pd 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable

from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('../input/train.csv')
print(df.shape)

(42000, 785)


In [3]:
y = df['label'].values
X = df.drop(['label'],1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

In [4]:
print(X_train.shape)
print(y_test.shape)

(35700, 784)
(6300,)


## From `np.ndarray` to `torch.Tensor`

One of the most basic methods that we need to benefit from pytorch is the capability to translate `np.ndarray` objects to `torch.Tensor` objects

- **`torch.from_numpy(X)`** creates a `torch.Tensor` from a `np.darray` object `X`


- **`torch.from_numpy(X).dtype(torch.LongTensor)`** creates a `torch.Tensor` from a `np.darray` object `X` and casts this object as `torch.LongTensor`.

In [5]:
BATCH_SIZE = 32

torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor)   # data type is long

In [6]:
torch_X_train.dtype, torch_y_train.dtype

(torch.int64, torch.int64)

## Training a model manually defining batches


In [7]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784,250)
        self.linear2 = nn.Linear(250,100)
        self.linear3 = nn.Linear(100,10)
    
    def forward(self,X):
        X = F.relu(self.linear1(X))
        X = F.relu(self.linear2(X))
        X = self.linear3(X)
        return F.log_softmax(X, dim=1)

mlp = MLP()
print(mlp)

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)



To train a model in a given minibatch we need:
    
- The model for the minibatch, store the results in `y_hat`.


- A loss function, in this case we will use `nn.CrossEntropyLoss()`


- The error between `y_hat_batch` and `y_batch`.
     - Note that the error is computed using `var_y_batch` which is a simple cast
       from numpy array to `torch.Tensor`.

In [8]:
def fit(model, X_train, y_train, batch_size):
    optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))
    error = nn.CrossEntropyLoss()
    EPOCHS = 2
    model.train()
    n_examples, n_features = X_train.shape
    n_batches_per_epoch = int(np.ceil(n_examples/batch_size))

    for epoch in range(EPOCHS):
        correct = 0
        for batch_idx in range(n_batches_per_epoch):
            start_pos = batch_idx * batch_size
            end_pos = start_pos + batch_size
            X_batch = X_train[start_pos:end_pos]
            y_batch = y_train[start_pos:end_pos]
            
            # X_batch.shape -> torch.Size([32, 784])
            var_X_batch = Variable(X_batch).float() 
            # y_batch.shape -> torch.Size([32]) 
            var_y_batch = Variable(y_batch)

            optimizer.zero_grad()
            y_hat = model(var_X_batch)
            loss = error(y_hat, var_y_batch)
            loss.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(y_hat.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()

            #print(correct)
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), n_examples, 
                    100.*batch_idx /n_examples, loss.data.item(),
                    float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))
                

In [9]:
BATCH_SIZE = 32
fit(mlp, torch_X_train, torch_y_train, BATCH_SIZE)



## Defining `TensorDatset` and `DataLoader`

Once we have our `torch.Tensor` objects from numpy arrays we can create `TensorDataset` objects


- **`torch.utils.data.TensorDataset(Xtensor,ytensor)`**

This type of `TensorDataset` objects can be used for 

- Wrapping datasets from tensors.
- Each sample will be retrieved by indexing tensors along the first dimension.



In [10]:
# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

In [11]:
type(train)

torch.utils.data.dataset.TensorDataset

In [12]:
train.tensors

(tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]), tensor([5, 9, 5,  ..., 9, 7, 6]))

Moreover, from a `TensorDataset` we can create our own loader with:
    
- **`torch.utils.data.DataLoader(Xtensor, batch_size, shuffle)`**


In [13]:
# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

Note that the train_loader is an iterable with length equal to the number of examples
divided by the batch size.

In [14]:
np.ceil(len(X_train)/BATCH_SIZE)

1116.0

In [15]:
len(train_loader)

1116

## Training with a `DataLoader` instance

Previously we have created an object named `train_loader` that is a `DataLoader` instance.

This object can be used to iterate over batches as follows:

```python
for X_batch, y_batch in train_loader:

    var_X_batch = Variable(X_batch).float() 
    var_y_batch = Variable(y_batch)
```

Therefore, it facilitates the generation of minibatches during learning.

In [16]:
type(train_loader), len(train_loader)

(torch.utils.data.dataloader.DataLoader, 1116)

In [17]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784,250)
        self.linear2 = nn.Linear(250,100)
        self.linear3 = nn.Linear(100,10)
    
    def forward(self,X):
        X = F.relu(self.linear1(X))
        X = F.relu(self.linear2(X))
        X = self.linear3(X)
        return F.log_softmax(X, dim=1)

mlp = MLP()
print(mlp)

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)


In [18]:
def fit(model, train_loader):
    optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))
    error = nn.CrossEntropyLoss()
    EPOCHS = 2
    model.train()
    for epoch in range(EPOCHS):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):

            # X_batch.shape -> torch.Size([32, 784])
            var_X_batch = Variable(X_batch).float() 
            # y_batch.shape -> torch.Size([32]) 
            var_y_batch = Variable(y_batch)
            
            
            optimizer.zero_grad()
            y_hat = model(var_X_batch)
            loss  = error(y_hat, var_y_batch)
            loss.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(y_hat.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()
            
            #print(correct)
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset), 
                    100.*batch_idx / len(train_loader), loss.data.item(), 
                    float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))
                

In [19]:
fit(mlp, train_loader)



In [20]:
def evaluate(model):
#model = mlp
    correct = 0 
    for test_imgs, test_labels in test_loader:
        #print(test_imgs.shape)
        test_imgs = Variable(test_imgs).float()
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))

In [21]:
evaluate(mlp)

Test accuracy:0.945% 
