In [49]:
import hangar
from hangar import Repository
from hangar import make_torch_dataset

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch import optim

In [5]:
hangar.__version__

'0.5.1'

In [7]:
repo = Repository('./')

In [8]:
repo.summary()

Summary of Contents Contained in Data Repository 
 
| Repository Info 
|----------------- 
|  Base Directory: /home/jjmachan/jjmachan/hangar_tutorial 
|  Disk Usage: 105.88 MB 
 
| Commit Details 
------------------- 
|  Commit: a=cdef6d59df587623e4767d9fac20b7670f97358d 
|  Created: Fri May  1 10:52:38 2020 
|  By: jjmachan 
|  Email: jjmachan@g.com 
|  Message: added all the mnist datasets 
 
| DataSets 
|----------------- 
|  Number of Named Columns: 6 
|
|  * Column Name: ColumnSchemaKey(column="mnist_test_images", layout="flat") 
|    Num Data Pieces: 10000 
|    Details: 
|    - column_layout: flat 
|    - column_type: ndarray 
|    - schema_hasher_tcode: 1 
|    - data_hasher_tcode: 0 
|    - schema_type: fixed_shape 
|    - shape: (784,) 
|    - dtype: float32 
|    - backend: 00 
|    - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} 
|
|  * Column Name: ColumnSchemaKey(column="mnist_test_labels", layout="flat") 
|    Num Data Pieces: 10000 
|   

In [28]:
co = repo.checkout()
co

sample_img = co['mnist_test_images'][0]
sample_label = co['mnist_test_labels'][0]


 Neither BRANCH or COMMIT specified.
 * Checking out writing HEAD BRANCH: master


## Create Dataloaders

Hangar provides two Dataloaders to import the data stored in the Hangar repositories directly for training in Tensorflow (*make_tf_dataset*) or PyTorch(*make_torch_dataset*). Both these take a list of columns and return a dataset with each index values in the columns.

In [48]:
# Create the train, test and val datasets using
# th make_torch dataset in hangar. This takes the 
# columns and creates a torch dataset out of it.

train_dataset = make_torch_dataset((co['mnist_training_images'], co['mnist_training_labels']))
test_dataset = make_torch_dataset((co['mnist_test_images'], co['mnist_test_labels']))
val_dataset = make_torch_dataset((co['mnist_validation_images'], co['mnist_validation_labels']))

In [50]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

## The model

In [78]:
class net(nn.Module):
    def __init__(self, inShape, outShape):
        super().__init__()
        self.fc1 = nn.Linear(inShape, 500)
        self.fc2 = nn.Linear(500, 200)
        self.fc3 = nn.Linear(200, outShape)
    
    def forward(self, input):
        out = F.relu(self.fc1(input))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        
        return out

model = net(784, 10)

In [36]:
sample_img = torch.tensor(sample_img)
sample_label = torch.tensor(sample_label)

  """Entry point for launching an IPython kernel.


In [37]:
model(sample_img),sample_label

(tensor([ 0.0046,  0.0118, -0.0002, -0.0126, -0.0314, -0.0218,  0.1049,  0.0653,
         -0.0027, -0.0422], grad_fn=<AddBackward0>), tensor([7]))

In [38]:
criterion = nn.CrossEntropyLoss()
output = model(sample_img).unsqueeze(0)
criterion(output, sample_label)

tensor(2.2458, grad_fn=<NllLossBackward>)

In [35]:
output.shape

torch.Size([1, 10])

In [41]:
dataset = make_torch_dataset((co['mnist_test_images'], co['mnist_test_labels']))



In [42]:
dataset[9]

BatchTuple_mnist_test_images_mnist_test_labels(mnist_test_images=array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.       

In [44]:
train_loader = DataLoader(dataset,
                          batch_size=32)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
epochs = 10

for epoch in range(epochs):
    total_loss_test = 0
    total_loss_train = 0
    accuracy = 0
    for img, label in train_loader:
        label = label.view(-1)
        optimizer.zero_grad()

        out = model(img)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        total_loss_train += loss.item()
        
    for img, label in test_loader:
        label = label.view(-1)
        with torch.no_grad():
            # Train Loss
            out = model(img)
            loss = criterion(out, label)
            total_loss_test += loss.item()
            
            # Accuracy
            _, indx = out.topk(1)
            correct = (indx.view(-1) == label).sum().item()
            acc = correct/batch_size
            accuracy += acc
    
    # Print losses for each epoch
    train_loss = total_loss_train/len(train_loader)
    test_loss = total_loss_test/len(test_loader)
    accuracy = accuracy/len(test_loader)
    print(f'[EPOCH {epoch}/{epochs}] Train Loss: {train_loss}')
    print(f'Test Loss: {test_loss} Accuracy: {accuracy}')

[EPOCH 0/10] Train Loss: 0.06814072782266706
Test Loss: 0.0918073209914596 Accuracy: 0.9716453674121406
[EPOCH 1/10] Train Loss: 0.06410053524654061
Test Loss: 0.08948530110020261 Accuracy: 0.9723442492012779
[EPOCH 2/10] Train Loss: 0.06032143013524422
Test Loss: 0.08731502429254877 Accuracy: 0.9728434504792333
[EPOCH 3/10] Train Loss: 0.056838085246138315
Test Loss: 0.08546964528010087 Accuracy: 0.9732428115015974
[EPOCH 4/10] Train Loss: 0.05359196147025799
Test Loss: 0.08372617617901117 Accuracy: 0.9734424920127795
[EPOCH 5/10] Train Loss: 0.050556435092588885
Test Loss: 0.08219187844603415 Accuracy: 0.9737420127795527
[EPOCH 6/10] Train Loss: 0.04774323870436546
Test Loss: 0.08069135308440123 Accuracy: 0.9740415335463258
[EPOCH 7/10] Train Loss: 0.04511407855756626
Test Loss: 0.07942048499828201 Accuracy: 0.974341054313099
[EPOCH 8/10] Train Loss: 0.04266405477471857
Test Loss: 0.07819276037175929 Accuracy: 0.9745407348242812


In [None]:
for img,