In [None]:
import torch
import os
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from torch import nn, optim
import pandas as pd
import numpy as np
from PIL import Image
from time import time

In [None]:
class CustomDatasetFromImages(Dataset):
    """Image-classification dataset indexed by a header-less CSV file.

    The CSV is read with ``header=None``; its first column holds image file
    names (relative to ``img_dir``) and its second column holds the integer
    class label of each image.
    """

    def __init__(self, csv_path, img_dir="data/training/", image_size=(28, 28)):
        """
        Args:
            csv_path (string): path to the csv file listing images and labels
            img_dir (string): folder the image names in the csv are relative
                to; defaults to the previously hard-coded "data/training/"
            image_size (tuple): (height, width) every image is resized to
                before tensor conversion; defaults to (28, 28)
        """
        self.img_dir = img_dir
        # Transforms are built once here instead of on every item access.
        self.resize = transforms.Resize(image_size)
        self.to_tensor = transforms.ToTensor()
        # Read the csv file
        self.data_info = pd.read_csv(csv_path, header=None)
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Calculate len
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        """Return ``(image_tensor, label_tensor)`` for the sample at ``index``.

        ``label_tensor`` is a ``torch.long`` tensor of shape ``(1,)``.
        An out-of-range ``index`` raises ``IndexError`` from the underlying
        array lookup, which is what ends plain ``for`` iteration over the
        dataset.
        """
        # Get image name from the csv-backed array
        single_image_name = self.image_arr[index]

        # The context manager closes the file handle deterministically; the
        # resized image is a separate object, so it stays valid afterwards.
        # NOTE(review): no mode conversion is applied, so the tensor's channel
        # count follows the image file's mode - confirm inputs are single-channel
        # if downstream code expects that.
        image_path = os.path.join(self.img_dir, single_image_name)
        with Image.open(image_path) as img_as_img:
            img_as_tensor = self.to_tensor(self.resize(img_as_img))

        # Wrap the label in a one-element long tensor
        single_image_label = torch.tensor([self.label_arr[index]], dtype=torch.long)

        return (img_as_tensor, single_image_label)

    def __len__(self):
        """Number of rows in the csv file."""
        return self.data_len

#if __name__ == "__main__":
    # Call dataset
#    custom_mnist_from_images =  \
#        CustomDatasetFromImages('data/training')

In [None]:
#TRAIN_PATH = 'data/training'
# Build the training set from the csv index of image names and labels.
train_data = CustomDatasetFromImages(
    csv_path='data/training/labels-tab-csv.csv',
)
#train_loader = torch.utils.data.DataLoader(train_data, batch_size=16, shuffle=True)



In [None]:
# Network dimensions: 784 input features, two hidden layers, 11 output classes.
input_size = 784
hidden_sizes = [128, 64]
output_size = 11

# Stack Linear+ReLU for each hidden layer, then a final Linear projection
# followed by LogSoftmax over the class dimension (dim=1). Layers are created
# in the same order as they appear in the network.
layer_widths = [input_size] + list(hidden_sizes)
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.append(nn.Linear(layer_widths[-1], output_size))
layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*layers)

In [None]:
# Negative log-likelihood loss; pairs with the LogSoftmax output of the model.
criterion = nn.NLLLoss()

# Feed the model shuffled mini-batches instead of iterating the dataset
# directly (which visits one sample at a time, always in csv order).
batch_size = 16
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Smoke test on one batch before the long training run: checks that the
# flattened images fit the model and that the loss accepts the label shape.
# no_grad avoids building an autograd graph that is never used.
images, labels = next(iter(train_loader))
images = images.view(images.shape[0], -1)  # (N, C, H, W) -> (N, C*H*W)
labels = labels.view(-1)                   # (N, 1) -> (N,) as NLLLoss expects
with torch.no_grad():
    logps = model(images)
    print(images.shape, labels.shape)
    loss = criterion(logps, labels)


optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        # Flatten each image to a feature vector and each label to a scalar.
        images = images.view(images.shape[0], -1)
        labels = labels.view(-1)

        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        loss.backward()

        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample average even when the last batch
        # is smaller than the others.
        running_loss += loss.item() * images.shape[0]

    print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_data)))

print("\nTraining Time (in minutes) =",(time()-time0)/60)