In [None]:
import torch
import os
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from torch import nn, optim
import pandas as pd
import numpy as np
from PIL import Image
from time import time
import matplotlib.pyplot as plt

In [None]:
class TrainDatasetFromImages(Dataset):
    def __init__(self, csv_path):
        """
        Args:
            csv_path (string): path to csv file
            img_path (string): path to the folder where images are
            transform: pytorch transforms for transforms and tensor conversion
        """
        # Transforms
        self.to_tensor = transforms.ToTensor()
        # Read the csv file
        self.data_info = pd.read_csv(csv_path, header=None)
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Calculate len
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open image
        img_as_img = Image.open("data/training/"+single_image_name)

        t = transforms.Resize((60,171))
        
        img_as_img = t(img_as_img)
        
        # Transform image to tensor
        img_as_tensor = self.to_tensor(img_as_img)

        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]

        single_image_label = torch.tensor([single_image_label], dtype=torch.long)
        
        return (img_as_tensor, single_image_label)

    def __len__(self):
        return self.data_len

#if __name__ == "__main__":
    # Call dataset
#    custom_mnist_from_images =  \
#        CustomDatasetFromImages('data/training')
print("low")

In [None]:
train_data = TrainDatasetFromImages('data/training/labels-tab-csv.csv')
print(train_data[0][0])

            
        


In [22]:
input_size = 10260
hidden_sizes = [750]
output_size = 11 #might be an issue

model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.LogSoftmax(dim=1))

In [None]:
criterion = nn.NLLLoss()
images, labels = next(iter(train_data))
images = images.view(images.shape[0], -1)
logps = model(images) 
print(images.shape, labels.shape)
loss = criterion(logps, labels)


optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.85)
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_data:
        images = images.view(images.shape[0], -1)
    
        optimizer.zero_grad()
        
        output = model(images)
        loss = criterion(output, labels)
        
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_data)))

print("\nTraining Time (in minutes) =",(time()-time0)/60)

torch.Size([1, 10260]) torch.Size([1])
Epoch 0 - Training loss: 3.354875644972158
Epoch 1 - Training loss: 3.2225064956864644
Epoch 2 - Training loss: 3.1422515009724816
Epoch 3 - Training loss: 3.066126965506132
Epoch 4 - Training loss: 2.9939740384733953
Epoch 5 - Training loss: 2.9257804896942403
Epoch 6 - Training loss: 2.8614910314942517
Epoch 7 - Training loss: 2.8009846263846687
Epoch 8 - Training loss: 2.7440854491882547
Epoch 9 - Training loss: 2.690549212763476
Epoch 10 - Training loss: 2.640080735572549
Epoch 11 - Training loss: 2.592392921135869


In [21]:
valloader = TrainDatasetFromImages('data/training/labels-tab-csv.csv')

correct_count, all_count = 0, 0
for images,labels in valloader:
  for i in range(len(labels)):
    img = images[i].view(1, 10260)
    with torch.no_grad():
        logps = model(img)

    
    ps = torch.exp(logps)
    probab = list(ps.numpy()[0])
    pred_label = probab.index(max(probab))
    print(pred_label)
    
    true_label = labels.numpy()[i]
    if(true_label == pred_label):
      correct_count += 1
    all_count += 1

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))



7
8
9
4
5
6
8
8
8
6
7
4
7
4
5
6
7
8
8
7
7
8
8
5
5
6
7
8
8
8
8
8
8
5
5
6
7
8
8
8
8
9
3
4
5
6
7
8
9
9
8
3
3
4
5
6
9
8
8
6
7
9
3
4
5
6
7
8
9
8
9
9
9
4
5
6
7
9
9
9
7
7
7
5
5
6
7
8
8
8
7
8
9
4
5
6
7
8
9
8
7
7
7
4
5
6
7
8
9
8
7
7
7
4
5
6
7
8
9
7
8
8
8
4
5
6
7
8
9
8
7
7
7
4
5
6
7
8
9
10
7
8
7
4
5
6
7
8
9
8
8
8
8
4
7
6
7
8
8
8
8
9
8
4
5
6
9
8
9
6
7
5
7
4
5
6
7
8
8
8
7
6
3
4
5
6
7
8
9
10
4
4
4
4
4
7
4
4
8
8
8
8
7
4
5
6
7
8
8
7
8
7
7
4
7
7
7
8
9
8
7
5
8
7
5
6
8
8
8
8
7
3
8
6
5
6
7
8
9
10
8
8
3
4
5
6
8
8
8
9
7
8
7
4
5
6
7
8
8
8
8
6
6
4
5
6
7
8
8
10
8
8
6
6
5
6
7
8
8
8
7
7
3
4
5
6
7
8
8
7
9
9
3
4
5
6
9
8
9
5
4
8
3
4
5
6
7
3
3
5
3
9
3
4
6
6
7
9
9
6
3
8
3
6
5
6
7
8
9
5
7
7
3
6
5
6
7
8
9
6
8
8
3
6
5
6
7
8
9
8
8
6
3
4
5
6
7
8
9
10
9
8
9
7
7
6
7
8
8
8
4
8
4
4
4
8
7
8
9
9
8
9
8
4
6
6
7
8
8
8
8
7
7
4
5
6
7
8
7
6
7
9
3
5
5
6
8
8
9
6
8
8
7
8
8
8
7
8
8
8
3
3
3
4
5
6
3
9
9
6
6
2
3
4
5
6
7
8
9
10
3
2
3
4
5
6
7
8
9
10
8
3
3
5
5
6
7
8
9
6
7
9
7
4
5
6
7
8
9
10
3
3
3
6
5
6
7
9
9
10
3
2
3
4
5
6
7
8
9
5
8
8
8
4
8
8