In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as transform
from torch.nn.functional import normalize
from PIL import Image
import glob
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import albumentations
import cv2

### 1. Get training dataset in usable format

- images are in jpg format, need to convert these to PyTorch tensors
- tensors also have to be normalized  

In [None]:
#torchvision.io.read_image()


In [57]:
### class to load and store relevant data 
class LoadImageData:   
    def __init__(self, n_files_per_class=10000):  ### n_files == number of files of each letter to train on
        self.label_converter = dict()
        self.convert_label_to_int()
        self.n_files_per_class = n_files_per_class
        self.train_data    = [] # train data paths, no longer in tensor format
        self.test_data     = [] # test data paths, no longer in tensor format
        self.train_labels = [] # truth LETTER label for training data
        self.test_labels   = [] # truth LETTER label for test data
        self.train_labels_num =  [] # truth NUMBER label for training data
        self.test_labels_num  =  [] # truth NUMBER label for test data
        self.image_path_dict = dict() # dictionary of all training file paths
        self.load_train_data()
        self.load_test_data()
        
        self.transf = albumentations.Compose([
            albumentations.Resize(224, 224, always_apply=True),
        ])
    ### helper function to convert str letters/del/space into numbers 
    def convert_label_to_int(self):
        alphabet = "A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/del/nothing/space"
        for iii,label in enumerate(alphabet.split("/")):
            self.label_converter[label] = iii
    
    ### load training data into the instance variable train_data
    def load_train_data(self):
        now = time.time()
        train_path = "datasets/asl_alphabet_train/asl_alphabet_train/"
        train_directories = glob.glob(train_path+"/*")
        n_files = 0
        print(" ----- Loading training dataset -----")

        for dir in train_directories:
            letter = dir.split("/")[-1]
            self.image_path_dict[letter] = []
            n_test_for_class = 0 
            for image_file in glob.glob(dir+"/*"):
                if n_test_for_class > (self.n_files_per_class-1):
                    break ### move onto the next letter 
                self.image_path_dict[letter].append(image_file)
                self.train_data.append( image_file )
                self.train_labels_num.append(self.label_converter[letter])
                self.train_labels.append(letter)
                n_test_for_class+=1
                n_files +=1
            print("Finished importing %s"%letter)
        print("Done with training dataset - loaded paths for %i files. Took %f seconds"%(n_files, np.around(time.time()-now)))
        return
    ### load test data into the instance variable train_data
    def load_test_data(self):
        now = time.time()
        test_path = "datasets/asl_alphabet_test/asl_alphabet_test/"
        test_directories = glob.glob(test_path+"/*")
        n_files = 0
        print("----- Loading test dataset -----")
        for image_file in test_directories:
            letter = image_file.split("_")[-2].split("/")[-1]
            self.image_path_dict[letter] = []
            self.image_path_dict[letter].append(image_file)
            self.test_data.append(image_file   )
            self.test_labels_num.append(self.label_converter[letter])
            self.test_labels.append(letter)
            n_files +=1
        print("Done with test dataset - loaded paths for %i files. Took %f seconds"%(n_files, np.around(time.time()-now,4)))
        return
    def __getitem__(self,i):
        image = cv2.imread(self.train_data[i])
        image = self.transf(image=np.array(image))['image']
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        label = self.train_labels[i]
        return torch.tensor(image, dtype=torch.float), torch.tensor(label, dtype=torch.long)
### lighter class to store test/train paths and labels
class ImageData:
    def __init__(self,paths, labels):
        self.X = paths
        self.y = labels
        self.transf = albumentations.Compose([
            albumentations.Resize(224, 224, always_apply=True),
        ])
    def __len__(self):
        return (len(self.X))
        
    def __getitem__(self,i):
        image = cv2.imread(self.X[i])
        image = self.transf(image=np.array(image))['image']
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        label = self.y[i]
        return torch.tensor(image, dtype=torch.float), torch.tensor(label, dtype=torch.long)

In [58]:
datasets = LoadImageData(1000) ## data instance, passing in 1000 so that only 1000 of each letter are used for training 
### changing gears, keeping the "data" as the file paths to each jpg, also imbuing this class with a __get__
### built-in method that returns the relevent tensors for when these are needed 

train_data = ImageData(datasets.train_data, datasets.train_labels_num)
test_data  = ImageData(datasets.test_data, datasets.test_labels_num)

 ----- Loading training dataset -----
Finished importing H
Finished importing N
Finished importing E
Finished importing Q
Finished importing B
Finished importing I
Finished importing U
Finished importing Y
Finished importing G
Finished importing K
Finished importing D
Finished importing del
Finished importing M
Finished importing A
Finished importing L
Finished importing J
Finished importing X
Finished importing W
Finished importing R
Finished importing V
Finished importing F
Finished importing Z
Finished importing P
Finished importing space
Finished importing nothing
Finished importing T
Finished importing C
Finished importing S
Finished importing O
Done with training dataset - loaded paths for 29000 files. Took 0.000000 seconds
----- Loading test dataset -----
Done with test dataset - loaded paths for 28 files. Took 0.000200 seconds


In [59]:
batch_size = 4 
### now load data into ptytorch
trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

### 2. Create a csv with the file path, corresponding, letter, and then binarize this 

In [60]:
df_training = pd.DataFrame()
df_training['path'] = ""
df_training['letter'] = ""
for iii in range(0,len(id_.train_data)):
    df_training.loc[iii, 'path' ] = id_.train_data[iii]
    df_training.loc[iii, 'letter' ] = id_.train_labels[iii] ### converting letter to int
df_training = df_training.sample(frac=1).reset_index(drop=True) ### shuffle
df_training.to_csv("processedDatasets/train_data.csv") ### write out csv file 

In [61]:
letters_binarized = pd.get_dummies(df_training["letter"],dtype=int) ### binarize
letters_binarized.insert(0, 'path', df_training['path']) ### reinsert the path 
letters_binarized.to_csv("processedDatasets/train_data_binarized.csv") ### write out binarized csv

In [40]:
letters_binarized.head(10)

Unnamed: 0,path,A,B,C,D,E,F,G,H,I,...,T,U,V,W,X,Y,Z,del,nothing,space
0,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
7,datasets/asl_alphabet_train/asl_alphabet_train...,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,datasets/asl_alphabet_train/asl_alphabet_train...,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


### 2.5 Save labels in a loadable format

In [76]:
import pickle
import sklearn.preprocessing


    
binarized_labels = sklearn.preprocessing.label_binarize(id_.train_labels, classes = np.array("A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/del/nothing/space".split("/")))
with open('processedDatasets/labels_binarized.pickle', 'wb') as handle:
    pickle.dump(binarized_labels, handle, protocol=pickle.HIGHEST_PROTOCOL)

"""
### test pickle file is working
binarized_labels_pkl = open('processedDatasets/labels_binarized.pickle', 'rb')
# dump information to that file
binarized_labels_loaded = pickle.load(binarized_labels_pkl)
"""

"\n### test pickle file is working\nbinarized_labels_pkl = open('processedDatasets/labels_binarized.pickle', 'rb')\n# dump information to that file\nbinarized_labels_loaded = pickle.load(binarized_labels_pkl)\n"

### 3. Define NN Architecture

In [15]:
import torch.nn as nn
### custon CNN, inherits from base torch.nn 
class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN,self).__init__(self)  ## override __init__ to be from nn.Module base class
        self.conv1 = nn.Conv2d(3,16,5)
        self.conv2 = nn.Conv2d(16,32,5)
        self.conv3 = nn.Conv2d(32,64,3)
        self.conv4 = nn.Conv2d(64,128,5)
        
        self.fc1 = nn.Linear(128,256)
        self.fc2 = nn.Linear(256, len(labels_binarized.class))
        