### Steps for CNN of ASL Dataset

Load ASL training and test datasets 
- don't know how to process the image from individual pixels to torch or matplotlib

Define a Convolutional Neural Network
- working on understand different parts of torch architecture steps

Define a loss function

Train the network on the training data

Test the network on the test data

Visualize # of each letter in datasets(Bar graph to show samples in each class)

Randomly generate 1 sample from each class

Show correctly classified and wrongly classified to show why some are easier to classify vs harder to classify

#### Dataset Origin 
https://www.kaggle.com/datamunge/sign-language-mnist

#### Pytorch tutorial/inspiration
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [1]:
%matplotlib inline
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as T
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [2]:
#read in data
df_test = pd.read_csv('./sign_mnist_test.csv')
df_train = pd.read_csv('./sign_mnist_train.csv')

In [3]:
df_train.head()
df_test.head()
print(df_train.shape)
print(df_test.shape)

(27455, 785)
(7172, 785)


In [None]:
df_train.head()

In [4]:
#separate labels (letter classes) from training data set
#drop label from rest of pixels
labels_train = df_train['label']
df_train = df_train.drop('label',axis = 1)

In [5]:
# #separate labels (letter classes) from test data set
#drop label from rest of pixels
labels_test = df_test['label']
df_test = df_test.drop('label',axis = 1)

### Data visualizations 
- visualize # of each letter in total dataset
- (randomly) generate sample
- more?

In [None]:
# create dataframe with all labels to make a bargraph
frames = [labels_train,labels_test]
df_all_labels = pd.concat(frames)
print(df_all_labels.shape)
df_all_labels.head()

In [None]:
# create a bar graph with number of each labels
'''Want to change so that 0,1,2,3 etc reads A,B,C,D etc'''

ax = sns.barplot(x=df_all_labels.value_counts().index, y=df_all_labels.value_counts())
ax.set(ylabel = '# counts', xlabel = 'Alphabet Letters' )

In [None]:
df_all_labels.value_counts()

In [None]:
#convert each image to a 28x28 array
testimg = np.array(df_train[4:5]).reshape(28,28)
plt.imshow(testimg,cmap = 'gray')

### Converting data to tensor

In [None]:
# convert pandas df to numpy array and reshape into 28x28 array as prep for picture
''' only relevant for following cell with random image display'''

np_train = df_train.to_numpy() #training dataset
np_train = np_train.reshape(np_train.shape[0],28,28)
np_test = df_test.to_numpy() # testing dataset
np_test = np_test.reshape(np_test.shape[0],28,28)

In [None]:
#show random image
random_image = random.randint(0,len(np_train))
plt.imshow(np_train[random_image], cmap = 'gray')
plt.title(f"Training Example #{random_image}")
plt.xlabel(labels_train[random_image]) #xlabel = letter of alphabet

In [6]:
#converting to tensor with dataloader
batch_size = 10

train = data_utils.TensorDataset(torch.Tensor(np.array(df_train).reshape(-1,28,28)), torch.Tensor(np.array(labels_train)))
test = data_utils.TensorDataset(torch.Tensor(np.array(df_test).reshape(-1,28,28)), torch.Tensor(np.array(labels_test)))

trainloader = DataLoader(train, batch_size = batch_size,
                         shuffle = True, num_workers = 2)
testloader = DataLoader(test, batch_size = batch_size,
                       shuffle = False, num_workers = 2)

In [None]:
#COME BACK LATER
'''https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html'''
def imshow(img):
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % labels_train[j] for j in range(batch_size)))

In [7]:
#define neural network
class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        # 1 input channel (gray), 6 output channels, 3x3 square convolution
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 3 * 3, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 24)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 3 * 3)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [8]:
# architecture from https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html
class Net2(nn.Module):
    
    def __init__(self):
        super().__init__()
        #first 2d convolution layer 1 input, ouput 28 features, kernel 3x3
        self.conv1 = nn.Conv2d(1,32,3,1)
        #second conv layer, take in 28 layers, output 64 features, kernel 3x3
        self.conv2 = nn.Conv2d(32,64,3,1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 24)
    
    #x = data
    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        
        # Run max pooling over x
        x = F.max_pool2d(x, 2)
        # Pass data through dropout1
        x = self.dropout1(x)
        # Flatten x with start_dim=1
        x = torch.flatten(x, 1)
        # Pass data through fc1
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)

        # Apply softmax to x
        output = F.log_softmax(x, dim=1)
        return output
net2 = Net2()

In [9]:
#define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [10]:
# train network
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        print(data)
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[tensor([[[ 88.,  52.,  98.,  ..., 196., 197., 198.],
         [ 76.,  57., 101.,  ..., 199., 199., 199.],
         [ 63.,  65., 102.,  ..., 199., 201., 200.],
         ...,
         [108., 110., 117.,  ..., 231., 231., 231.],
         [106., 109., 116.,  ..., 229., 230., 230.],
         [104., 109., 115.,  ..., 227., 227., 229.]],

        [[124., 127., 129.,  ..., 134., 132., 131.],
         [127., 129., 131.,  ..., 136., 135., 134.],
         [130., 132., 135.,  ..., 139., 138., 137.],
         ...,
         [ 76.,  77.,  76.,  ...,  68.,  63.,  60.],
         [ 76.,  79.,  79.,  ...,  79., 101., 122.],
         [ 73.,  73.,  74.,  ..., 140., 175., 172.]],

        [[143., 154., 161.,  ..., 206., 204., 203.],
         [143., 154., 162.,  ..., 205., 205., 205.],
         [144., 154., 165.,  ..., 208., 207., 206.],
         ...,
         [160., 171., 182.,  ..., 231., 231., 228.],
         [140., 144., 148.,  ..., 144., 144., 144.],
         [131., 135., 138.,  ..., 126., 126., 125.]]

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 3, 3], but got 3-dimensional input of size [10, 28, 28] instead

In [11]:
# train network (Net2)
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        print(data)
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[tensor([[[126., 130., 130.,  ..., 131., 131., 131.],
         [132., 134., 134.,  ..., 135., 135., 135.],
         [135., 138., 139.,  ..., 138., 137., 136.],
         ...,
         [178., 179., 180.,  ..., 190., 188., 187.],
         [178., 181., 182.,  ..., 186., 186., 187.],
         [176., 181., 184.,  ..., 188., 183., 180.]],

        [[ 80.,  14.,  43.,  ..., 159., 160., 160.],
         [ 75.,  13.,  49.,  ..., 159., 160., 160.],
         [ 64.,  13.,  55.,  ..., 160., 161., 161.],
         ...,
         [ 73.,  76.,  80.,  ..., 190., 190., 192.],
         [ 72.,  76.,  81.,  ..., 191., 192., 192.],
         [ 73.,  76.,  81.,  ..., 192., 192., 192.]],

        [[185., 185., 185.,  ..., 189., 188., 188.],
         [186., 188., 189.,  ..., 191., 191., 191.],
         [189., 191., 191.,  ..., 193., 193., 192.],
         ...,
         [212., 213., 213.,  ..., 218., 219., 218.],
         [211., 211., 212.,  ..., 219., 218., 219.],
         [211., 210., 212.,  ..., 218., 219., 219.]]

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 3-dimensional input of size [10, 28, 28] instead

In [None]:
len(labels_train.unique())

In [None]:
# class ASLDataset(Dataset):
    
#     def __init__(self,X,y):
#         self.X = X # X = training images
#         self.y = y # y = labels
        
#     def __len__(self):
#         return len(self.X)
    
#     def __getitem__(self,index):
#         image = self.X[index]
#         X = self.transform(image)
#         return X
    

In [None]:
# image_size = 28
# transform = T.Compose([T.ToPILImage(),
#                       T.Resize(image_size),
#                       T.ToTensor()])

In [None]:
# batch_size = 20
# transformed_dataset = ASLDataset(np_train,labels_train)
# train_dl = DataLoader(transformed_dataset,batch_size, shuffle = True,
#                      num_workers = 3)


In [None]:
# def show_images(images, nmax=64):
#     fig, ax = plt.subplots(figsize=(8, 8))
#     ax.set_xticks([]); ax.set_yticks([])
#     ax.imshow(make_grid((images.detach()[:nmax]), nrow=8).permute(1, 2, 0))
# def show_batch(dl, nmax=64):
#     for images in dl:
#         show_images(images, nmax)
#         break

In [None]:
# show_batch(train_dl)