In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms

In [2]:
# check to see if the GPU is available: prints True when PyTorch reports a usable
# CUDA device and False otherwise
print(torch.cuda.is_available())

True


In [3]:
# pick the compute device once: the first CUDA GPU when PyTorch can see one,
# the CPU in every other case
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# map-style dataset wrapping already-loaded samples and labels (needed for DataLoader)
class Dataset(torch.utils.data.Dataset):
    """Pair an indexable collection of samples with an equally long collection of labels.

    The base class is spelled out as ``torch.utils.data.Dataset`` rather than the
    bare imported name ``Dataset``: this class reuses that name, so re-running the
    cell with the bare name would make each new definition inherit from the
    previous one instead of from the PyTorch base class.

    Args:
        data: indexable collection of samples (``data[idx]`` and ``len(data)``
            must work).
        labels: indexable collection of labels with the same length as ``data``.

    Raises:
        ValueError: if ``data`` and ``labels`` differ in length.
    """

    def __init__(self, data, labels):
        # __len__ is driven by the labels alone, so a length mismatch would
        # otherwise either drop samples silently or fail mid-epoch on indexing
        if len(data) != len(labels):
            raise ValueError(
                "data and labels must have the same length, got {} and {}".format(
                    len(data), len(labels)
                )
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of (sample, label) pairs."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return item ``idx`` as ``{'data': sample, 'labels': label}``."""
        return {'data': self.data[idx], 'labels': self.labels[idx]}

In [5]:
# load the stored numpy arrays for both data sets, then give the image arrays
# their (N, 36, 36, 3) image layout again; labels are used exactly as stored
asl_data, asl_labels = np.load('asl_data.npy'), np.load('asl_labels.npy')
isl_data, isl_labels = np.load('isl_data.npy'), np.load('isl_labels.npy')
asl_data = asl_data.reshape((-1, 36, 36, 3))
isl_data = isl_data.reshape((-1, 36, 36, 3))

In [6]:
# hold out 20% of each data set for testing. The seed is fixed so that the split,
# and therefore every metric computed from it, is the same after a kernel restart.
SPLIT_SEED = 42
asl_train_data, asl_test_data, asl_train_labels, asl_test_labels = train_test_split(
    asl_data, asl_labels, test_size=0.2, random_state=SPLIT_SEED
)
isl_train_data, isl_test_data, isl_train_labels, isl_test_labels = train_test_split(
    isl_data, isl_labels, test_size=0.2, random_state=SPLIT_SEED
)
# drop the references to the full arrays (presumably to release their memory).
# NOTE: after this line the cell cannot be re-run on its own; re-run the loading
# cell first.
asl_data, asl_labels, isl_data, isl_labels = None, None, None, None

In [7]:
# image arrays: numpy (N, H, W, C) -> float torch tensor laid out as (N, C, H, W) for the CNN
asl_train_data, isl_train_data, asl_test_data, isl_test_data = (
    torch.from_numpy(images).permute(0, 3, 1, 2).float()
    for images in (asl_train_data, isl_train_data, asl_test_data, isl_test_data)
)
# label arrays: numpy -> int64 (long) torch tensor
asl_train_labels, isl_train_labels, asl_test_labels, isl_test_labels = (
    torch.from_numpy(targets).long()
    for targets in (asl_train_labels, isl_train_labels, asl_test_labels, isl_test_labels)
)

In [8]:
# # our composer object for applying transformations and augmentations (simply modify the list)
# composer = transforms.Compose([
#     transforms.RandomHorizontalFlip(p=0.25),
#     transforms.RandomVerticalFlip(p=0.25),
#     transforms.RandomRotation(15),
#     transforms.ColorJitter()
# ])
# # pass the training image data through the composer and concatenate
# asl_train_data = torch.cat((asl_train_data, composer(asl_train_data)))
# isl_train_data = torch.cat((isl_train_data, composer(isl_train_data)))

In [9]:
# wrap every (images, labels) pair in a Dataset object, one per language and split
asl_train_dataset, asl_test_dataset = (
    Dataset(asl_train_data, asl_train_labels),
    Dataset(asl_test_data, asl_test_labels),
)
isl_train_dataset, isl_test_dataset = (
    Dataset(isl_train_data, isl_train_labels),
    Dataset(isl_test_data, isl_test_labels),
)

In [21]:
# instantiate dataloader objects for training and evaluation
BATCH_SIZE = 64
# training batches are drawn in a fresh random order every epoch
asl_train_dataloader = DataLoader(asl_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
isl_train_dataloader = DataLoader(isl_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# evaluation gains nothing from shuffling, so the test loaders keep a fixed order
# and every evaluation pass visits the samples identically
asl_test_dataloader = DataLoader(asl_test_dataset, batch_size=BATCH_SIZE, shuffle=False)
isl_test_dataloader = DataLoader(isl_test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [22]:
# sanity check: print the tensor shapes of every 100th ASL training batch
for i, batch in enumerate(asl_train_dataloader):
    if i % 100 != 0:
        continue
    data = batch['data'].to(device)
    labels = batch['labels'].to(device)
    print(data.shape, labels.shape)

torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])


In [23]:
# sanity check: print the tensor shapes of every 100th ISL training batch
for i, batch in enumerate(isl_train_dataloader):
    if i % 100 != 0:
        continue
    data = batch['data'].to(device)
    labels = batch['labels'].to(device)
    print(data.shape, labels.shape)

torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])
torch.Size([64, 3, 36, 36]) torch.Size([64])


In [24]:
class Net(torch.nn.Module):
    """Three-stage convolutional classifier for 36x36 images.

    Each stage is a 3x3 convolution followed by ReLU and 2x2 max pooling. With the
    paddings used here the spatial size goes 36 -> 18 -> 10 -> 6, so the flattened
    feature vector has 256*6*6 entries; other input sizes will not match
    ``linear1``. The features pass through batch norm, dropout, a 1024-unit
    sigmoid layer and a final linear layer.

    Args:
        num_classes: number of output scores per sample (default 27).
        in_channels: number of channels in the input images (default 3).
    """

    def __init__(self, num_classes=27, in_channels=3):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.maxpool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        # padding=1 on the last two pools is what yields the 10x10 and 6x6 maps
        self.maxpool2 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.conv3 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.relu = torch.nn.ReLU()
        self.batchnorm = torch.nn.BatchNorm2d(256)
        self.dropout = torch.nn.Dropout()
        self.linear1 = torch.nn.Linear(256*6*6, 1024)
        self.linear2 = torch.nn.Linear(1024, num_classes)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a batch of images ``(N, in_channels, 36, 36)`` to scores ``(N, num_classes)``.

        The returned scores are the raw output of the last linear layer; no
        softmax is applied here.
        """
        out = self.maxpool1(self.relu(self.conv1(x)))
        out = self.maxpool2(self.relu(self.conv2(out)))
        out = self.maxpool3(self.relu(self.conv3(out)))
        out = self.batchnorm(out)
        # keep the batch dimension, flatten channels and spatial dims together
        out = torch.flatten(out, start_dim=1)
        out = self.dropout(out)
        out = self.sigmoid(self.linear1(out))
        out = self.linear2(out)
        return out

In [25]:
def _count_correct(predictions, labels):
    """Return how many rows of ``predictions`` have their largest score at the labelled class."""
    return (predictions.argmax(dim=1) == labels).sum().item()


def _train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Returns:
        ``(accuracy, summed_loss)``: the fraction of samples classified correctly
        (measured on the fly, while the weights are still changing) and the sum
        of the per-batch loss values.
    """
    model.train()
    correct, total, summed_loss = 0, 0, 0
    for batch in dataloader:
        data = batch['data'].to(device)
        labels = batch['labels'].to(device)
        optimizer.zero_grad()
        predictions = model(data)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        summed_loss += loss.item()
        total += labels.shape[0]
        correct += _count_correct(predictions, labels)
    return correct / total, summed_loss


def _evaluate(model, dataloader, device):
    """Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly."""
    model.eval()
    correct, total = 0, 0
    # no gradients are needed for evaluation
    with torch.no_grad():
        for batch in dataloader:
            data = batch['data'].to(device)
            labels = batch['labels'].to(device)
            total += labels.shape[0]
            correct += _count_correct(model(data), labels)
    return correct / total


# train a fresh network on the ASL training split and evaluate it on the ASL
# test split after every epoch
net = Net().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
training_accuracy = []
training_loss = []  # per epoch: sum of the batch losses (not averaged)
testing_accuracy = []
num_epochs = 10
for epoch in range(num_epochs):
    epoch_accuracy, epoch_loss = _train_one_epoch(
        net, asl_train_dataloader, criterion, optimizer, device
    )
    training_accuracy.append(epoch_accuracy)
    training_loss.append(epoch_loss)
    print("Epoch {} Done".format(epoch))

    testing_accuracy.append(_evaluate(net, asl_test_dataloader, device))

Epoch 0 Done
Epoch 1 Done
Epoch 2 Done
Epoch 3 Done
Epoch 4 Done
Epoch 5 Done
Epoch 6 Done
Epoch 7 Done
Epoch 8 Done
Epoch 9 Done


In [26]:
# fraction of training samples classified correctly, one entry per epoch
training_accuracy

[0.8009365131220734,
 0.9749226054355097,
 0.9843499634417635,
 0.9865590143277174,
 0.9889547455702307,
 0.9900592710132077,
 0.992066084846222,
 0.9922216518100216,
 0.9918638477932826,
 0.9944618160887354]

In [27]:
# fraction of ASL test samples classified correctly after each training epoch
testing_accuracy

[0.9668968950283119,
 0.9888619252068944,
 0.9914753282309751,
 0.9589944620745442,
 0.9922220148092838,
 0.9974488208574451,
 0.9963910148715077,
 0.9953332088855703,
 0.9977599402650738,
 0.9990044178955884]

In [28]:
# training loss per epoch: the sum of the per-batch loss values (not averaged over batches)
training_loss

[652.6611473225057,
 82.81278530228883,
 51.571664814371616,
 42.01046396745369,
 33.28210420534015,
 30.51154064614093,
 24.616194724920206,
 22.384505321795586,
 25.08499616268091,
 16.628041069736355]