In [1]:
from pathlib import Path
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

import fastbook
fastbook.setup_book()

from fastbook import *
from fastai.vision.widgets import *

  except ModuleNotFoundError: warn("Missing `graphviz` - please run `conda install fastbook`")


In [2]:
class ImageWithCmdDataset(Dataset):
    def __init__(self, img_dir: Path):

        # Creates labels for dataset: ["left", "right", "straight"]
        # Does this through iterating through the directories in img_dir
        # and splitting on the backslash in the path name (as d is data/<label>),
        # then taking the last item of the list produced by .split
        self.class_labels = [str(d).split("/")[-1] for d in img_dir.iterdir() if d.is_dir() and ".ipynb_checkpoints" not in str(d)]
        self.class_indices = {lbl:i for i, lbl in enumerate(self.class_labels)}
        
        # Get all filenames
        self.all_filenames = []
        for d in img_dir.iterdir():
            if d.is_dir():
                self.all_filenames += [item for item in d.iterdir() if ".ipynb_checkpoints" not in str(item)]

    def __len__(self):
        return len(self.all_filenames)

    def __getitem__(self, index):
        # img_filename looks like data/<label>/<number>-<previous_move>.png
        img_filename = self.all_filenames[index]
        
        # Opens image file and converts it into a tensor
        img = Image.open(img_filename)
        img = img.resize((128,128))
        img = torch.Tensor(np.array(img)/255)
        img = img.permute(2,0,1)
        
        # Replaces - and . with spaces then splits on the spaces
        # taking the item at index 1 which is the <previous_move>
        cmd_name = img_filename.name.replace("-", " ").replace(".", " ").split()[1]
        cmd = self.class_indices[cmd_name]
        
        # img_filename.parent takes the parent directory of img_filename
        # then that is made to be of type string, split on the backslashes
        # and the <label> in img_filename is taken
        label_name = str(img_filename.parent).split("/")[-1]
        label = self.class_indices[label_name]
        
        # Data and the label associated with that data
        return (img, cmd), label

In [3]:
dataset = ImageWithCmdDataset(Path("data"))
#print(dataset.class_labels)
#print(dataset.all_filenames)

(img, cmd), label = dataset[5]

print(f"img: {img}")
print(f"cmd: {cmd}")
print(f"label: {label}")

img: tensor([[[0.8392, 0.8471, 0.8431,  ..., 0.2745, 0.2275, 0.2510],
         [0.8314, 0.8431, 0.8392,  ..., 0.2588, 0.2118, 0.2235],
         [0.8392, 0.8353, 0.8353,  ..., 0.2353, 0.1843, 0.1765],
         ...,
         [0.1098, 0.2471, 0.3961,  ..., 0.2157, 0.2196, 0.2078],
         [0.6000, 0.6078, 0.5961,  ..., 0.2431, 0.2471, 0.2706],
         [0.5961, 0.6078, 0.5843,  ..., 0.1804, 0.1961, 0.1843]],

        [[0.8196, 0.8235, 0.8235,  ..., 0.2745, 0.2275, 0.2510],
         [0.8157, 0.8235, 0.8196,  ..., 0.2588, 0.2118, 0.2314],
         [0.8235, 0.8196, 0.8157,  ..., 0.2353, 0.1843, 0.1804],
         ...,
         [0.1020, 0.2157, 0.3333,  ..., 0.2235, 0.2275, 0.2039],
         [0.5137, 0.5216, 0.5098,  ..., 0.2353, 0.2510, 0.2902],
         [0.5098, 0.5216, 0.4980,  ..., 0.1608, 0.1725, 0.1686]],

        [[0.8078, 0.8078, 0.8157,  ..., 0.2745, 0.2235, 0.2510],
         [0.8039, 0.8118, 0.8078,  ..., 0.2588, 0.2118, 0.2039],
         [0.8118, 0.8118, 0.8039,  ..., 0.2314, 0.149

## Splitting Data into train/validate sets

In [4]:
# Getting size of dataset and corresponding list of indices
dataset_size = len(dataset.all_filenames)
dataset_indices = list(range(dataset_size))

In [5]:
# Shuffling the indices
np.random.shuffle(dataset_indices)

In [6]:
# Getting index for where we want to split the data
val_split_index = int(np.floor(0.2 * dataset_size))

In [7]:
# Splitting list of indices into training and validation indices
train_idx, val_idx = dataset_indices[val_split_index:], dataset_indices[:val_split_index]

In [8]:
# Creating samplers
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [9]:
train_loader = DataLoader(dataset=dataset, shuffle=False, batch_size=16, sampler=train_sampler)
val_loader = DataLoader(dataset=dataset, shuffle=False, batch_size=16, sampler=val_sampler)

In [10]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.cnn = models.resnet18(pretrained=True)
        
        self.fc1 = nn.Linear(self.cnn.fc.out_features + 1, 512)
        self.fc2 = nn.Linear(512, 3)
        
    def forward(self, data):
        img, cmd = data
        x1 = self.cnn(img)
        x2 = cmd.unsqueeze(1)
        
        x = torch.cat((x1, x2), dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [11]:
net = MyModel()

In [12]:
# defining loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())

In [13]:
num_epochs = 50

In [14]:
for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    
    for data in train_loader:
        # get the inputs
        inp_data, label = data

        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        output = net(inp_data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        
    print(f"Training loss at epoch {epoch}/{num_epochs}:" + str(running_loss))

print('Finished Training')

Training loss at epoch 0/50:87.34523776437165
Training loss at epoch 1/50:361.00409464467407
Training loss at epoch 2/50:403.5047588678369
Training loss at epoch 3/50:137.84290138223878
Training loss at epoch 4/50:127.12608030765114
Training loss at epoch 5/50:119.786013425095
Training loss at epoch 6/50:120.00354105234146
Training loss at epoch 7/50:73.3855157494545
Training loss at epoch 8/50:71.54979711771011
Training loss at epoch 9/50:70.07654637098312
Training loss at epoch 10/50:68.7849685549736
Training loss at epoch 11/50:67.64779061079025
Training loss at epoch 12/50:66.64379113912582
Training loss at epoch 13/50:65.7557423710823
Training loss at epoch 14/50:64.96927785873413
Training loss at epoch 15/50:64.27215683460236
Training loss at epoch 16/50:63.65384244918823
Training loss at epoch 17/50:63.10516554117203
Training loss at epoch 18/50:62.618098735809326
Training loss at epoch 19/50:62.185601234436035
Training loss at epoch 20/50:61.801456570625305
Training loss at epo

In [15]:
correct = 0
total = 0

with torch.no_grad():

    for data in val_loader:

        inp_data, label = data

        # predict
        output = net(inp_data)
        
        # For-loop accounts for multiple batches
        if output.size()[0] > 1:
            for i in range(output.size()[0]):
                if round(float(torch.max(output[i]))) == label[i]:
                    correct +=1
                total +=1
        else:
            if round(float(torch.max(output))) == label:
                correct +=1
            total +=1
            
        
accuracy = correct / total
print(f"Accuracy on validation set: {correct}/{total} = {accuracy*100:.2f}%")

Accuracy on validation set: 211/1135 = 18.59%


In [16]:
PATH = 'cmd_torch.pth'
torch.save(net.state_dict(), PATH)