In [165]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import torchvision
from imutils import paths
import cv2
import os
from sklearn.model_selection import train_test_split

In [166]:
dataset_path = '../../datasets/cgds_2/'
imagePaths = sorted(list(map(lambda x: os.path.join(dataset_path,x), filter(lambda x: x not in ['', '.DS_Store'], os.listdir(dataset_path)))))

TEST_SPLIT = 0.2
BATCH_SIZE = 32
INPUT_IMAGE_HEIGHT = 300
INPUT_IMAGE_WIDTH = 900
device = "mps:0"

In [167]:
transforms = transforms.Compose([transforms.ToPILImage(),
 	transforms.Resize((INPUT_IMAGE_HEIGHT, INPUT_IMAGE_WIDTH)),
	transforms.ToTensor()])

class EyeDataset(Dataset):
	def __init__(self, imagePaths, transforms=None):
		self.imagePaths = imagePaths
		self.transforms = transforms
	def __len__(self):
		# return the number of total samples contained in the dataset
		return len(self.imagePaths)
	def __getitem__(self, idx):
		# grab the image path from the current index
		imagePath = self.imagePaths[idx]
		# load the image from disk, swap its channels from BGR to RGB,
		# and read the associated mask from disk in grayscale mode
		image = cv2.imread(imagePath)
		res = imagePath.split('/')[-1].split('.jpg')[0].split('_')
		pitch = res[2][:-1]
		yaw = res[3][:-1]
		res = [float(pitch), float(yaw)]
		res = torch.tensor(res, dtype=torch.float32)
		res = res.to(device)
		# check to see if we are applying any transformations
		if self.transforms is not None:
			image = self.transforms(image)
		image = torch.tensor(image).to(device)
		# return a tuple of the image and its mask
		return (image, res)

dataset = EyeDataset(imagePaths, transforms)	

In [176]:
(trainImages, testImages) = train_test_split(imagePaths, test_size=TEST_SPLIT, random_state=42)

# create the train and test datasets
trainDS = EyeDataset(imagePaths=trainImages, transforms=transforms)
testDS = EyeDataset(imagePaths=testImages, transforms=transforms)

print(f"[INFO] found {len(trainDS)} examples in the training set...")
print(f"[INFO] found {len(testDS)} examples in the test set...")

# create the training and test data loaders
trainLoader = DataLoader(trainDS, shuffle=True,
	batch_size=BATCH_SIZE, num_workers=0)
testLoader = DataLoader(testDS, shuffle=False,
	batch_size=1, num_workers=0)

[INFO] found 4704 examples in the training set...
[INFO] found 1176 examples in the test set...


In [169]:
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


In [170]:
class FirstEyeNet(nn.ModuleList):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 10)
        self.pool = nn.MaxPool2d(3, 3)
        self.conv2 = nn.Conv2d(6, 16, 10)
        self.conv3 = nn.Conv2d(16, 32, 10)
        self.fc1 = nn.Linear(5568, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = FirstEyeNet()
net.to(device)

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [171]:
for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainLoader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

  image = torch.tensor(image).to(device)


[1,    20] loss: 0.847
[1,    40] loss: 0.843
[1,    60] loss: 0.809
[1,    80] loss: 0.804
[1,   100] loss: 0.736
[1,   120] loss: 0.743
[1,   140] loss: 0.728
[2,    20] loss: 0.709
[2,    40] loss: 0.696
[2,    60] loss: 0.702
[2,    80] loss: 0.697
[2,   100] loss: 0.690
[2,   120] loss: 0.671
[2,   140] loss: 0.669
[3,    20] loss: 0.636
[3,    40] loss: 0.602
[3,    60] loss: 0.607
[3,    80] loss: 0.597
[3,   100] loss: 0.567
[3,   120] loss: 0.565
[3,   140] loss: 0.585
[4,    20] loss: 0.551
[4,    40] loss: 0.488
[4,    60] loss: 0.418
[4,    80] loss: 0.468
[4,   100] loss: 0.457
[4,   120] loss: 0.465
[4,   140] loss: 0.462
[5,    20] loss: 0.363
[5,    40] loss: 0.367
[5,    60] loss: 0.340
[5,    80] loss: 0.328
[5,   100] loss: 0.344
[5,   120] loss: 0.333
[5,   140] loss: 0.315
[6,    20] loss: 0.270
[6,    40] loss: 0.248
[6,    60] loss: 0.279
[6,    80] loss: 0.240
[6,   100] loss: 0.259
[6,   120] loss: 0.220
[6,   140] loss: 0.251
[7,    20] loss: 0.208
[7,    40] 

In [173]:
PATH = './models/first_eye_model.pth'
# torch.save(net.state_dict(), PATH)

In [180]:
losses = []
criterion2 = nn.L1Loss().to(device)
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testLoader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        losses.append(criterion2(outputs, labels))

        
print(f'Accuracy of the network on the test images: {max(losses)}, {min(losses)}, {sum(losses)/len(losses)}')

  image = torch.tensor(image).to(device)


Accuracy of the network on the test images: 18.564542770385742, 0.038050174713134766, 2.7281651496887207
