In [118]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import torchvision
from imutils import paths
import cv2
import os
from sklearn.model_selection import train_test_split
from skorch import NeuralNetRegressor

In [119]:
dataset_path = '../../datasets/cgds_2_m/'
imagePaths = sorted(list(map(lambda x: os.path.join(dataset_path,x), filter(lambda x: x not in ['', '.DS_Store'], os.listdir(dataset_path)))))

TEST_SPLIT = 0.1
BATCH_SIZE = 64
INPUT_IMAGE_HEIGHT = 100
INPUT_IMAGE_WIDTH = 300
device = "mps"

In [120]:
transforms = transforms.Compose([transforms.ToPILImage(),
 	transforms.Resize((INPUT_IMAGE_HEIGHT, INPUT_IMAGE_WIDTH)),
	transforms.ToTensor()])

class EyeDataset(Dataset):
	def __init__(self, imagePaths, transforms=None):
		self.imagePaths = imagePaths
		self.transforms = transforms
	def __len__(self):
		# return the number of total samples contained in the dataset
		return len(self.imagePaths)
	def __getitem__(self, idx):
		# grab the image path from the current index
		imagePath = self.imagePaths[idx]
		# load the image from disk, swap its channels from BGR to RGB,
		# and read the associated mask from disk in grayscale mode
		image = cv2.imread(imagePath)
		res = imagePath.split('/')[-1].split('.jpg')[0].split('_')
		pitch = res[2][:-1]
		yaw = res[3][:-1]
		hp = res[4]
		hr = res[5]
		hy = res[6]
		head_pos = torch.tensor([float(hp)/180, float(hr)/180, float(hy)/180], dtype=torch.float32).to(device)
		res = [float(pitch), float(yaw)]
		res = torch.tensor(res, dtype=torch.float32)
		res = res.to(device)
		# check to see if we are applying any transformations
		if self.transforms is not None:
			image = self.transforms(image)
		image = torch.tensor(image).to(device)
		# return a tuple of the image and its mask
		return ((image, head_pos), res)


In [121]:
(trainImages, testImages) = train_test_split(imagePaths, test_size=TEST_SPLIT, random_state=42)

# create the train and test datasets
trainDS = EyeDataset(imagePaths=trainImages, transforms=transforms)
testDS = EyeDataset(imagePaths=testImages, transforms=transforms)

# sklearn dataset
# ds = EyeDataset(imagePaths=imagePaths, transforms=transforms)
# X, y = list(map(lambda x: x[0], ds)), list(map(lambda x: x[1], ds))

print(f"[INFO] found {len(trainDS)} examples in the training set...")
print(f"[INFO] found {len(testDS)} examples in the test set...")

# create the training and test data loaders
trainLoader = DataLoader(trainDS, shuffle=True,
	batch_size=BATCH_SIZE, num_workers=0)
testLoader = DataLoader(testDS, shuffle=False,
	batch_size=1, num_workers=0)

[INFO] found 10585 examples in the training set...
[INFO] found 1177 examples in the test set...


In [122]:
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


In [123]:
class ThirdEyeNet(nn.ModuleList):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 9, 10)
        self.pool = nn.MaxPool2d(3, 3)
        self.conv2 = nn.Conv2d(9, 26, 10)
        self.fc1 = nn.Linear(5281, 700)
        self.fc2 = nn.Linear(700, 130)
        self.fc3 = nn.Linear(130, 2)

    def forward(self, x):
        x, head_pos = x
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = torch.cat((x, head_pos), 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# net = NeuralNetRegressor(FirstEyeNet, max_epochs = 100, lr=0.001, verbose=1, criterion = nn.MSELoss().to(device))
# net.device = 'mps:0'

net = ThirdEyeNet()
net.to(device)

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [124]:
changed = False
changed2 = False
every = 20
for epoch in range(50):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainLoader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % every == every - 1:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / every:.3f}')
            if running_loss / every < 2 and not changed: 
                changed = True
                for g in optimizer.param_groups:
                    g['lr'] *= 0.1
            if running_loss / every < 0.25 and not changed2: 
                changed2 = True
                for g in optimizer.param_groups:
                    g['lr'] *= 0.1
            running_loss = 0.0
    losses = []
    if epoch % 5 == 4:    # print every 2000 mini-batches
        criterion2 = nn.L1Loss().to(device)
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for data in testLoader:
                images, labels = data
                # calculate outputs by running images through the network
                outputs = net(images)
                # the class with the highest energy is what we choose as prediction
                losses.append(criterion2(outputs, labels))
        print(f'Accuracy of the network on the test images: {max(losses)}, {min(losses)}, {sum(losses)/len(losses)}')

print('Finished Training')

  image = torch.tensor(image).to(device)


[1,    20] loss: 82.928
[1,    40] loss: 78.043
[1,    60] loss: 71.461
[1,    80] loss: 71.125
[1,   100] loss: 68.903
[1,   120] loss: 62.351
[1,   140] loss: 60.150
[1,   160] loss: 58.606
[2,    20] loss: 58.621
[2,    40] loss: 52.548
[2,    60] loss: 47.553
[2,    80] loss: 44.066
[2,   100] loss: 42.746
[2,   120] loss: 40.559
[2,   140] loss: 39.230
[2,   160] loss: 35.759
[3,    20] loss: 36.316
[3,    40] loss: 32.326
[3,    60] loss: 30.516
[3,    80] loss: 28.752
[3,   100] loss: 26.759
[3,   120] loss: 27.085
[3,   140] loss: 28.241
[3,   160] loss: 27.650
[4,    20] loss: 24.053
[4,    40] loss: 22.650
[4,    60] loss: 22.903
[4,    80] loss: 21.888
[4,   100] loss: 22.108
[4,   120] loss: 20.425
[4,   140] loss: 19.166
[4,   160] loss: 17.626
[5,    20] loss: 17.145
[5,    40] loss: 15.172
[5,    60] loss: 16.794
[5,    80] loss: 15.971
[5,   100] loss: 16.653
[5,   120] loss: 14.152
[5,   140] loss: 14.725
[5,   160] loss: 14.799
Accuracy of the network on the test imag

KeyboardInterrupt: 

In [None]:
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import StandardScaler


# pipe = Pipeline([
#     ('scale', StandardScaler()),
#     ('net', net),
# ])
# X = torch.stack(X)
# y = torch.stack(y)

In [None]:
# from sklearn.model_selection import GridSearchCV


# # deactivate skorch-internal train-valid split and verbose logging
# net.set_params(train_split=False, verbose=0)
# params = {
#     'lr': [0.001, 0.01],
#     'max_epochs': [10],
# }
# gs = GridSearchCV(net, params, refit=False, cv=3,
#                   scoring='accuracy', verbose=2)

# gs.fit(X, y)
# print("best score: {:.3f}, best params: {}".format(
#     gs.best_score_, gs.best_params_))


In [None]:
PATH = './models/second_eye_model_mirrored_hv.pth'
torch.save(net.state_dict(), PATH)

In [None]:
losses = []
criterion2 = nn.L1Loss().to(device)
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testLoader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        losses.append(criterion2(outputs, labels))

        
print(f'Accuracy of the network on the test images: {max(losses)}, {min(losses)}, {sum(losses)/len(losses)}')
# Accuracy of the network on the test images: 11.710856437683105, 0.05646705627441406, 1.7454159259796143

  image = torch.tensor(image).to(device)


Accuracy of the network on the test images: 14.57371711730957, 0.033048465847969055, 1.6739957332611084
