In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive. Later cells
# read the dataset archive from this mount and write model checkpoints to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/research/feedback_controller/data.zip

Archive:  /content/drive/MyDrive/research/feedback_controller/data.zip
   creating: data/
  inflating: data/torobo_eyes_view_1.jpeg  
  inflating: data/torobo_eyes_view_2.jpeg  
  inflating: data/torobo_eyes_view_3.jpeg  
  inflating: data/torobo_eyes_view_4.jpeg  
  inflating: data/torobo_eyes_view_5.jpeg  
  inflating: data/torobo_eyes_view_6.jpeg  
  inflating: data/torobo_eyes_view_7.jpeg  
  inflating: data/torobo_eyes_view_8.jpeg  
  inflating: data/torobo_eyes_view_9.jpeg  
  inflating: data/torobo_eyes_view_10.jpeg  
  inflating: data/torobo_eyes_view_11.jpeg  
  inflating: data/torobo_eyes_view_12.jpeg  
  inflating: data/torobo_eyes_view_13.jpeg  
  inflating: data/torobo_eyes_view_14.jpeg  
  inflating: data/torobo_eyes_view_15.jpeg  
  inflating: data/torobo_eyes_view_16.jpeg  
  inflating: data/torobo_eyes_view_17.jpeg  
  inflating: data/torobo_eyes_view_18.jpeg  
  inflating: data/torobo_eyes_view_19.jpeg  
  inflating: data/torobo_eyes_view_20.jpeg  
  inflating: data/t

In [None]:
!mkdir /content/drive/MyDrive/research/feedback_controller/weights

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

import random
import numpy as np
from PIL import Image

In [None]:
class AlexNetPT(nn.Module):
    """Image encoder built on the ImageNet-pretrained AlexNet convolutional stack.

    The AlexNet ``features`` module is followed by a flatten and a two-layer
    fully connected head that maps the flattened features to
    ``encoded_space_dim`` values.

    Args:
        encoded_space_dim: Size of the output vector per image.

    Note:
        The head's first layer expects ``256*6*6`` flattened features, so the
        input resolution must make the feature extractor produce a 256x6x6
        map (the notebook resizes images to 224x224 for this).
    """

    def __init__(self, encoded_space_dim):
        super().__init__()

        # Load the pre-trained AlexNet model.
        # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of the
        # ``weights`` enum; IMAGENET1K_V1 is the checkpoint ``pretrained=True``
        # used to select. Fall back to the legacy flag on older versions.
        if hasattr(models, "AlexNet_Weights"):
            alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
        else:
            alexnet = models.alexnet(pretrained=True)

        # Keep only the convolutional feature extraction layers.
        self.feature_extractor = alexnet.features

        self.flatten = nn.Flatten(start_dim=1)

        self.encoder_lin = nn.Sequential(
            nn.Linear(256*6*6, 1024),
            nn.ReLU(True),
            nn.Linear(1024, encoded_space_dim)
        )

    def forward(self, x):
        """Encode a batch of images into ``(batch, encoded_space_dim)`` vectors."""
        x = self.feature_extractor(x)
        x = self.flatten(x)
        x = self.encoder_lin(x)
        return x


class MLP(nn.Module):
    """Three-layer perceptron whose input and output widths are both ``encoded_space_dim``.

    Two hidden layers of 1024 units with in-place ReLU activations sit between
    the input and output projections.
    """

    def __init__(self, encoded_space_dim):
        super().__init__()

        hidden_units = 1024
        # Layers are passed positionally so the parameter names stay
        # ``linear.0``, ``linear.2`` and ``linear.4``.
        stack = [
            nn.Linear(encoded_space_dim, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, encoded_space_dim),
        ]
        self.linear = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.linear(x)


class GeneralModel(nn.Module):
    """Combines the image encoder and the MLP into one predictor.

    The image is encoded to an ``encoded_space_dim`` vector, the supplied
    joint vector is subtracted from it, and the difference is fed to the MLP.
    """

    def __init__(self, encoded_space_dim):
        super().__init__()
        self.alexnet = AlexNetPT(encoded_space_dim)
        self.mlp = MLP(encoded_space_dim)

    def forward(self, img_tensor, joints):
        """Return the MLP output for the (image encoding - joints) difference.

        Args:
            img_tensor: Image batch passed to the AlexNet-based encoder.
            joints: Tensor subtracted from the image encoding
                (presumably the current joint state; confirm with the caller).
        """
        # Drop the leading batch axis when it has size 1 so the encoding lines
        # up with an unbatched ``joints`` vector; larger batches are unchanged.
        encoded = self.alexnet(img_tensor).squeeze(0)
        difference = encoded - joints
        return self.mlp(difference.float())

In [None]:
# Pick the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("device:", device)

# Build the network with 7-dimensional encodings and move it to the device.
model = GeneralModel(encoded_space_dim=7)
m = model.to(device)

# Report the parameter count (in millions) followed by the layer layout.
param_total = sum(p.numel() for p in m.parameters())
print(param_total/1e6, 'M parameters')
print(model)

device: cuda


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:00<00:00, 258MB/s]


12.980046 M parameters
GeneralModel(
  (alexnet): AlexNetPT(
    (feature_extractor): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (encoder_lin): Sequential(
      (0): Li

In [None]:
# Training hyperparameters.
learning_rate = 3e-4  # learning rate handed to the optimizer
epochs = 1_000        # number of passes over the training indices

In [None]:
# Split the 1-based sample indices into a training set and a held-out test set.
# The RNG is seeded first so the split is identical after every kernel restart;
# otherwise saved checkpoints could not be evaluated on the same held-out set.
SPLIT_SEED = 0
NUM_SAMPLES = 70        # sample indices run from 1 to NUM_SAMPLES inclusive
NUM_TRAIN_SAMPLES = 65  # the remaining indices form the test set

random.seed(SPLIT_SEED)
all_indices = list(range(1, NUM_SAMPLES + 1))
random.shuffle(all_indices)

train_indices = all_indices[:NUM_TRAIN_SAMPLES]
test_indices = all_indices[NUM_TRAIN_SAMPLES:]
print(train_indices)
print(test_indices)

[69, 23, 29, 55, 56, 3, 18, 66, 38, 51, 24, 25, 36, 21, 54, 26, 9, 1, 43, 5, 4, 44, 30, 10, 45, 50, 16, 59, 8, 39, 46, 7, 19, 70, 42, 15, 41, 33, 34, 22, 13, 2, 62, 58, 27, 40, 60, 12, 49, 68, 32, 65, 48, 6, 28, 61, 37, 11, 64, 67, 31, 14, 52, 35, 20]
[17, 57, 47, 53, 63]


In [None]:
# Image preprocessing applied to every camera frame before it reaches the model.
# Frames are resized to 224 x 224 (the encoder head is sized for the AlexNet
# feature map at this resolution) and converted to a float tensor.
# Random horizontal flipping and (0.5, 0.5, 0.5) mean/std normalisation were
# tried previously and are intentionally left out of the pipeline.
_resize_step = transforms.Resize([224, 224])
_to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([_resize_step, _to_tensor_step])

In [None]:
# Load the recorded trajectories and display the array shape as the cell output.
# Row ``idx - 1`` of this array is paired with image ``torobo_eyes_view_{idx}.jpeg``.
trajectory_path = "./data/trajectory.npy"
trajectories = np.load(trajectory_path)
trajectories.shape

(70, 50, 7)

In [None]:
# Mean-squared error between the predicted and target vectors.
criterion = nn.MSELoss()

# Adam over every parameter of the model, including the pretrained AlexNet
# feature extractor (none of its weights are frozen).
trainable_params = model.parameters()
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)

In [None]:
def run_test():
    """Evaluate the model on the held-out trajectories and print the losses.

    For every index in the global ``test_indices`` (visited in random order)
    the matching image is loaded and, for each pair of consecutive trajectory
    steps, the model predicts the next joint vector from the image and the
    current joints. The per-step MSE is summed over the trajectory and
    printed; one randomly chosen step per trajectory is printed in full.

    Reads the globals ``model``, ``criterion``, ``transform``,
    ``trajectories``, ``test_indices`` and ``device``.

    Side effects:
        * ``test_indices`` is shuffled in place.
        * The model is switched to eval mode for the duration of the call and
          its previous train/eval mode is restored afterwards, even if
          evaluation raises.
    """
    random.shuffle(test_indices)

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed anywhere in evaluation.
        with torch.no_grad():
            for idx in test_indices:
                trajectory = trajectories[idx-1, :, :]

                # Close the file handle promptly and force three channels so a
                # grayscale/CMYK JPEG cannot break the first convolution.
                with Image.open(f"./data/torobo_eyes_view_{idx}.jpeg") as img:
                    img_tensor = transform(img.convert("RGB"))
                img_tensor = torch.unsqueeze(img_tensor, 0)
                img_tensor = img_tensor.to(device)

                num_steps = trajectory.shape[0] - 1
                traj_loss = 0
                # Step whose prediction/target pair is printed; -1 (never
                # matched) when the trajectory has no transitions at all.
                randint = random.randint(0, num_steps - 1) if num_steps > 0 else -1
                for k in range(num_steps):
                    joints = torch.tensor(trajectory[k, :], dtype=torch.float, device=device)
                    next_joints = torch.tensor(trajectory[k+1, :], dtype=torch.float, device=device)

                    joints_pred = model(img_tensor, joints)
                    loss = criterion(joints_pred, next_joints)
                    traj_loss += loss.item()

                    if k == randint:
                        print("joints_pred", joints_pred)
                        print("next_joints", next_joints)

                print(f"for index: {idx}, loss: {traj_loss}")
    finally:
        # Restore whatever mode the caller had the model in.
        model.train(was_training)


In [None]:
# Training loop.
# Each epoch visits the training indices in a fresh random order. For every
# index, the image is loaded once and one optimizer step is taken per pair of
# consecutive trajectory steps (predict step k+1 from the image and step k).
# Every 100 epochs -- and on the final epoch, so the fully trained weights are
# not lost -- the model is evaluated with run_test() and checkpointed to Drive.
last_epoch = epochs - 1
for i in range(epochs):
    random.shuffle(train_indices)
    model.train()
    for idx in train_indices:
        # Convert the whole trajectory to a device tensor once instead of
        # building two small tensors from NumPy on every step.
        trajectory = torch.tensor(trajectories[idx-1, :, :], dtype=torch.float, device=device)

        # Close the file handle promptly and force three channels so a
        # grayscale/CMYK JPEG cannot break the first convolution.
        with Image.open(f"./data/torobo_eyes_view_{idx}.jpeg") as img:
            img_tensor = transform(img.convert("RGB"))
        img_tensor = torch.unsqueeze(img_tensor, 0)
        img_tensor = img_tensor.to(device)

        for k in range(trajectory.shape[0]-1):
            joints = trajectory[k]
            next_joints = trajectory[k+1]

            optimizer.zero_grad()
            joints_pred = model(img_tensor, joints)
            loss = criterion(joints_pred, next_joints)
            loss.backward()
            optimizer.step()

    if i%100 == 0 or i == last_epoch:
        print(f"finished epoch {i}")
        run_test()
        print("==========")
        torch.save(model.state_dict(), f"/content/drive/MyDrive/research/feedback_controller/weights/fbc_{i}.pth")

finished epoch 0
joints_pred tensor([ 1.5917,  0.7077, -0.2234,  1.3307,  1.7102, -0.4964,  0.9751],
       device='cuda:0')
next_joints tensor([ 1.5863,  0.6760, -0.1626,  1.4063,  1.6808, -0.5013,  1.0493],
       device='cuda:0')
for index: 57, loss: 0.41068867268040776
joints_pred tensor([ 1.2300,  0.6766,  0.2012,  1.1387,  1.3206, -0.3674,  1.0601],
       device='cuda:0')
next_joints tensor([ 1.1846,  0.7332,  0.4262,  1.2966,  1.2734, -0.3981,  1.2745],
       device='cuda:0')
for index: 17, loss: 1.3057027403265238
joints_pred tensor([ 1.6924,  0.7404, -0.2913,  1.4037,  1.8099, -0.5100,  0.9859],
       device='cuda:0')
next_joints tensor([ 1.6706,  0.4542, -0.3057,  1.5329,  1.7138, -0.3937,  0.9831],
       device='cuda:0')
for index: 63, loss: 0.328197137627285
joints_pred tensor([ 1.5581,  0.6916, -0.1963,  1.3096,  1.6694, -0.4800,  0.9672],
       device='cuda:0')
next_joints tensor([ 1.5433,  0.5841, -0.1140,  1.3779,  1.6318, -0.3848,  1.0700],
       device='cuda:0')