In [1]:
import cv2
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
import torchvision.models as m

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Create my own image dataset class
class MyImageDataset:
    def __init__(self, train=True, transform=None):
        self.transform = transform
        if train:
            self.pathX = "./lazydata/train/X"
        else:
            self.pathX = "./lazydata/test/X"
        self.pathY = "./lazydata/train/Y"
        self.data=os.listdir(self.pathX)
        self.data=[f for f in self.data if f != ".DS_Store"]
    def __getitem__(self, idx):
        path = os.path.join(self.pathX, str(idx))
        image1 = cv2.imread(os.path.join(path, "rgb/0.png"))
        image2 = cv2.imread(os.path.join(path, "rgb/1.png"))
        image3 = cv2.imread(os.path.join(path, "rgb/2.png"))
        depth = np.load(os.path.join(path, "depth.npy"))
        Y = np.load(os.path.join(self.pathY, str(idx)+".npy"))
        Y *= 1000
        if self.transform:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
            image3 = self.transform(image3)
        
        return (image1, image2, image3, depth), Y
    def __len__(self):
        return len(self.data)

In [4]:
MyImageTransformations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([111.0820, 117.8825, 123.7023], 
                           [60.2689, 56.3253, 56.8279])
])

In [6]:
def train(epoch, model, optimizer):
    model.train()
    loss_value = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data=torch.cat((data[0],data[1],data[2],data[3]), 1)
        data, target = data.to(device), target.to(device)

        model = model.to(device)
        output = model(data)
        
        mse_loss = nn.MSELoss()
        loss = mse_loss(output.float(), target.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value += loss/149
        if batch_idx == 148:
          print("Epoch value: {} => Loss value: {}".format(epoch,l))
          loss_value = 0

In [None]:
train_dataset = MyImageDataset(True, transform = MyImageTransformations)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)

In [8]:
model = m.resnet50(weights=m.ResNet50_Weights.DEFAULT)
model.eval()
model.float()
model.fc = nn.Linear(2048, 12)
model.conv1 = nn.Conv2d(12, 64, kernel_size=7, stride=2, padding=3, bias=False)# with torch.no_grad():

model = model.to(device)

optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)
for epoch in range(0, 50):
    train(epoch, model, optimizer) 


epoch = 0, 639.6286010742188
epoch = 1, 218.47723388671875
epoch = 2, 127.41681671142578
epoch = 3, 94.72064971923828
epoch = 4, 79.39344024658203
epoch = 5, 59.46860885620117
epoch = 6, 48.45988082885742
epoch = 7, 37.61295700073242
epoch = 8, 27.82272720336914
epoch = 9, 25.170499801635742
epoch = 10, 21.75239372253418
epoch = 11, 19.37546730041504
epoch = 12, 16.516271591186523
epoch = 13, 15.755706787109375
epoch = 14, 14.32571029663086
epoch = 15, 14.539135932922363
epoch = 16, 12.382811546325684
epoch = 17, 11.581938743591309
epoch = 18, 11.203937530517578
epoch = 19, 10.271881103515625
epoch = 20, 11.463558197021484
epoch = 21, 9.791245460510254
epoch = 22, 8.711827278137207
epoch = 23, 8.861493110656738
epoch = 24, 8.074983596801758
epoch = 25, 7.95707893371582
epoch = 26, 8.174131393432617
epoch = 27, 7.670015335083008
epoch = 28, 6.956993579864502
epoch = 29, 6.6521711349487305
epoch = 30, 6.716496467590332
epoch = 31, 7.025938034057617
epoch = 32, 6.501819610595703
epoch = 3

In [11]:
for epoch in range(50, 70):
    train(epoch, model, optimizer) 

epoch = 50, 3.1815264225006104
epoch = 51, 4.088736057281494
epoch = 52, 3.0571136474609375
epoch = 53, 2.9833199977874756
epoch = 54, 2.7854251861572266
epoch = 55, 2.897265672683716
epoch = 56, 3.195817470550537
epoch = 57, 2.8644704818725586
epoch = 58, 2.8478622436523438
epoch = 59, 2.7104384899139404
epoch = 60, 2.337191581726074
epoch = 61, 2.7722926139831543
epoch = 62, 2.6405677795410156
epoch = 63, 2.508108377456665
epoch = 64, 2.470583200454712
epoch = 65, 2.5284721851348877
epoch = 66, 2.841566562652588
epoch = 67, 2.3423995971679688
epoch = 68, 2.0501086711883545
epoch = 69, 1.8179773092269897


In [12]:
torch.save(model.state_dict(), "model_resnet.pt")

In [13]:
outfile = 'submission.csv'

output_file = open(outfile, 'w')

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']
preds = []

t_data = torch.load('./csci-ua-473-intro-to-machine-learning-fall22/test/test/testX.pt')
file_ids = t_data[-1]
model.eval()
t_dataset = MyImageDataset(False, transform = MyImageTransformations)
t_loader = torch.utils.data.DataLoader(t_dataset, batch_size=1, shuffle=False)

for batch_idx, (data, target) in enumerate(t_loader):
      data=torch.cat((data[0],data[1],data[2],data[3]), 1)
      data, target = data.to(device), target.to(device)

      output = model(data)/1000
      preds.append(output[0].cpu().detach().numpy())

df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(preds)], axis = 1, names = titles)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

Written to csv file submission.csv
