In [2]:
import os
import numpy as np
import pickle as pkl
import torch
from torchvision.utils import save_image

# Load the competition tensors using paths relative to the input directory.
os.chdir('/kaggle/input/csci-ua-473-intro-to-machine-learning-fall22/')

trainx = torch.load('train/train/trainX.pt')
trainy = torch.load('train/train/trainY.pt')
testx = torch.load('test/test/testX.pt')

num_train = trainx[0].shape[0]
num_test = testx[0].shape[0]

# Everything below is written under the working directory; the process stays
# there afterwards, exactly as before.
os.chdir('/kaggle/working')


def _save_split(x, root, num_samples, y=None):
    """Write one data split to disk, one directory per sample.

    Layout produced under ``root``::

        X/<i>/rgb/{0,1,2}.png   images x[0][i][j] scaled by 1/255
        X/<i>/depth.npy         x[1][i] as a numpy array
        X/<i>/field_id.pkl      pickled x[2][i]
        Y/<i>.npy               y[0][i] as a numpy array (only when y is given)

    Args:
        x: indexable holding the images at [0], depth at [1], field ids at [2].
        root (str): directory that receives the ``X`` (and ``Y``) subtrees.
        num_samples (int): number of samples to write.
        y: optional indexable whose element [0] holds the per-sample labels.
    """
    os.makedirs(os.path.join(root, 'X'), exist_ok=True)
    if y is not None:
        os.makedirs(os.path.join(root, 'Y'), exist_ok=True)

    for i in range(num_samples):
        sample_dir = os.path.join(root, 'X', str(i))
        rgb_dir = os.path.join(sample_dir, 'rgb')
        # Creates the sample directory as well as its rgb/ child.
        os.makedirs(rgb_dir, exist_ok=True)

        # rgb
        for j in range(3):
            save_image(x[0][i][j] / 255, os.path.join(rgb_dir, '{}.png'.format(j)))
        # depth
        np.save(os.path.join(sample_dir, 'depth.npy'), x[1][i].numpy())
        # field id -- the same file name for every split, because the dataset
        # reader opens 'field_id.pkl'. The context manager closes the handle.
        with open(os.path.join(sample_dir, 'field_id.pkl'), 'wb') as field_file:
            pkl.dump(x[2][i], field_file)

        if y is not None:
            np.save(os.path.join(root, 'Y', '{}.npy'.format(i)), y[0][i].numpy())


# Save train data
_save_split(trainx, './lazydata/train', num_train, y=trainy)
print("Saved train data")

# Save test data
_save_split(testx, './lazydata/test', num_test)
print("Saved test data")

Saved train data
Saved test data


In [6]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pickle as pkl
import torch
import pandas as pd
import os
from torch.utils.data import DataLoader
from torchvision.io import read_image
import torchvision

In [3]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Pick the matching float tensor constructor. The check uses device.type
# because comparing a torch.device against a plain string such as "cuda:0"
# evaluates to False, which made the CUDA branch unreachable.
# NOTE(review): tensors built with the CUDA constructor are allocated on the
# GPU; confirm this is wanted before using it with DataLoader num_workers > 0.
FT = torch.cuda.FloatTensor if device.type == "cuda" else torch.FloatTensor

In [7]:
class RobotDataset(torch.utils.data.Dataset):
    """Dataset that reads one sample at a time from a directory tree.

    Sample ``idx`` is read from ``<img_dir>/X/<idx>/``: three PNG images
    (``rgb/0.png``, ``rgb/1.png``, ``rgb/2.png``), ``depth.npy`` and, when
    ``train`` is False, ``field_id.pkl``. When ``train`` is True the label is
    read from ``<img_dir>/Y/<idx>.npy``.

    Args:
        img_dir (str): root directory of the split.
        train (bool): return ``(input, label)`` when True, otherwise
            ``(input, field_id)``.
    """

    def __init__(self, img_dir, train = True):
        # The dataset length is the number of entries in the X directory.
        with os.scandir(os.path.join(img_dir, 'X')) as it:
            self.num_samples = sum(1 for _ in it)
        self.img_dir = img_dir
        self.train = train
        # The transforms do not depend on the sample, so build them once here
        # instead of on every __getitem__ call.
        self._rgb_norm = torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        self._depth_norm = torchvision.transforms.Normalize(
            mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2))

    def __len__(self):
        """Return the number of samples found under ``<img_dir>/X``."""
        return self.num_samples

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(input, label)`` or ``(input, field_id)``.

        The input is the channel-wise concatenation of the three normalized
        images followed by the normalized depth array.
        """
        base_path = os.path.join(self.img_dir, 'X', str(idx))

        parts = []
        for view in range(3):
            png_path = os.path.join(base_path, 'rgb', f'{view}.png')
            # uint8 pixels -> float32 in [0, 1] before normalization.
            image = read_image(png_path).type(torch.float32) / 255
            parts.append(self._rgb_norm(image))

        depth = torch.from_numpy(np.load(os.path.join(base_path, 'depth.npy')) / 1000)
        # Cast to float32 so the depth channels share the images' dtype when
        # everything is concatenated.
        parts.append(self._depth_norm(depth.float()))
        twelved = torch.cat(parts, dim=0)

        if self.train:
            label = FT(np.load(os.path.join(self.img_dir, 'Y', f'{idx}.npy')))
            return twelved, label

        with open(os.path.join(base_path, 'field_id.pkl'), 'rb') as picklefile:
            fid = int(pkl.load(picklefile))
        return twelved, fid

In [5]:
# Training split: samples are served one at a time, in shuffled order, from
# the main process.
train_dataset = RobotDataset(img_dir='/kaggle/working/lazydata/train')
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=0,
)

In [6]:
# Start from an ImageNet-pretrained ResNet-152 and widen its first convolution
# from 3 to 12 input channels.
resnet_image_cnn = torchvision.models.resnet152(pretrained=True)
# Detached copy of the pretrained 3-channel stem filters.
weight_store = resnet_image_cnn.conv1.weight.detach().clone()
resnet_image_cnn.conv1 = nn.Conv2d(12, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Write into the parameter's storage in place. Assigning to `.data` of a slice
# (`weight[:, :3].data = ...`) only rebinds a temporary view and leaves the
# real weights untouched, so the pretrained filters were never applied.
with torch.no_grad():
    conv1_weight = resnet_image_cnn.conv1.weight
    # Input channels 0-2, 3-5 and 6-8 each reuse the pretrained filters.
    for first_channel in (0, 3, 6):
        conv1_weight[:, first_channel:first_channel + 3].copy_(weight_store)
    # Input channels 9-11 have no pretrained counterpart: draw them from a
    # standard normal, with the full per-channel kernel shape.
    # NOTE(review): unit variance may be large next to the pretrained filters;
    # consider a smaller scale.
    conv1_weight[:, 9:12].normal_()

# Replace the classifier with a 512-dimensional feature layer.
resnet_image_cnn.fc = nn.Linear(2048, 512)

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


  0%|          | 0.00/230M [00:00<?, ?B/s]

In [7]:
criterion = nn.MSELoss()

def train(epoch, model, optimizer, dataloader=None, loss_fn=None, log_interval=100):
    """
    Train the model for one epoch

    Args:
        epoch (int): current epoch (only used in the progress message)
        model (nn.Module): model to train
        optimizer (torch.optim): optimizer to use
        dataloader (iterable, optional): yields (data, target) batches and
            exposes ``.dataset``; defaults to the module-level
            ``train_dataloader``
        loss_fn (callable, optional): loss applied to (output, target);
            defaults to the module-level ``criterion``
        log_interval (int): print progress every this many batches
    """
    loader = train_dataloader if dataloader is None else dataloader
    loss_fn = criterion if loss_fn is None else loss_fn
    # Batches are moved to wherever the model's parameters live, so inputs and
    # weights always share a device.
    model_device = next(model.parameters()).device

    model.train()
    for batch_idx, (data, target) in enumerate(loader):
        # send to device
        data, target = data.to(model_device), target.to(model_device)
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run.
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

In [8]:
# Regression head stacked on the 512-d backbone output: every Linear layer is
# preceded by a ReLU, and a final Tanh follows the 12-unit output layer.
layer_widths = [512, 256, 128, 64, 64, 12]
head_layers = []
for in_features, out_features in zip(layer_widths[:-1], layer_widths[1:]):
    head_layers.append(nn.ReLU())
    head_layers.append(nn.Linear(in_features, out_features))

cnn = nn.Sequential(resnet_image_cnn, *head_layers, nn.Tanh())
cnn.to(device)
optimizer = torch.optim.Adam(cnn.parameters(), lr=1e-5)

# Fifteen passes over the training data.
for epoch in range(15):
    train(epoch, cnn, optimizer)



KeyboardInterrupt: 

In [8]:
# Test split: no labels, samples are served one at a time in directory-index
# order from the main process.
test_dataset = RobotDataset(img_dir='/kaggle/working/lazydata/test', train=False)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

In [9]:
outfile = 'submission.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

preds = []
file_ids = []

# Inference mode: eval() makes BatchNorm use its running statistics instead of
# the statistics of each single-sample batch, and no_grad() skips autograd.
cnn.eval()
with torch.no_grad():
    for data, fid in test_dataloader:
        # The input must be on the same device as the model's weights.
        output = cnn(data.to(device))
        preds.append(output[0].cpu().numpy())
        file_ids.append(int(fid))

# One row per test sample: the id column followed by the 12 predicted values.
df = pd.concat(
    [pd.DataFrame({titles[0]: file_ids}), pd.DataFrame(preds, columns=titles[1:])],
    axis = 1,
)
# to_csv opens and closes the file itself, so no separate handle is kept open.
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

NameError: name 'cnn' is not defined