## Capstone Project Carlos Figueroa


Load the packages

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sun Dec 11 18:08:09 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   62C    P0    30W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
import cv2
import os
import numpy as np
import pickle as pkl
import torch
from torchvision.utils import save_image

Dataset class that lazily loads each sample from disk


In [None]:
class LazyLoadDataset(Dataset):
    """Dataset that reads one sample from disk on every ``__getitem__`` call.

    Directory layout read by this class::

        <path>/{train|test}/X/<sample>/rgb/{0,1,2}.png
        <path>/{train|test}/X/<sample>/depth.npy
        <path>/{train|test}/X/<sample>/field_id.pkl
        <path>/{train|test}/Y/<sample>.npy

    Args:
        path: Root directory holding the ``train/`` and ``test/`` folders
            (with or without a trailing separator).
        train: Use the ``train/`` split when True, otherwise ``test/``.
        transform: Optional callable applied to each RGB image right after it
            is read, before scaling.
    """

    def __init__(self,path,train=True,transform=None):
        self.transform=transform
        split_dir=os.path.join(path,"train" if train else "test")

        # The trailing "" keeps a trailing separator, so the attributes keep
        # the same form ("<...>/X/") they had with plain string concatenation.
        self.pathX=os.path.join(split_dir,"X","")
        self.pathY=os.path.join(split_dir,"Y","")

        # os.listdir returns entries in arbitrary order; sorting makes an
        # index refer to the same sample on every run.
        self.data=sorted(os.listdir(self.pathX))

    @staticmethod
    def _minmax01(arr):
        """Min-max scale ``arr`` into [0, 1] and return it as float32."""
        return cv2.normalize(arr, None, alpha=0, beta=1,
                             norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    def __getitem__(self,idx):
        """Load sample ``idx``.

        Returns:
            ``((img0, img1, img2, depth, field_id), Y)``; the three images and
            the depth array are min-max scaled to [0, 1] as float32.

        Raises:
            FileNotFoundError: if one of the RGB images cannot be read.
        """
        f=self.data[idx]
        sample_dir=os.path.join(self.pathX,f)

        #X
        #read rgb images
        imgs=[]
        for name in ("0.png","1.png","2.png"):
            img_path=os.path.join(sample_dir,"rgb",name)
            img=cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError("could not read image: "+img_path)
            if self.transform is not None:
                img=self.transform(img)
            #scale each image to the 0-1 range
            imgs.append(self._minmax01(img))

        #read image depth
        depth=np.load(os.path.join(sample_dir,"depth.npy"))
        #read field ID
        # NOTE(review): pickle can execute arbitrary code; only load trusted files.
        with open(os.path.join(sample_dir,"field_id.pkl"),"rb") as fh:
            field_id=pkl.load(fh)
        #Y
        Y=np.load(os.path.join(self.pathY,f+".npy"))

        depth=self._minmax01(depth/1000)
        return (imgs[0],imgs[1],imgs[2],depth,field_id),Y

    def __len__(self):
        """Number of sample folders found under the split's ``X/`` directory."""
        return len(self.data)

Mount Google Drive and load the data

In [None]:
# Mount Google Drive so the dataset under /content/drive/MyDrive/ can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Training split stored on the mounted Drive (Kaggle alternative kept below for reference)
train_dataset = LazyLoadDataset("/content/drive/MyDrive/lazydata/")
#train_dataset=LazyLoadDataset("/kaggle/input/final-project-lazyload-data/lazydata/")

# Number of training samples
len(train_dataset)

3396

In [None]:
# Wrap the training set in a DataLoader that yields shuffled mini-batches of 64 samples
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

We check dimensions


In [None]:
# Pull one training sample to inspect the array shapes that reach the model.
(img0, img1, img2, depth, field_id), Y = train_dataset[0]
# In the recorded output the RGB images are channel-last (224, 224, 3)
# while depth is channel-first (3, 224, 224).
img0.shape, img1.shape, img2.shape, depth.shape

((224, 224, 3), (224, 224, 3), (224, 224, 3), (3, 224, 224))

Let's make sure the device is set to CUDA when a GPU is available


In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sun Dec 11 17:57:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8     9W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Train function



In [None]:
# Loss module computing the Root Mean Square Error used by the competition

class RMSELoss(nn.Module):
    """Root mean square error: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super(RMSELoss, self).__init__()
        self.mse = nn.MSELoss()

    def forward(self,yhat,y):
        """Return ``sqrt(mean((yhat - y) ** 2))`` as a scalar tensor."""
        mean_squared = self.mse(yhat,y)
        return mean_squared.sqrt()


def train(epoch, model, optimizer, permute_pixels=None, permutation_order=None, loader=None):
    """
    Train the model for one epoch
    Args:
        epoch (int): current epoch (only used in the progress message)
        model (nn.Module): model to train
        optimizer (torch.optim): optimizer to use
        permute_pixels (function): function to permute the pixels (default: None)
        permutation_order (1D torch array): order of the permutation (default: None)
        loader (DataLoader): yields ((img0, img1, img2, depth, field_id), target)
            batches; falls back to the notebook-level ``train_loader`` (default: None)
    """
    if loader is None:
        loader = train_loader

    # Build the loss module once instead of once per batch.
    criterion = RMSELoss()

    model.train()
    for batch_idx, (data, target) in enumerate(loader):
        # Each RGB batch is channel-last [B, H, W, 3]; move the channels to
        # dim 1 and join the three views with the depth batch (already
        # channel-first) into one [B, 12, H, W] tensor. The channel order is
        # img0 c0..c2, img1 c0..c2, img2 c0..c2, then depth.
        rgb = [img.permute(0, 3, 1, 2) for img in data[:3]]
        data = torch.cat(rgb + [data[3]], dim=1)

        data, target = data.to(device), target.to(device)

        #if we want to permute pixels
        if permute_pixels is not None:
            data = permute_pixels(data.squeeze(),permutation_order)

        output = model(data)
        # Cast both sides to float32 so the loss does not fail on a dtype mismatch.
        loss = criterion(output.float(), target.float())

        optimizer.zero_grad()#reset gradients
        loss.backward()#calculate gradients
        optimizer.step()#update

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

# CNN Architecture

In [None]:
class CNN(nn.Module):
    """Two convolution/pooling stages followed by a two-layer fully connected head.

    Args:
        input_size: Not used by the network; kept so existing
            ``CNN(224*224, ...)`` calls keep working.
        conv_feature: Number of output channels of both convolutions.
        fc_feature: Width of the hidden fully connected layer.
        output_size: Number of values predicted per sample.
        in_channels: Number of channels of the input tensor (default 12,
            the value that was previously hard-coded).

    The first linear layer expects ``conv_feature*4*4`` features, which is
    what the convolution stack produces for 224x224 inputs (see the shape
    comments below).
    """
    def __init__(self, input_size, conv_feature, fc_feature, output_size, in_channels=12):
        super(CNN, self).__init__()

        #first do convolutional layers (F = conv_feature, C = in_channels)
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=conv_feature, kernel_size=50), #[C,224,224]-->[F,175,175], stride default value is 1
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=20,stride=14), #20*20 kernel with explicit stride 14. [F,175,175]--->[F,12,12]
            nn.Conv2d(in_channels=conv_feature, out_channels=conv_feature, kernel_size=5), #[F,12,12]--->[F,8,8]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2), #[F,8,8]-->[F,4,4]
            )

        #then linear layers
        self.linear_layers=nn.Sequential(
            nn.Linear(conv_feature*4*4, fc_feature),
            nn.ReLU(),
            #nn.Dropout(0.2),
            nn.Linear(fc_feature, output_size))

    def forward(self, x):
        """Map a ``[B, in_channels, 224, 224]`` batch to ``[B, output_size]``."""
        x = self.cnn_layers(x)
        # Flatten everything but the batch dimension for the linear head.
        x = torch.flatten(x, 1)
        return self.linear_layers(x)

 # Train and see loss
 

In [None]:
# Hyper-parameters
conv_features = 72 # number of feature maps used to be 72
fc_features = 50
output_size = 12

# Build the CNN and move it to the selected device
model_cnn = CNN(224*224,conv_features,fc_features,output_size)
model_cnn.to(device)

# SGD with learning rate 0.01 and momentum 0.9
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.9)
# print('Number of parameters: {}'.format(get_n_params(model_cnn)))


test_accuracy = []
n_epochs = 1
for epoch in range(n_epochs):
    train(epoch, model_cnn, optimizer)


#     test_accuracy.append(test(model_cnn))

KeyboardInterrupt: ignored

# Now, let's generate model predictions for the test set and submit

In [None]:
# NOTE(review): this redefines the LazyLoadDataset class declared earlier in
# the notebook; from this cell on, this version is the one in effect.
class LazyLoadDataset(Dataset):
    """Dataset that reads one sample from disk on every ``__getitem__`` call.

    Directory layout read by this class::

        <path>/{train|test}/X/<sample>/rgb/{0,1,2}.png
        <path>/{train|test}/X/<sample>/depth.npy
        <path>/{train|test}/X/<sample>/field_id.pkl
        <path>/train/Y/<sample>.npy        (only read when ``train=True``)

    Args:
        path: Root directory holding the ``train/`` and ``test/`` folders
            (with or without a trailing separator).
        train: Use the ``train/`` split and return labels when True;
            otherwise use ``test/`` and return inputs only.
        transform: Optional callable applied to each RGB image right after it
            is read, before scaling.
    """

    def __init__(self,path,train=True,transform=None):
        self.transform=transform
        # Remember the split: __getitem__ cannot see the constructor argument,
        # and a bare `train` there would resolve to whatever global has that name.
        self.train=bool(train)
        split_dir=os.path.join(path,"train" if train else "test")

        # The trailing "" keeps a trailing separator, so the attributes keep
        # the same form ("<...>/X/") they had with plain string concatenation.
        self.pathX=os.path.join(split_dir,"X","")
        self.pathY=os.path.join(split_dir,"Y","")

        # os.listdir returns entries in arbitrary order; sorting makes an
        # index refer to the same sample on every run.
        self.data=sorted(os.listdir(self.pathX))

    @staticmethod
    def _minmax01(arr):
        """Min-max scale ``arr`` into [0, 1] and return it as float32."""
        return cv2.normalize(arr, None, alpha=0, beta=1,
                             norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    def __getitem__(self,idx):
        """Load sample ``idx``.

        Returns:
            ``((img0, img1, img2, depth, field_id), Y)`` for the train split,
            ``(img0, img1, img2, depth, field_id)`` for the test split. Images
            and depth are min-max scaled to [0, 1] as float32.

        Raises:
            FileNotFoundError: if one of the RGB images cannot be read.
        """
        f=self.data[idx]
        sample_dir=os.path.join(self.pathX,f)

        #X
        #read rgb images
        imgs=[]
        for name in ("0.png","1.png","2.png"):
            img_path=os.path.join(sample_dir,"rgb",name)
            img=cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError("could not read image: "+img_path)
            if self.transform is not None:
                img=self.transform(img)
            #scale each image to the 0-1 range
            imgs.append(self._minmax01(img))

        #read image depth
        depth=np.load(os.path.join(sample_dir,"depth.npy"))
        #read field ID
        # NOTE(review): pickle can execute arbitrary code; only load trusted files.
        with open(os.path.join(sample_dir,"field_id.pkl"),"rb") as fh:
            field_id=pkl.load(fh)

        depth=self._minmax01(depth/1000)
        inputs=(imgs[0],imgs[1],imgs[2],depth,field_id)

        if self.train:
            #Y
            Y=np.load(os.path.join(self.pathY,f+".npy"))
            return inputs,Y
        return inputs

    def __len__(self):
        """Number of sample folders found under the split's ``X/`` directory."""
        return len(self.data)

In [None]:
# Test split stored on the mounted Drive
test_dataset = LazyLoadDataset("/content/drive/MyDrive/lazydata/", train=False)

# Number of test samples
len(test_dataset)

849

In [None]:
# For the test split the dataset returns only the 5-tuple of inputs.
(img0, img1, img2, depth, field_id) = test_dataset[0]
img0.shape, img1.shape, img2.shape, depth.shape
#no Y being unpacked

((224, 224, 3), (224, 224, 3), (224, 224, 3), (3, 224, 224))

In [None]:
# Do not shuffle at inference time: a fixed order keeps predictions reproducible
# and easy to line up with the sample ids.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# NOTE(review): torch.load expects a file path or file-like object, not a
# DataLoader; this call raised AttributeError in the recorded run. To get
# batches, iterate over test_loader instead.
test_data = torch.load(test_loader)

AttributeError: ignored

In [None]:
# Save only the model weights (state_dict) to "model_cnn_1.pt" in the working directory.
torch.save(model_cnn.state_dict(), "model_cnn_1.pt")

In [None]:
import pickle
import pandas as pd

outfile = 'submission.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# The previous version called torch.load() on the dataset object (torch.load
# needs a file), used an undefined `model`, and opened `outfile` without ever
# closing it. Samples are now read through a DataLoader and fed to `model_cnn`
# with the same 12-channel layout used for training.
test_dataset=LazyLoadDataset("/content/drive/MyDrive/lazydata/", train = False)
# shuffle=False keeps the CSV row order reproducible.
submission_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

file_ids = []
preds = []

model_cnn.eval()
with torch.no_grad():  # inference only: no gradients needed
    for img0, img1, img2, depth, field_id in submission_loader:
        # RGB batches are channel-last; move channels to dim 1 and append depth.
        rgb = [img.permute(0, 3, 1, 2) for img in (img0, img1, img2)]
        data = torch.cat(rgb + [depth], dim=1).float().to(device)
        output = model_cnn(data)
        preds.append(output.cpu().numpy())
        # NOTE(review): the type stored in field_id.pkl is not visible here;
        # the default collate gives a tensor for numbers and a list for strings.
        file_ids.extend(field_id.tolist() if torch.is_tensor(field_id) else list(field_id))

df = pd.DataFrame(np.concatenate(preds), columns=titles[1:])
df.insert(0, titles[0], file_ids)
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

In [None]:
# NOTE(review): `split_test` is only created in a cell further down the notebook,
# so this cell fails on a fresh top-to-bottom run.
preds = []
model_cnn.eval()
with torch.no_grad():  # inference only: skip gradient bookkeeping
    for data in split_test:
        # Please remember to modify this loop, input and output based on your model/architecture
        output = model_cnn(data.to(device))  # same device the model was moved to
        preds.append(output.cpu().numpy())

In [None]:
import pickle
import pandas as pd
# model=model_cnn
outfile = 'submission.csv'

# The file is written by df.to_csv below; the earlier bare open(outfile, 'w')
# only leaked an unclosed handle and has been removed.

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# test_data = torch.load('/kaggle/input/csci-ua-473-intro-to-machine-learning-fall22/test/test/testX.pt')
# file_ids = test_data[-1]
# depth_data=test_data[1]
# rgb_data = test_data[0]
# model.eval()
# new_test=torch.cat((rgb_data[:,0],rgb_data[:,1],rgb_data[:,2],depth_data),dim=1)
# split_test=torch.split(new_test,100,dim=0)
# for data in split_test:
#     # Please remember to modify this loop, input and output based on your model/architecture
#     output = model(data.to('cuda'))
#     preds.append(output[0].cpu().detach().numpy())

# NOTE(review): this cell needs `file_ids` and `preds` from other cells (it raised
# NameError in the recorded run), and the /100 rescale has no visible counterpart
# in the training code of this notebook — confirm before submitting.
# `names=` was dropped from pd.concat: it names index levels, not columns;
# the column names are assigned on the next line.
df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(np.concatenate(preds)/100)], axis = 1)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

NameError: ignored

In [None]:
import pickle
import pandas as pd

outfile = 'submission.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# The previous version called torch.load() on the dataset object (torch.load
# needs a file), used an undefined `model`, and opened `outfile` without ever
# closing it. Samples are now read through a DataLoader and fed to `model_cnn`
# with the same 12-channel layout used for training.
test_dataset=LazyLoadDataset("/content/drive/MyDrive/lazydata/", train = False)
# shuffle=False keeps the CSV row order reproducible.
submission_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

file_ids = []
preds = []

model_cnn.eval()
with torch.no_grad():  # inference only: no gradients needed
    for img0, img1, img2, depth, field_id in submission_loader:
        # RGB batches are channel-last; move channels to dim 1 and append depth.
        rgb = [img.permute(0, 3, 1, 2) for img in (img0, img1, img2)]
        data = torch.cat(rgb + [depth], dim=1).float().to(device)
        output = model_cnn(data)
        preds.append(output.cpu().numpy())
        # NOTE(review): the type stored in field_id.pkl is not visible here;
        # the default collate gives a tensor for numbers and a list for strings.
        file_ids.extend(field_id.tolist() if torch.is_tensor(field_id) else list(field_id))

df = pd.DataFrame(np.concatenate(preds), columns=titles[1:])
df.insert(0, titles[0], file_ids)
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

FileNotFoundError: ignored

In [None]:
# Save only the model weights (state_dict) to "model_cnn_1.pt" in the working directory.
torch.save(model_cnn.state_dict(), "model_cnn_1.pt")

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Training settings
conv_features = 72 # number of feature maps
fc_features = 50
output_size = 12
model_cnn = CNN(224*224,conv_features,fc_features,output_size) # create CNN model
model_cnn.to(device)
model = model_cnn
# map_location remaps tensors saved from a GPU session onto the current device,
# so the checkpoint also loads on a CPU-only runtime.
# NOTE(review): this is a Kaggle input path, while the save cells in this notebook
# write "model_cnn_1.pt" to the working directory; the recorded run raised
# FileNotFoundError here — confirm which location is intended.
model.load_state_dict(torch.load('/kaggle/input/cnn-model-1/model_cnn_1.pt', map_location=device))
model.eval()

FileNotFoundError: ignored

In [None]:
# Test split stored on the mounted Drive
test_dataset = LazyLoadDataset("/content/drive/MyDrive/lazydata/", train=False)
# Number of test samples
len(test_dataset)

849

In [None]:
# Do not shuffle at inference time: a fixed order keeps predictions reproducible
# and easy to line up with the sample ids.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Read the first test sample from disk as a sanity check
# (this raised FileNotFoundError in the recorded run).
test_dataset[0]

FileNotFoundError: ignored

In [None]:
# Shape after joining the three RGB views and depth along dim 1.
# NOTE(review): `depth_data` is only assigned in commented-out code in this notebook,
# and `rgb_data` only in cells that errored — confirm where both come from.
torch.cat((rgb_data[:,0],rgb_data[:,1],rgb_data[:,2],depth_data),dim=1).shape

In [None]:
# Join the three RGB views (index 0..2 along dim 1 of rgb_data) with depth along dim 1.
# presumably rgb_data is [N, 3, C, H, W] and depth_data is [N, C, H, W] — TODO confirm
new_test=torch.cat((rgb_data[:,0],rgb_data[:,1],rgb_data[:,2],depth_data),dim=1)

In [None]:
# Cut the test tensor into chunks of 50 samples along dim 0; the prediction loop iterates over these.
split_test=torch.split(new_test,50,dim=0)
# Number of chunks
len(split_test)

In [None]:
# Display channel 1 of the first test sample as an image.
plt.imshow(new_test[0,1,:,:])

In [None]:
# Shape of the first chunk passed to the model.
split_test[0].shape

In [None]:
preds = []
model_cnn.eval()
with torch.no_grad():  # inference only: skip gradient bookkeeping
    for data in split_test:
        # Please remember to modify this loop, input and output based on your model/architecture
        output = model_cnn(data.to(device))  # same device the model was moved to
        preds.append(output.cpu().numpy())

In [None]:
# Target (Y) of the first training sample: element [1] of the (inputs, Y) pair.
train_loader.dataset[0][1]

In [None]:
# Stack the per-chunk predictions into one array and divide by 1000.
# NOTE(review): another cell in this notebook divides by 100 instead — confirm the intended scale.
np.concatenate(preds)/1000

In [None]:
# Shape of the stacked predictions.
np.concatenate(preds).shape