# 1. Preprocessing

## 1.1 Connect Kaggle API and download data

In [1]:
! pip install -q kaggle

In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"garyboyuanzhang","key":"b845c99f0424c592b63d7a6e642c062c"}'}

In [3]:
! mkdir ~/.kaggle

In [4]:
! cp kaggle.json ~/.kaggle/

In [5]:
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
!kaggle competitions download -c csci-ua-473-intro-to-machine-learning-fall22

Downloading csci-ua-473-intro-to-machine-learning-fall22.zip to /content
100% 2.07G/2.08G [01:07<00:00, 25.0MB/s]
100% 2.08G/2.08G [01:08<00:00, 32.8MB/s]


In [7]:
!unzip csci-ua-473-intro-to-machine-learning-fall22.zip

Archive:  csci-ua-473-intro-to-machine-learning-fall22.zip
  inflating: test/test/testX.pt      
  inflating: train/train/trainX.pt   
  inflating: train/train/trainY.pt   


In [8]:
import pandas as pd
import numpy as np
import torch
import tensorflow as tf

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

## 1.2 load and scale data

In [10]:
# load and scale y
y = torch.load('train/train/trainY.pt')[0]
y = y.numpy() * 1000
y

array([[ 26.67237357,  54.55597747, 129.87243632, ...,  55.47796319,
         10.84437332, -68.37046632],
       [ 29.26601235,  54.32850773, 128.39585076, ...,  53.14588588,
         26.62756708, -62.98823166],
       [ 45.87444124,  53.61158033, 123.32549963, ...,  49.77022714,
         72.57992628, -20.40969572],
       ...,
       [ 59.83600325,  52.36634036, 112.88869288, ...,  43.63148558,
         39.60958927, -40.23066632],
       [ 32.31255324,  54.48886567, 127.61630379, ...,  38.7364277 ,
         40.13385551, -30.04604118],
       [ 32.87386182,  54.27227643, 128.33969611, ...,  87.54277821,
         46.93831983, -51.7001559 ]])

In [None]:
# load and split X
train_X = torch.load('train/train/trainX.pt')
rgb_images = train_X[0]
depth_images = train_X[1]
file_ids = train_X[2]

print(rgb_images.shape)
print(depth_images.shape)
print(len(file_ids))

## 1.3 transform data

In [12]:
# scale and transform rgb image
def image_preprocessed(image):
    im = image.numpy()
    im = im / float(255)
    im = im.transpose(1, 0, 2, 3)
    return im

# scale and transform depth image
def depth_preprocessed(depth):
    dp = depth.numpy()
    dp = dp / float(1000)
    dp = dp.reshape(1, 3, 224, 224)
    dp = dp.transpose(1, 0, 2, 3)
    return dp

# add depth image as extra channel into rgb image
# so there would be 4 channels for an image in total
def get_preprocessed(image, depth):
    image = image_preprocessed(image)
    depth = depth_preprocessed(depth)
    
    return np.concatenate([image, depth], axis = 1)

In [13]:
# scale and transform X
l = []
for i in range(len(file_ids)):
    l.append(get_preprocessed(rgb_images[i], depth_images[i]))

In [14]:
# convert 3 different camera views into channels as well
# so there would be 3*4 = 12 channels for an image
X = np.array(l).reshape(3396, 12, 224, 224).transpose(0, 2, 3, 1)

In [34]:
X.shape

(3396, 224, 224, 12)

In [16]:
y.shape

(3396, 12)

## 1.4 split data

In [15]:
# split into training and validation dataset
from sklearn.model_selection import train_test_split

train_X, test_X, train_y, test_y = train_test_split(X, y,
                                                    test_size=0.3, 
                                                    random_state=2058)

## 1.5 normalization

In [16]:
# before normalization
print(train_X.mean(), train_X.std())
print(test_X.mean(), test_X.std())

0.57388437 0.48828858
0.573858 0.4877441


In [17]:
from tensorflow import keras

norm_layer = keras.layers.Normalization()
norm_layer.adapt(train_X)
train_X = norm_layer(train_X).numpy()
test_X = norm_layer(test_X).numpy()

In [18]:
# after normalization
print(train_X.mean(), train_X.std())
print(test_X.mean(), test_X.std())

-1.3828827e-06 1.000001
0.0009174339 0.9996589


# 2. Model

## 2.1 base model

In [15]:
# load base model
from keras.applications import ResNet50
resnet50 = ResNet50(include_top=False, weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [16]:
# Get Resnet config in dictionary format
config = resnet50.get_config()
config

{'name': 'resnet50',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, None, None, 3),
    'dtype': 'float32',
    'sparse': False,
    'ragged': False,
    'name': 'input_1'},
   'name': 'input_1',
   'inbound_nodes': []},
  {'class_name': 'ZeroPadding2D',
   'config': {'name': 'conv1_pad',
    'trainable': True,
    'dtype': 'float32',
    'padding': ((3, 3), (3, 3)),
    'data_format': 'channels_last'},
   'name': 'conv1_pad',
   'inbound_nodes': [[['input_1', 0, 0, {}]]]},
  {'class_name': 'Conv2D',
   'config': {'name': 'conv1_conv',
    'trainable': True,
    'dtype': 'float32',
    'filters': 64,
    'kernel_size': (7, 7),
    'strides': (2, 2),
    'padding': 'valid',
    'data_format': 'channels_last',
    'dilation_rate': (1, 1),
    'groups': 1,
    'activation': 'linear',
    'use_bias': True,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'bias_initializer': {'class_name': 'Zeros', 'config': {}

In [17]:
# change input layer shape to accomodate 12 channels
config["layers"][0]["config"]["batch_input_shape"] = (None, 224, 224, 12)

In [18]:
# build base model
base_model = tf.keras.models.Model.from_config(config)

In [19]:
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 1  0           []                               
                                2)]                                                               
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 12  0           ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  37696       ['conv1_pad[0][0]']              
                                )                                                          

## 2.2 build model

In [20]:
# model 2
from tensorflow import keras

base_model.trainable = True

model2 = keras.Sequential()
model2.add(base_model)
model2.add(keras.layers.Flatten())
model2.add(keras.layers.Dense(12))

# 3. Training

## 3.1 prepare data

In [19]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_y))
validation_dataset = tf.data.Dataset.from_tensor_slices((test_X, test_y))

In [20]:
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_dataset = validation_dataset.batch(BATCH_SIZE)

## 3.2 compile model

In [None]:
model2.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss=keras.losses.MeanSquaredError(),
              metrics=[keras.metrics.RootMeanSquaredError()])

In [27]:
loss0, accuracy0 = model2.evaluate(validation_dataset)



In [28]:
model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23615936  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 dense (Dense)               (None, 12)                1204236   
                                                                 
Total params: 24,820,172
Trainable params: 24,767,052
Non-trainable params: 53,120
_________________________________________________________________


## 3.3 Learning rate step decay

In [None]:
initial_lr = 0.001
def lr_step_decay(epoch, lr):
    gamma = 0.1
    step_size = 13
    return initial_lr * math.pow(gamma, math.floor(epoch/step_size))

## 3.4 train model

In [51]:
history2 = model2.fit(
    train_dataset, 
    epochs=52, 
    batch_size=BATCH_SIZE,
    callbacks=[LearningRateScheduler(lr_step_decay, verbose=1)],
    validation_data=validation_dataset
)


Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/52

Epoch 2: LearningRateScheduler setting learning rate to 0.001.
Epoch 2/52

Epoch 3: LearningRateScheduler setting learning rate to 0.001.
Epoch 3/52

Epoch 4: LearningRateScheduler setting learning rate to 0.001.
Epoch 4/52

Epoch 5: LearningRateScheduler setting learning rate to 0.001.
Epoch 5/52

Epoch 6: LearningRateScheduler setting learning rate to 0.001.
Epoch 6/52

Epoch 7: LearningRateScheduler setting learning rate to 0.001.
Epoch 7/52

Epoch 8: LearningRateScheduler setting learning rate to 0.001.
Epoch 8/52

Epoch 9: LearningRateScheduler setting learning rate to 0.001.
Epoch 9/52

Epoch 10: LearningRateScheduler setting learning rate to 0.001.
Epoch 10/52

Epoch 11: LearningRateScheduler setting learning rate to 0.001.
Epoch 11/52

Epoch 12: LearningRateScheduler setting learning rate to 0.001.
Epoch 12/52

Epoch 13: LearningRateScheduler setting learning rate to 0.001.
Epoch 13/52

Epoch 14: Learni

## 3.5 Retrain final model using all the data

In [21]:
norm_layer = keras.layers.Normalization()
norm_layer.adapt(X)
X = norm_layer(X)

In [23]:
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

dataset = tf.data.Dataset.from_tensor_slices((X, y))
dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

In [25]:
from keras.callbacks import LearningRateScheduler
import math

initial_lr = 0.001
def lr_step_decay(epoch, lr):
    gamma = 0.1
    step_size = 13
    return initial_lr * math.pow(gamma, math.floor(epoch/step_size))

model2.fit(
    dataset, 
    epochs=52, 
    batch_size=BATCH_SIZE,
    callbacks=[LearningRateScheduler(lr_step_decay, verbose=1)]
)


Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/52

Epoch 2: LearningRateScheduler setting learning rate to 0.001.
Epoch 2/52

Epoch 3: LearningRateScheduler setting learning rate to 0.001.
Epoch 3/52

Epoch 4: LearningRateScheduler setting learning rate to 0.001.
Epoch 4/52

Epoch 5: LearningRateScheduler setting learning rate to 0.001.
Epoch 5/52

Epoch 6: LearningRateScheduler setting learning rate to 0.001.
Epoch 6/52

Epoch 7: LearningRateScheduler setting learning rate to 0.001.
Epoch 7/52

Epoch 8: LearningRateScheduler setting learning rate to 0.001.
Epoch 8/52

Epoch 9: LearningRateScheduler setting learning rate to 0.001.
Epoch 9/52

Epoch 10: LearningRateScheduler setting learning rate to 0.001.
Epoch 10/52

Epoch 11: LearningRateScheduler setting learning rate to 0.001.
Epoch 11/52

Epoch 12: LearningRateScheduler setting learning rate to 0.001.
Epoch 12/52

Epoch 13: LearningRateScheduler setting learning rate to 0.001.
Epoch 13/52

Epoch 14: Learni

<keras.callbacks.History at 0x7f9a186c0c40>

# 4. Prediction

## 4.1 load testing data

In [26]:
test_X = torch.load('test/test/testX.pt')
rgb_images = test_X[0]
depth_images = test_X[1]
file_ids = test_X[2]

In [27]:
len(file_ids)

849

## 4.2 Scale and Transform testing data

In [32]:
tl = []
for i in range(len(file_ids)):
    tl.append(get_preprocessed(rgb_images[i], depth_images[i]))

test_X = np.array(tl).reshape(849, 12, 224, 224).transpose(0, 2, 3, 1)

## 4.3 Normalize testing data

In [33]:
test_X = norm_layer(test_X).numpy()

## 4.4 get prediction from model

In [34]:
y_pred = model2.predict(test_X, batch_size=BATCH_SIZE)



In [35]:
y_pred.shape

(849, 12)

## 4.5 rescale label

In [40]:
y_pred = y_pred / 1000

In [41]:
pd.DataFrame(y_pred)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.039493,0.054277,0.126014,0.039752,-0.001499,0.123319,0.059520,-0.050252,0.108432,0.051574,0.013038,-0.051114
1,0.020071,0.055157,0.132240,0.031156,-0.000552,0.131319,0.034629,-0.054509,0.126155,0.048779,0.032070,-0.048454
2,0.032074,0.054493,0.128659,0.077874,-0.002546,0.106465,0.031909,-0.034766,-0.034405,0.075244,0.034032,-0.077604
3,0.031188,0.054727,0.128840,0.060682,-0.002511,0.112128,0.068417,-0.043378,0.050957,0.054492,0.025609,-0.065872
4,0.058342,0.051282,0.116719,0.076537,-0.002574,0.102759,0.074428,-0.036909,0.027639,0.058093,0.027018,-0.028085
...,...,...,...,...,...,...,...,...,...,...,...,...
844,0.039803,0.054103,0.124713,0.077398,-0.003526,0.090771,0.061965,-0.037551,0.026692,0.053184,0.032865,-0.051879
845,0.035061,0.054277,0.126601,0.068657,-0.002611,0.066854,0.048200,-0.039714,0.036053,0.052505,0.018216,-0.043364
846,0.048742,0.053053,0.119896,0.056855,-0.002083,0.110959,0.053998,-0.052959,0.108000,0.044491,0.031776,-0.036652
847,0.033166,0.054060,0.127365,0.054168,-0.001735,0.115809,0.054073,-0.037407,0.005121,0.055621,0.021937,-0.078116


## 4.6 Export result

In [42]:
import pickle
import pandas as pd

outfile = 'submission4.csv'

output_file = open(outfile, 'w')

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

'''
preds = []

test_data = torch.load('./test/test/testX.pt')
file_ids = test_data[-1]
rgb_data = test_data[0]
model.eval()


for i, data in enumerate(rgb_data):
    # Please remember to modify this loop, input and output based on your model/architecture
    output = model(data[:1, :, :, :].to('cuda'))
    preds.append(output[0].cpu().detach().numpy())
'''

df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame(y_pred)], axis = 1, names = titles)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

Written to csv file submission4.csv
