In [1]:
!pip install -U liblinear-official



In [1]:
from tqdm import tqdm
import numpy as np
from liblinear.liblinearutil import *
import pandas as pd
import ast
tqdm.pandas()
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
import pickle
import os

In [2]:
# Label ids kept for classification. SVM.process_Y reads exactly these positions
# out of every per-image indicator vector, and one binary classifier is trained
# per entry (so the model count equals len(CLASSES)).
# NOTE(review): presumably NYUv2 class ids -- confirm against the label files.
CLASSES = [5, 7, 12, 19, 24, 26, 28, 36, 49, 83, 88, 119, 124, 136, 144, 157, 158, 169, 172]

In [3]:
def f1_score(TP, FP, TN, FN):
    """Return the F1 score computed from confusion-matrix counts.

    F1 = TP / (TP + 0.5 * (FP + FN)).

    Args:
        TP: number of true positives.
        FP: number of false positives.
        TN: number of true negatives (accepted for a uniform call signature;
            it does not enter the F1 formula).
        FN: number of false negatives.

    Returns:
        The F1 score as a float. When TP, FP and FN are all zero the score is
        undefined; 0.0 is returned instead of raising ZeroDivisionError.
    """
    denominator = TP + 0.5 * (FP + FN)
    if denominator == 0:
        # No positives in either the predictions or the ground truth.
        return 0.0
    return (1.0 * TP) / denominator

In [4]:
class SVM():
    """Train one binary SVM per entry of CLASSES on per-image embeddings.

    Typical call order (as used by the cells below):
    ``preprocess()`` -> ``process_Y()`` -> ``process_X(...)`` -> ``run_SVM()``
    (or ``sklearn_SVM()``), after which ``self.m`` holds the trained models and
    ``self.test_data`` the matching ``[X_test, y_test]`` pairs.
    """

    def __init__(self, cnn):
        """Store configuration; no data is loaded here.

        Args:
            cnn: free-form tag stored on the instance; not read by any method
                of this class.
        """
        self.cnn = cnn
        self.X = []
        self.Y = []
        # Indicator vectors have num_classes + 1 slots (see preprocess).
        self.num_classes = 894
        self.num_final_classes = len(CLASSES)
        # Number of label files read by preprocess (0.npy .. l-1.npy).
        self.l = 1449

    @staticmethod
    def _has_features(X):
        """Return True when the caller passed a usable in-memory feature set."""
        if X is None:
            return False
        try:
            # Works for lists and numpy arrays alike; avoids the ambiguous
            # truth-value error that `if X:` raises on multi-element arrays.
            return len(X) > 0
        except TypeError:
            # Unsized values such as the default sentinel 0.
            return bool(X)

    def preprocess(self):
        """Build one multi-hot label vector per image and store them in self.Y.

        For every ``./nyudv2/label/<i>.npy`` with ``i`` in ``range(self.l)``,
        the unique values of the loaded array are used as indices set to 1 in
        a zero vector of length ``self.num_classes + 1``.

        self.Y is rebuilt from scratch on each call, so re-running the cell
        does not duplicate labels.
        """
        label_vectors = []
        for i in tqdm(range(self.l)):
            labelled = np.load("./nyudv2/label/" + str(i) + ".npy")
            indicator = [0] * (self.num_classes + 1)
            for c in np.unique(labelled):
                indicator[c] = 1
            label_vectors.append(indicator)
        self.Y = label_vectors

    def process_X(self, path="RGBHHA_emb_processed.csv", X=0):
        """Set self.X_arr either from in-memory features or from a CSV file.

        Args:
            path: CSV with an "Embedding" column holding Python-literal lists;
                only read when ``X`` is not supplied.
            X: optional non-empty sequence (list or numpy array) of feature
                vectors. The default ``0`` (or an empty sequence) means
                "load from ``path``".

        Returns:
            ``0`` when ``X`` was used directly, otherwise the loaded DataFrame
            (also kept as ``self.df``).
        """
        if self._has_features(X):
            self.X_arr = X
            return 0

        df = pd.read_csv(path)
        self.df = df
        # The column is stored as text; parse it back into Python lists.
        df["Embedding"] = df["Embedding"].progress_map(ast.literal_eval)
        # Keep at most the first 1000 components of each embedding.
        df["Embedding"] = df["Embedding"].progress_map(lambda x: x[:1000])
        self.X_arr = df["Embedding"].values
        return df

    def process_Y(self):
        """Select the CLASSES positions of every label vector into self.all_Y.

        The result is transposed so that ``self.all_Y[k]`` is the binary target
        vector (one entry per image) for ``CLASSES[k]``.
        """
        selected = [[vec[idx] for idx in CLASSES] for vec in self.Y]
        self.all_Y = np.transpose(selected)

    def train_test_split(self, X, y):
        """Split X/y 80/20 with a fixed seed and remember the split on self.

        Because test_size and random_state are fixed, every class sees the same
        partition of the samples.

        Returns:
            ``(list(X_train), list(X_test), y_train, y_test)``.
        """
        # Inside a method this name resolves to the module-level
        # sklearn.model_selection.train_test_split, not to this method.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        return list(X_train), list(X_test), y_train, y_test

    def _fit_per_class(self, fit):
        """Run ``fit(class_pos, X_train, y_train)`` for every selected class.

        Stores the returned models in ``self.m`` and the held-out
        ``[X_test, y_test]`` pairs in ``self.test_data``, both ordered like
        CLASSES.
        """
        X = self.X_arr
        models = []
        test_data = []
        for i in range(self.num_final_classes):
            X_train, X_test, y_train, y_test = self.train_test_split(X, self.all_Y[i])
            models.append(fit(i, X_train, y_train))
            test_data.append([X_test, y_test])
        self.test_data = test_data
        self.m = models

    def sklearn_SVM(self):
        """Train one default ``sklearn.svm.SVC`` per selected class."""
        self._fit_per_class(lambda i, X_train, y_train: SVC().fit(X_train, y_train))

    def run_SVM(self, options='-B 10 -c 0.001', model_dir="./models"):
        """Train one liblinear model per selected class and save each to disk.

        Args:
            options: liblinear option string passed to ``parameter``.
            model_dir: directory receiving ``svm_<k>.model`` files; created if
                it does not exist yet.
        """
        os.makedirs(model_dir, exist_ok=True)

        def fit(i, X_train, y_train):
            m = train(problem(y_train, X_train), parameter(options))
            save_model(os.path.join(model_dir, f"svm_{i}.model"), m)
            return m

        self._fit_per_class(fit)

    def results(self, pred, truth):
        """Count confusion-matrix entries for each row of predictions.

        Args:
            pred: sequence of per-class prediction sequences; a value equal to
                1 counts as a positive prediction.
            truth: ground-truth sequences with the same layout as ``pred``.

        Returns:
            A list with one ``[TP, FP, TN, FN]`` entry per row of ``pred``.
        """
        res = []
        for i in range(len(pred)):
            TP, FP, TN, FN = 0, 0, 0, 0
            # Every row is assumed to be as long as the first one.
            for j in range(len(pred[0])):
                predicted_positive = pred[i][j] == 1
                actual_positive = truth[i][j] == 1
                if predicted_positive and actual_positive:
                    TP += 1
                elif predicted_positive:
                    FP += 1
                elif actual_positive:
                    FN += 1
                else:
                    TN += 1
            res.append([TP, FP, TN, FN])
        return res



### RGBHHA Embeddings - RCNN

In [82]:
# Build the label matrix, load the embeddings from the CSV file, and train one
# liblinear model per entry of CLASSES (run_SVM also writes each model to disk).
svm = SVM("embeddings")
svm.preprocess()
svm.process_Y()
svm.process_X(path="RGBHHA_emb_processed.csv")
svm.run_SVM()

# Trained models, ordered like CLASSES.
models = svm.m

100%|██████████| 1449/1449 [00:07<00:00, 203.27it/s]
100%|██████████| 1449/1449 [00:04<00:00, 324.37it/s]


.
optimization finished, #iter = 13
Objective value = -0.951711
nSV = 1155
.*
optimization finished, #iter = 14
Objective value = -0.712318
nSV = 1148
.*
optimization finished, #iter = 13
Objective value = -0.433457
nSV = 1141
.*
optimization finished, #iter = 14
Objective value = -0.911633
nSV = 1155
.*
optimization finished, #iter = 13
Objective value = -0.471834
nSV = 1125
.*
optimization finished, #iter = 14
Objective value = -0.784526
nSV = 1147
.*
optimization finished, #iter = 14
Objective value = -0.807433
nSV = 1155
.*
optimization finished, #iter = 13
Objective value = -0.475312
nSV = 1111
.*
optimization finished, #iter = 13
Objective value = -0.282086
nSV = 870
.
optimization finished, #iter = 13
Objective value = -0.708590
nSV = 1157
.*
optimization finished, #iter = 14
Objective value = -0.499645
nSV = 1098
.*
optimization finished, #iter = 14
Objective value = -0.824247
nSV = 1156
.*
optimization finished, #iter = 13
Objective value = -0.197940
nSV = 834
.*
optimization 

In [83]:
# Score every per-class model on its own held-out split.
labels, accs, vals, test_y = [], [], [], []

for m, (X, y) in zip(models, svm.test_data):
    # predict (from liblinearutil) also prints the accuracy line for this class.
    label, acc, val = predict(y, X, m)
    test_y.append(y)
    labels.append(label)
    accs.append(acc)
    vals.append(val)

# (predicted positives, actual positives) per class.
pred = [int(np.sum(class_labels)) for class_labels in labels]
truth = [np.sum(class_truth) for class_truth in test_y]
t = list(zip(pred, truth))
print(t)

report = svm.results(labels, test_y)

Accuracy = 62.069% (180/290) (classification)
Accuracy = 75.8621% (220/290) (classification)
Accuracy = 86.8966% (252/290) (classification)
Accuracy = 63.7931% (185/290) (classification)
Accuracy = 84.4828% (245/290) (classification)
Accuracy = 76.5517% (222/290) (classification)
Accuracy = 71.0345% (206/290) (classification)
Accuracy = 84.1379% (244/290) (classification)
Accuracy = 90% (261/290) (classification)
Accuracy = 80% (232/290) (classification)
Accuracy = 85.5172% (248/290) (classification)
Accuracy = 75.1724% (218/290) (classification)
Accuracy = 96.8966% (281/290) (classification)
Accuracy = 97.5862% (283/290) (classification)
Accuracy = 74.1379% (215/290) (classification)
Accuracy = 77.931% (226/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
Accuracy = 88.9655% (258/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
[(114, 132), (0, 70), (0, 38), (58, 105), (0, 45), (1, 67), (5, 85), (0, 46), (0, 29), (2, 56), (0, 42), (6, 66), (0, 9

In [95]:
# One row per class, in the order produced by SVM.results: [TP, FP, TN, FN].
report

[[68, 46, 112, 64],
 [0, 0, 220, 70],
 [0, 0, 252, 38],
 [29, 29, 156, 76],
 [0, 0, 245, 45],
 [0, 1, 222, 67],
 [3, 2, 203, 82],
 [0, 0, 244, 46],
 [0, 0, 261, 29],
 [0, 2, 232, 56],
 [0, 0, 248, 42],
 [0, 6, 218, 66],
 [0, 0, 281, 9],
 [0, 0, 283, 7],
 [0, 0, 215, 75],
 [2, 4, 224, 60],
 [0, 0, 262, 28],
 [0, 0, 258, 32],
 [0, 0, 262, 28]]

In [96]:
# Per-class F1 and its weight (TP + FN, i.e. the number of actual positives).
res = [f1_score(tp, fp, tn, fn) for tp, fp, tn, fn in report]
w = [tp + fn for tp, _fp, _tn, fn in report]

In [97]:
# Average of the per-class F1 scores, weighted by w (TP + FN of each class).
np.average(res, weights=w)

0.11707563447796253

### RGBD embeddings - RCNN

In [5]:
# Same pipeline as above, reading the embeddings from RGB_emb.csv instead.
# NOTE(review): run_SVM writes to the same ./models/svm_<k>.model paths on every
# run, so this overwrites the model files saved by earlier sections.
svm_d = SVM("embeddings")
svm_d.preprocess()
svm_d.process_Y()
svm_d.process_X(path="RGB_emb.csv")
svm_d.run_SVM()

# Rebinds `models`; the evaluation cell below reads this value.
models = svm_d.m

100%|██████████| 1449/1449 [00:07<00:00, 185.85it/s]
100%|██████████| 1449/1449 [04:47<00:00,  5.03it/s]
100%|██████████| 1449/1449 [00:00<00:00, 36214.89it/s]


.
optimization finished, #iter = 13
Objective value = -0.951707
nSV = 1155
.*
optimization finished, #iter = 14
Objective value = -0.712318
nSV = 1148
.*
optimization finished, #iter = 13
Objective value = -0.433457
nSV = 1140
.*
optimization finished, #iter = 14
Objective value = -0.911634
nSV = 1156
.*
optimization finished, #iter = 13
Objective value = -0.471840
nSV = 1124
.*
optimization finished, #iter = 14
Objective value = -0.784527
nSV = 1147
.*
optimization finished, #iter = 14
Objective value = -0.807428
nSV = 1155
.*
optimization finished, #iter = 13
Objective value = -0.475314
nSV = 1111
.*
optimization finished, #iter = 13
Objective value = -0.282083
nSV = 868
.
optimization finished, #iter = 12
Objective value = -0.708535
nSV = 1157
.*
optimization finished, #iter = 13
Objective value = -0.499628
nSV = 1099
.
optimization finished, #iter = 13
Objective value = -0.824223
nSV = 1156
.*
optimization finished, #iter = 13
Objective value = -0.197941
nSV = 833
.*
optimization f

In [6]:
# Score every per-class model on its own held-out split.
labels, accs, vals, test_y = [], [], [], []

for m, (X, y) in zip(models, svm_d.test_data):
    # predict (from liblinearutil) also prints the accuracy line for this class.
    label, acc, val = predict(y, X, m)
    test_y.append(y)
    labels.append(label)
    accs.append(acc)
    vals.append(val)

# (predicted positives, actual positives) per class.
pred = [int(np.sum(class_labels)) for class_labels in labels]
truth = [np.sum(class_truth) for class_truth in test_y]
t = list(zip(pred, truth))
print(t)

report_d = svm_d.results(labels, test_y)

Accuracy = 61.7241% (179/290) (classification)
Accuracy = 76.2069% (221/290) (classification)
Accuracy = 86.8966% (252/290) (classification)
Accuracy = 62.7586% (182/290) (classification)
Accuracy = 84.4828% (245/290) (classification)
Accuracy = 76.5517% (222/290) (classification)
Accuracy = 71.0345% (206/290) (classification)
Accuracy = 84.1379% (244/290) (classification)
Accuracy = 90% (261/290) (classification)
Accuracy = 80% (232/290) (classification)
Accuracy = 85.5172% (248/290) (classification)
Accuracy = 75.1724% (218/290) (classification)
Accuracy = 96.8966% (281/290) (classification)
Accuracy = 97.5862% (283/290) (classification)
Accuracy = 74.1379% (215/290) (classification)
Accuracy = 77.931% (226/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
Accuracy = 88.9655% (258/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
[(111, 132), (1, 70), (0, 38), (61, 105), (0, 45), (1, 67), (5, 85), (0, 46), (0, 29), (2, 56), (0, 42), (6, 66), (0, 

In [7]:
# Per-class F1 and its weight (TP + FN, i.e. the number of actual positives).
res_d = [f1_score(tp, fp, tn, fn) for tp, fp, tn, fn in report_d]
w_d = [tp + fn for tp, _fp, _tn, fn in report_d]

In [9]:
# Average of the per-class F1 scores, weighted by w_d (TP + FN of each class).
np.average(res_d, weights=w_d)

0.11709981131875447

### YOLO embeddings (fine-tuned vs. frozen YOLOv5n)

In [46]:
# Directories holding one pickled embedding per image (<i>.pkl).
path_rgb_ft = "./pickle_exp1_finetune_rgb_yolov5n/"
path_hha_ft = "./pickle_exp3_finetune_hha_yolov5n/"
path_rgb_freeze = "./pickle_exp2_freeze_rgb_yolov5n/"
# NOTE(review): this is the same directory as path_hha_ft (the *finetune* HHA
# run), so the "freeze" features below mix frozen RGB with fine-tuned HHA
# embeddings. Looks like a copy-paste slip -- confirm the intended freeze-HHA
# directory and update this path.
path_hha_freeze = "./pickle_exp3_finetune_hha_yolov5n/"

In [32]:
# Per-image feature vectors, filled by the loading loop in the next cell.
# Re-run this cell before re-running the loop, otherwise entries are appended twice.
FT = []
FREEZE = []

In [33]:
def _load_embedding(directory, file, limit=500):
    """Unpickle `file` from `directory` and return its first `limit` entries as an array.

    The file is opened in a `with` block so the handle is closed right away
    instead of being left for the garbage collector.
    """
    # NOTE: pickle.load can execute arbitrary code; only use it on files that
    # were produced by this project.
    with open(os.path.join(directory, file), "rb") as fh:
        return np.array(pickle.load(fh)[:limit])


# One feature vector per image: RGB entries followed by HHA entries.
for i in range(1449):
    file = f"{i}.pkl"
    rgb_ft = _load_embedding(path_rgb_ft, file)
    hha_ft = _load_embedding(path_hha_ft, file)
    rgb_freeze = _load_embedding(path_rgb_freeze, file)
    hha_freeze = _load_embedding(path_hha_freeze, file)

    ft = np.concatenate((rgb_ft, hha_ft))
    freeze = np.concatenate((rgb_freeze, hha_freeze))

    FT.append(ft)
    FREEZE.append(freeze)

In [61]:
# YOLO finetuned

# Same pipeline, but the features come from the in-memory FT list instead of a
# CSV file (process_X stores X as-is when it is supplied).
svm_yolo_ft = SVM("embeddings")
svm_yolo_ft.preprocess()
svm_yolo_ft.process_Y()
svm_yolo_ft.process_X(X = FT)
svm_yolo_ft.run_SVM()

# Trained models, ordered like CLASSES.
models_ft = svm_yolo_ft.m

100%|██████████| 1449/1449 [00:07<00:00, 197.58it/s]



optimization finished, #iter = 6
Objective value = -1.100858
nSV = 1159

optimization finished, #iter = 6
Objective value = -0.751974
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.458497
nSV = 1158

optimization finished, #iter = 6
Objective value = -1.038334
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.490166
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.855072
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.884763
nSV = 1159
*
optimization finished, #iter = 6
Objective value = -0.508262
nSV = 1157

optimization finished, #iter = 5
Objective value = -0.319419
nSV = 1149

optimization finished, #iter = 5
Objective value = -0.741320
nSV = 1159

optimization finished, #iter = 6
Objective value = -0.544061
nSV = 1158

optimization finished, #iter = 5
Objective value = -0.873517
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.215044
nSV = 1135

optimization finished, #iter = 5
Objective value 

In [62]:
# Score every per-class model on its own held-out split.
labels, accs, vals, test_y = [], [], [], []

for m, (X, y) in zip(models_ft, svm_yolo_ft.test_data):
    # predict (from liblinearutil) also prints the accuracy line for this class.
    label, acc, val = predict(y, X, m)
    test_y.append(y)
    labels.append(label)
    accs.append(acc)
    vals.append(val)

# (predicted positives, actual positives) per class.
pred = [int(np.sum(class_labels)) for class_labels in labels]
truth = [np.sum(class_truth) for class_truth in test_y]
t = list(zip(pred, truth))
print(t)

report_ft = svm_yolo_ft.results(labels, test_y)

Accuracy = 53.4483% (155/290) (classification)
Accuracy = 75.1724% (218/290) (classification)
Accuracy = 86.8966% (252/290) (classification)
Accuracy = 63.4483% (184/290) (classification)
Accuracy = 84.4828% (245/290) (classification)
Accuracy = 76.8966% (223/290) (classification)
Accuracy = 70.3448% (204/290) (classification)
Accuracy = 84.1379% (244/290) (classification)
Accuracy = 90% (261/290) (classification)
Accuracy = 80.3448% (233/290) (classification)
Accuracy = 85.5172% (248/290) (classification)
Accuracy = 77.2414% (224/290) (classification)
Accuracy = 96.8966% (281/290) (classification)
Accuracy = 97.5862% (283/290) (classification)
Accuracy = 74.1379% (215/290) (classification)
Accuracy = 78.6207% (228/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
Accuracy = 88.9655% (258/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
[(41, 132), (2, 70), (0, 38), (3, 105), (0, 45), (0, 67), (1, 85), (0, 46), (0, 29), (1, 56), (0, 42), (0, 66), 

In [63]:
# One row per class, in the order produced by SVM.results: [TP, FP, TN, FN].
report_ft

[[19, 22, 136, 113],
 [0, 2, 218, 70],
 [0, 0, 252, 38],
 [1, 2, 183, 104],
 [0, 0, 245, 45],
 [0, 0, 223, 67],
 [0, 1, 204, 85],
 [0, 0, 244, 46],
 [0, 0, 261, 29],
 [0, 1, 233, 56],
 [0, 0, 248, 42],
 [0, 0, 224, 66],
 [0, 0, 281, 9],
 [0, 0, 283, 7],
 [0, 0, 215, 75],
 [0, 0, 228, 62],
 [0, 0, 262, 28],
 [0, 0, 258, 32],
 [0, 0, 262, 28]]

In [107]:
# Per-class F1 and its weight (TP + FN, i.e. the number of actual positives).
res_ft = [f1_score(tp, fp, tn, fn) for tp, fp, tn, fn in report_ft]
w_ft = [tp + fn for tp, _fp, _tn, fn in report_ft]

In [108]:
# Average of the per-class F1 scores, weighted by w_ft (TP + FN of each class).
np.average(res_ft, weights=w_ft)

0.03027266545755737

In [64]:
# YOLO freeze

# Same pipeline, with features taken from the in-memory FREEZE list
# (process_X stores X as-is when it is supplied).
svm_yolo_freeze = SVM("embeddings")
svm_yolo_freeze.preprocess()
svm_yolo_freeze.process_Y()
svm_yolo_freeze.process_X(X = FREEZE)
svm_yolo_freeze.run_SVM()

# Trained models, ordered like CLASSES.
models_freeze = svm_yolo_freeze.m

100%|██████████| 1449/1449 [00:06<00:00, 212.37it/s]



optimization finished, #iter = 6
Objective value = -1.107439
nSV = 1159

optimization finished, #iter = 6
Objective value = -0.761928
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.459782
nSV = 1159

optimization finished, #iter = 6
Objective value = -1.045737
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.491535
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.858413
nSV = 1159

optimization finished, #iter = 6
Objective value = -0.886392
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.511131
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.319924
nSV = 1151

optimization finished, #iter = 5
Objective value = -0.746841
nSV = 1159

optimization finished, #iter = 5
Objective value = -0.545292
nSV = 1159

optimization finished, #iter = 6
Objective value = -0.883112
nSV = 1159
*
optimization finished, #iter = 6
Objective value = -0.214612
nSV = 1130

optimization finished, #iter = 5
Objective value 

In [65]:
# Score every per-class model on its own held-out split.
labels, accs, vals, test_y = [], [], [], []

for m, (X, y) in zip(models_freeze, svm_yolo_freeze.test_data):
    # predict (from liblinearutil) also prints the accuracy line for this class.
    label, acc, val = predict(y, X, m)
    test_y.append(y)
    labels.append(label)
    accs.append(acc)
    vals.append(val)

# (predicted positives, actual positives) per class.
pred = [int(np.sum(class_labels)) for class_labels in labels]
truth = [np.sum(class_truth) for class_truth in test_y]
t = list(zip(pred, truth))
print(t)

report_freeze = svm_yolo_freeze.results(labels, test_y)

Accuracy = 56.8966% (165/290) (classification)
Accuracy = 75.5172% (219/290) (classification)
Accuracy = 86.8966% (252/290) (classification)
Accuracy = 62.7586% (182/290) (classification)
Accuracy = 84.4828% (245/290) (classification)
Accuracy = 76.5517% (222/290) (classification)
Accuracy = 70.6897% (205/290) (classification)
Accuracy = 84.1379% (244/290) (classification)
Accuracy = 90% (261/290) (classification)
Accuracy = 80.6897% (234/290) (classification)
Accuracy = 85.5172% (248/290) (classification)
Accuracy = 77.5862% (225/290) (classification)
Accuracy = 96.8966% (281/290) (classification)
Accuracy = 97.5862% (283/290) (classification)
Accuracy = 74.1379% (215/290) (classification)
Accuracy = 78.6207% (228/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
Accuracy = 88.9655% (258/290) (classification)
Accuracy = 90.3448% (262/290) (classification)
[(35, 132), (1, 70), (0, 38), (3, 105), (0, 45), (1, 67), (0, 85), (0, 46), (0, 29), (0, 56), (0, 42), (1, 66), 

In [66]:
# One row per class, in the order produced by SVM.results: [TP, FP, TN, FN].
report_freeze

[[21, 14, 144, 111],
 [0, 1, 219, 70],
 [0, 0, 252, 38],
 [0, 3, 182, 105],
 [0, 0, 245, 45],
 [0, 1, 222, 67],
 [0, 0, 205, 85],
 [0, 0, 244, 46],
 [0, 0, 261, 29],
 [0, 0, 234, 56],
 [0, 0, 248, 42],
 [1, 0, 224, 65],
 [0, 0, 281, 9],
 [0, 0, 283, 7],
 [0, 0, 215, 75],
 [0, 0, 228, 62],
 [0, 0, 262, 28],
 [0, 0, 258, 32],
 [0, 0, 262, 28]]

In [105]:
# Per-class F1 and its weight (TP + FN, i.e. the number of actual positives).
res_freeze = [f1_score(tp, fp, tn, fn) for tp, fp, tn, fn in report_freeze]
w_freeze = [tp + fn for tp, _fp, _tn, fn in report_freeze]

In [106]:
# Average of the per-class F1 scores, weighted by w_freeze (TP + FN of each class).
np.average(res_freeze, weights=w_freeze)

0.03441071824280872