In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn import svm

import pickle

# SVM with Img Pixel Features

## Load Dataset

In [8]:
with open("preprocessed_data/train_df.pkl", 'rb') as f:
    train_df = pickle.load(f)
    # Shuffling the training data so that it is not in blocks
    train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)
    X_train = train_df.drop(columns=["label"])
    y_train = np.array(train_df[["label"]]).ravel()

with open("preprocessed_data/test_df.pkl", 'rb') as f:
    test_df = pickle.load(f)
    test_df = test_df.sample(frac=1, random_state=42).reset_index(drop=True)
    X_test = test_df.drop(columns=["label"])
    y_test = np.array(test_df[["label"]]).ravel()

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, random_state=1, test_size=0.2)

In [6]:
X_tr

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4086,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095
7099,0.701961,0.733333,0.749020,0.745098,0.749020,0.749020,0.756863,0.764706,0.768627,0.768627,...,0.709804,0.705882,0.701961,0.705882,0.705882,0.701961,0.709804,0.721569,0.745098,0.752941
2128,0.333333,0.349020,0.356863,0.360784,0.368627,0.376471,0.388235,0.396078,0.400000,0.411765,...,0.325490,0.333333,0.321569,0.329412,0.333333,0.341176,0.333333,0.345098,0.349020,0.345098
4028,0.482353,0.498039,0.482353,0.494118,0.501961,0.509804,0.517647,0.509804,0.541176,0.596078,...,0.345098,0.333333,0.329412,0.337255,0.337255,0.329412,0.329412,0.333333,0.325490,0.333333
921,0.996078,0.996078,0.992157,0.976471,0.949020,0.937255,0.929412,0.913725,0.905882,0.898039,...,0.725490,0.721569,0.717647,0.713725,0.713725,0.713725,0.709804,0.705882,0.701961,0.701961
9047,0.898039,0.901961,0.898039,0.901961,0.898039,0.894118,0.894118,0.890196,0.890196,0.886275,...,0.450980,0.458824,0.450980,0.454902,0.458824,0.466667,0.462745,0.466667,0.474510,0.470588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2895,0.600000,0.600000,0.600000,0.603922,0.603922,0.607843,0.603922,0.603922,0.607843,0.607843,...,0.568627,0.568627,0.564706,0.560784,0.564706,0.568627,0.560784,0.560784,0.556863,0.552941
7813,0.290196,0.294118,0.286275,0.270588,0.270588,0.274510,0.278431,0.286275,0.286275,0.278431,...,0.478431,0.474510,0.474510,0.462745,0.454902,0.439216,0.427451,0.407843,0.400000,0.415686
905,0.686275,0.537255,0.478431,0.505882,0.521569,0.533333,0.545098,0.576471,0.623529,0.682353,...,0.345098,0.337255,0.333333,0.337255,0.345098,0.313725,0.278431,0.290196,0.262745,0.258824
5192,0.552941,0.611765,0.580392,0.572549,0.568627,0.556863,0.596078,0.580392,0.584314,0.572549,...,0.600000,0.650980,0.619608,0.474510,0.286275,0.145098,0.121569,0.121569,0.117647,0.121569


In [7]:
y_tr

array([2, 1, 1, ..., 2, 2, 3], dtype=int32)

In [9]:
clf = svm.SVC()
clf.fit(X_tr, y_tr)

In [9]:
y_val_pred = clf.predict(X_val)

In [13]:
print("Accuracy: "+str(accuracy_score(y_val, y_val_pred)))
print('\n')
print(classification_report(y_val, y_val_pred))
labels_dict = {0: "tops", 1: "bottoms", 2: "bags", 3: "shoes", 4: "others"}

Accuracy: 0.6661538461538462


              precision    recall  f1-score   support

           0       1.00      0.07      0.13        57
           1       0.76      0.74      0.75       528
           2       0.64      0.72      0.68       578
           3       0.74      0.58      0.65       340
           4       0.56      0.64      0.60       447

    accuracy                           0.67      1950
   macro avg       0.74      0.55      0.56      1950
weighted avg       0.68      0.67      0.66      1950



In [10]:
y_pred = clf.predict(X_test)
print("Accuracy: "+str(accuracy_score(y_test, y_pred)))
print('\n')
print(classification_report(y_test, y_pred))

Accuracy: 0.5836734693877551


              precision    recall  f1-score   support

           0       1.00      0.05      0.10        60
           1       0.68      0.92      0.78        60
           2       0.36      0.74      0.49        35
           3       0.79      0.68      0.73        60
           4       0.49      0.60      0.54        30

    accuracy                           0.58       245
   macro avg       0.66      0.60      0.53       245
weighted avg       0.72      0.58      0.53       245



# SVM (HOG Features)

In [3]:
with open("preprocessed_data/hog_train_Df.pkl", 'rb') as f:
    train_df = pickle.load(f)
    train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)
    X_train = train_df.drop(columns=["label"])
    y_train = np.array(train_df[["label"]]).ravel()

with open("preprocessed_data/hog_test_df.pkl", 'rb') as f:
    test_df = pickle.load(f)
    test_df = test_df.sample(frac=1, random_state=42).reset_index(drop=True)
    X_test = test_df.drop(columns=["label"])
    y_test = np.array(test_df[["label"]]).ravel()

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, random_state=1, test_size=0.2)

In [4]:
X_tr

Unnamed: 0,hog_0,hog_1,hog_2,hog_3,hog_4,hog_5,hog_6,hog_7,hog_8,hog_9,...,hog_3770,hog_3771,hog_3772,hog_3773,hog_3774,hog_3775,hog_3776,hog_3777,hog_3778,hog_3779
331,0.284482,0.094212,0.075992,0.083471,0.068550,0.069679,0.154696,0.210131,0.284482,0.248418,...,0.014153,0.108340,0.114522,0.077758,0.103110,0.024609,0.016966,0.011854,0.047496,0.047364
2018,0.351422,0.049499,0.057999,0.022752,0.091303,0.001956,0.006383,0.013012,0.090630,0.389057,...,0.118168,0.103245,0.044909,0.018346,0.005789,0.030579,0.015236,0.015208,0.077124,0.085469
11373,0.136704,0.042001,0.077977,0.020128,0.315214,0.206252,0.090747,0.059003,0.076469,0.110006,...,0.288594,0.064447,0.032358,0.117933,0.054622,0.150555,0.112754,0.056019,0.122108,0.116054
3397,0.238160,0.231769,0.191681,0.086366,0.162552,0.013621,0.057410,0.086403,0.170282,0.238160,...,0.165319,0.225421,0.195468,0.037382,0.164342,0.158124,0.225421,0.096665,0.170234,0.158925
5216,0.153292,0.127676,0.387508,0.071251,0.139053,0.011098,0.017757,0.056463,0.000000,0.167208,...,0.007470,0.074987,0.064657,0.341367,0.341367,0.158975,0.000000,0.009326,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7813,0.132829,0.019326,0.047842,0.004033,0.210106,0.054687,0.334803,0.334803,0.027430,0.142951,...,0.073813,0.178052,0.006644,0.000000,0.003557,0.129965,0.005083,0.416585,0.242506,0.031990
10955,0.074600,0.011037,0.062969,0.117577,0.307527,0.307527,0.102168,0.000000,0.000000,0.147534,...,0.000000,0.151418,0.024088,0.007000,0.031195,0.318717,0.334630,0.088814,0.193160,0.117554
905,0.289616,0.116172,0.038571,0.035277,0.147888,0.058353,0.094979,0.128068,0.253666,0.289616,...,0.163367,0.293430,0.293430,0.115460,0.073955,0.256650,0.100423,0.092604,0.163331,0.293430
5192,0.342388,0.192438,0.075155,0.071665,0.022790,0.091238,0.148303,0.096312,0.342388,0.342388,...,0.000000,0.064436,0.000000,0.000000,0.060681,0.304776,0.304776,0.304776,0.042552,0.000000


In [5]:
y_tr

array(['bag', 'bag', 'other', ..., 'other', 'bottom', 'shoe'],
      dtype=object)

In [6]:
clf = svm.SVC()
clf.fit(X_tr, y_tr)

In [18]:
y_val_pred = clf.predict(X_val)

In [19]:
print("Accuracy: "+str(accuracy_score(y_val, y_val_pred)))
print('\n')
print(classification_report(y_val, y_val_pred))

Accuracy: 0.8248269896193772


              precision    recall  f1-score   support

         bag       0.77      0.85      0.81       586
      bottom       0.91      0.86      0.89       552
       other       0.74      0.86      0.79       539
        shoe       0.89      0.70      0.78       309
         top       0.91      0.78      0.84       326

    accuracy                           0.82      2312
   macro avg       0.85      0.81      0.82      2312
weighted avg       0.83      0.82      0.83      2312



In [7]:
y_pred = clf.predict(X_test)
print("Accuracy: "+str(accuracy_score(y_test, y_pred)))
print('\n')
print(classification_report(y_test, y_pred))

Accuracy: 0.8204081632653061


              precision    recall  f1-score   support

         bag       0.54      0.80      0.64        35
      bottom       0.92      0.97      0.94        60
       other       0.66      0.63      0.64        30
        shoe       0.90      0.78      0.84        60
         top       1.00      0.82      0.90        60

    accuracy                           0.82       245
   macro avg       0.80      0.80      0.79       245
weighted avg       0.85      0.82      0.83       245

