# SVM Classifier

In [127]:
from skimage.io import imread
from skimage.feature import hog
from skimage.color import rgb2grey
import sklearn
import matplotlib.pyplot as plt
import numpy as np
import dill
import random
import os
import re
import cv2

* ### Data (testing 100 images)

In [121]:
img_index = random.sample(range(1,2500), 100)
img_path = os.path.join(os.getcwd(), "data/raw_data/img_nparrays")

img_names = [os.listdir(img_path)[i] for i in img_index]
testing_paths = [os.path.join(img_path, name) for name in img_names]
testing_labels = [re.sub(r'\d+', '', name) for name in img_names]

image_dir = os.path.join(os.getcwd(), "data/raw_data/resized")
categories = os.listdir(image_dir)
catnum_dict = dict(zip(categories, range(len(categories))))

testing_num_labels = [catnum_dict[l] for l in testing_labels]
testing_unflat = [dill.load(open(p, 'rb')) for p in testing_paths]
testing_flat= [dill.load(open(p, 'rb')).flatten() for p in testing_paths]


In [67]:
# image_dir = os.path.join(os.getcwd(), "data/raw_data/resized")
# categories = os.listdir(image_dir)

# from sklearn.utils import Bunch

# img_data = Bunch(data=testing_flat,
#                  target=range(len(categories)),
#                  target_names=categories,
#                  DESCR="trash image classification dataset")

In [66]:
# from sklearn.utils import Bunch

# image_dir = os.path.join(os.getcwd(), "data/raw_data/resized")
# categories = os.listdir(image_dir)
# subdirs = [os.path.join(image_dir, c) for c in categories]

# descr = "trash image classification dataset"
# images = []
# flat_data = []
# target = []
# for i, sub in enumerate(subdirs):
#     for file in os.listdir(sub):
#         img = imread(file)
#         flat_data.append(img_resized.flatten()) 
#         images.append(img_resized)
#         target.append(i)
# flat_data = np.array(flat_data)
# target = np.array(target)
# images = np.array(images)

# img_data = Bunch(data=testing_flat,target=target,
#                  target_names=categories,
#                  images=images,
#                  DESCR=descr)

['paper', 'metal', 'cardboard', 'trash', 'glass', 'plastic']

* ### Feature Extraction

In [123]:
def extract_features(img):
    color_features = img.flatten()
    grey_image = rgb2grey(img)
    hog_features = hog(grey_image, block_norm='L2-Hys')
    flat_features = np.hstack((color_features, hog_features))
    return flat_features

In [128]:
orb = cv2.ORB_create()
def to_orb_desc(image):
    (kps, image_orb_desc) = orb.detectAndCompute(image, None)
    if image_orb_desc is None:
        return []
    return image_orb_desc

In [133]:
def extract_rgb_sift_features(image):
    sift = cv2.xfeatures2d.SIFT_create()
        
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    kps = sift.detect(np.array(image_gray), None)
    image_blue = [[pixel[0] for pixel in row] for row in image]
    image_green = [[pixel[1] for pixel in row] for row in image]
    image_red = [[pixel[2] for pixel in row] for row in image]
    (_, desc_blue) = sift.compute(np.array(image_blue), kps) 
    (_, desc_green) = sift.compute(np.array(image_green), kps) 
    (_, desc_red) = sift.compute(np.array(image_red), kps) 
    if desc_blue is None:
        desc_blue = []
    if desc_green is None:
        desc_green = []
    if desc_red is None:
        desc_red = []
    # Concat SIFT descriptor for all 3 channels into 1 384-dim vector
    desc = [combine_desc(b, g, r) for (b, g, r) in zip(desc_blue, desc_green, desc_red)] 
    return desc


In [105]:
mixed_img_str = 'data/raw_data/resize-mixed'
mixed_img_dir = os.path.join(os.getcwd(), mixed_img_str)
mixed_img_names = [os.listdir(mixed_img_dir)[i] for i in img_index]
mixed_imgs = [imread(os.path.join(mixed_img_dir, i)) for i in mixed_img_names]

In [134]:
extract_rgb_sift_features(mixed_imgs[3]).shape

AttributeError: module 'cv2.cv2' has no attribute 'xfeatures2d'

* ### Build SVM model

In [132]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

# feature extraction
img_extract = [to_orb_desc(img) for img in mixed_imgs]

# split test and train sets
Img_train, Img_test, label_train, label_test = \
train_test_split(img_extract, testing_num_labels, test_size=0.2, random_state=109)

# train model
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}
]
svc = svm.SVC()
svm_model = GridSearchCV(svc, param_grid)
svm_model.fit(Img_train, label_train)

label_pred = svm_model.predict(Img_test)
print(classification_report(label_test, label_pred))



ValueError: setting an array element with a sequence.

* ### Feature reduction

In [30]:
# single image testing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

print('original shape: ', matrix_test.shape)

# scaling
ss = StandardScaler()
ss_trans = ss.fit_transform(matrix_test.reshape(-1,1))

# PCA
pca = PCA(n_components=1)
pca_trans = ss.transform(ss_trans)

# new shape
print('after transformation new shape is: ', pca_trans.shape)

original shape:  (2, 589824)
after transformation new shape is:  (1179648, 1)


In [31]:
pca_trans

array([[-4.59682663],
       [-4.62055739],
       [-4.63022548],
       ...,
       [-4.62231522],
       [-4.62934656],
       [-4.63901465]])