We have a small amount of data and a high number of features, so we need a classification model with high bias and low variance, such as a linear SVM.

https://www.kdnuggets.com/2020/05/guide-choose-right-machine-learning-algorithm.html

In [1]:
from csv import reader
import pandas as pd
import numpy as np
import os, glob, cv2

# Resolve the dataset folder relative to the notebook's directory WITHOUT
# changing the process working directory: if anything below fails, the kernel
# is not left stranded in another folder.
folder_pwd = os.getcwd()
path = os.path.abspath(
    os.path.join(folder_pwd, '..', '..', 'resources', 'Data - BabyWatcher', '1000')
)

# Each sub-folder of <path>/Features is one class label. Sort the listing so
# the class order is deterministic across platforms, and ignore stray files
# that are not class folders.
features_dir = os.path.join(path, 'Features')
features = [
    name for name in sorted(os.listdir(features_dir))
    if os.path.isdir(os.path.join(features_dir, name))
]

features

['arm', 'foot', 'hand', 'head', 'leg', 'none', 'spine']

### Load features

In [2]:
def read_from_folder(folder):
    """Load every image under ``<path>/<folder>/<feature>/`` as grayscale.

    Relies on the notebook-level ``path`` (dataset root) and ``features``
    (list of class sub-folder names) variables.

    Parameters
    ----------
    folder : str
        Name of the sub-folder of ``path`` that holds one directory per feature.

    Returns
    -------
    pandas.DataFrame
        One row per readable image with the columns ``img_name`` (file name),
        ``feature`` (name of the class folder) and ``img`` (pixel array).
        Files that OpenCV cannot decode are reported and skipped.
    """
    img_l = []
    feature_l = []
    pixel_l = []
    path1 = path+'/{}/'.format(folder)
    for feature in features:
        for file in os.listdir(path1+feature):
            # Flag 0 makes OpenCV load the file as a single-channel grayscale image.
            pixels = cv2.imread(path1+feature+'/'+file, 0)
            if pixels is None:
                # cv2.imread does not raise on unreadable/non-image files, it
                # returns None; keeping that value would only fail later when
                # the image shape is accessed.
                print('Skipping unreadable image: '+path1+feature+'/'+file)
                continue
            img_l.append(file)
            feature_l.append(feature)
            pixel_l.append(pixels)

    feature_images = pd.DataFrame({'img_name': img_l,'feature': feature_l, 'img': pixel_l})
    return feature_images

# Load the labelled crops and summarise how many samples each class has.
feature_images = read_from_folder('Features')

n_total = feature_images.shape[0]
print('Total features: ', n_total)

per_class_counts = feature_images.value_counts('feature')
print(per_class_counts)

feature_images

Total features:  6118
feature
none     4627
head     1009
spine     348
hand       59
arm        58
leg        13
foot        4
dtype: int64


Unnamed: 0,img_name,feature,img
0,1007~20355_2021-07-14_22.51_224458527.png,arm,"[[31, 40, 51, 63, 72, 79, 84, 91, 98, 104, 106..."
1,1060~20465_2021-03-25_21.11_20574220.png,arm,"[[23, 24, 21, 17, 13, 9, 13, 24, 29, 27, 30, 4..."
2,1083~20538_2021-03-13_23.48_234759342.png,arm,"[[59, 53, 43, 31, 23, 19, 14, 13, 13, 16, 21, ..."
3,1092~20583_2021-04-23_20.19_20189924.png,arm,"[[50, 47, 44, 45, 46, 45, 44, 50, 55, 58, 63, ..."
4,1093~20583_2021-04-23_20.32_202847409.png,arm,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 156, ..."
...,...,...,...
6113,978~20201_2021-02-18_14.47_144533488.png,spine,"[[115, 114, 117, 116, 103, 89, 80, 84, 90, 92,..."
6114,981~20201_2021-02-18_14.59_145447718.png,spine,"[[0, 8, 0, 0, 35, 34, 37, 36, 34, 41, 46, 48, ..."
6115,990~20297_2021-03-06_12.09_12819509.png,spine,"[[37, 31, 30, 34, 38, 42, 46, 50, 48, 44, 39, ..."
6116,996~20306_2021-02-24_16.16_16932883.png,spine,"[[94, 105, 115, 118, 113, 98, 88, 83, 84, 77, ..."


### Extract image sizes

In [3]:
def image_size(row):
    """Add the pixel dimensions of ``row['img']`` to the row.

    NumPy image arrays are indexed ``(rows, columns)``, so ``shape[0]`` is the
    height and ``shape[1]`` is the width.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must contain an ``img`` entry exposing a ``shape`` with at least two
        dimensions.

    Returns
    -------
    The same ``row`` object with ``width`` and ``height`` entries set.
    """
    height, width = row['img'].shape[:2]
    # 'width' is assigned first so the resulting column order stays width, height.
    row['width'] = width
    row['height'] = height
    return row

# Row-wise pass: image_size adds the 'width' and 'height' columns for every image.
feature_images = feature_images.apply(image_size, axis=1)
feature_images

Unnamed: 0,img_name,feature,img,width,height
0,1007~20355_2021-07-14_22.51_224458527.png,arm,"[[31, 40, 51, 63, 72, 79, 84, 91, 98, 104, 106...",99,99
1,1060~20465_2021-03-25_21.11_20574220.png,arm,"[[23, 24, 21, 17, 13, 9, 13, 24, 29, 27, 30, 4...",75,75
2,1083~20538_2021-03-13_23.48_234759342.png,arm,"[[59, 53, 43, 31, 23, 19, 14, 13, 13, 16, 21, ...",87,87
3,1092~20583_2021-04-23_20.19_20189924.png,arm,"[[50, 47, 44, 45, 46, 45, 44, 50, 55, 58, 63, ...",87,87
4,1093~20583_2021-04-23_20.32_202847409.png,arm,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 156, ...",143,143
...,...,...,...,...,...
6113,978~20201_2021-02-18_14.47_144533488.png,spine,"[[115, 114, 117, 116, 103, 89, 80, 84, 90, 92,...",240,240
6114,981~20201_2021-02-18_14.59_145447718.png,spine,"[[0, 8, 0, 0, 35, 34, 37, 36, 34, 41, 46, 48, ...",146,146
6115,990~20297_2021-03-06_12.09_12819509.png,spine,"[[37, 31, 30, 34, 38, 42, 46, 50, 48, 44, 39, ...",117,117
6116,996~20306_2021-02-24_16.16_16932883.png,spine,"[[94, 105, 115, 118, 113, 98, 88, 83, 84, 77, ...",97,97


### Filter images

In [11]:
from Process import process
# Run the project's `process` function on every raw image; crop_thresh=0 is
# forwarded to it as a keyword argument.
# NOTE(review): judging by the displayed values, `process` appears to return a
# tuple whose first element is the processed image — confirm against Process.py.
feature_images['processed_img'] = feature_images['img'].apply(process, crop_thresh=0)
feature_images

Unnamed: 0,img_name,feature,img,width,height,processed_img
0,1007~20355_2021-07-14_22.51_224458527.png,arm,"[[31, 40, 51, 63, 72, 79, 84, 91, 98, 104, 106...",99,99,"([[1, 1, 1, 2, 2, 3, 4, 6, 10, 18, 27, 33, 37,..."
1,1060~20465_2021-03-25_21.11_20574220.png,arm,"[[23, 24, 21, 17, 13, 9, 13, 24, 29, 27, 30, 4...",75,75,"([[2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5..."
2,1083~20538_2021-03-13_23.48_234759342.png,arm,"[[59, 53, 43, 31, 23, 19, 14, 13, 13, 16, 21, ...",87,87,"([[10, 10, 10, 9, 6, 5, 5, 5, 5, 5, 5, 7, 10, ..."
3,1092~20583_2021-04-23_20.19_20189924.png,arm,"[[50, 47, 44, 45, 46, 45, 44, 50, 55, 58, 63, ...",87,87,"([[17, 17, 17, 17, 17, 17, 18, 19, 20, 20, 18,..."
4,1093~20583_2021-04-23_20.32_202847409.png,arm,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 156, ...",143,143,"([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 24,..."
...,...,...,...,...,...,...
6113,978~20201_2021-02-18_14.47_144533488.png,spine,"[[115, 114, 117, 116, 103, 89, 80, 84, 90, 92,...",240,240,"([[83, 80, 79, 77, 76, 62, 48, 41, 36, 30, 29,..."
6114,981~20201_2021-02-18_14.59_145447718.png,spine,"[[0, 8, 0, 0, 35, 34, 37, 36, 34, 41, 46, 48, ...",146,146,"([[1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 5, 5, 6, 6, 6..."
6115,990~20297_2021-03-06_12.09_12819509.png,spine,"[[37, 31, 30, 34, 38, 42, 46, 50, 48, 44, 39, ...",117,117,"([[3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7..."
6116,996~20306_2021-02-24_16.16_16932883.png,spine,"[[94, 105, 115, 118, 113, 98, 88, 83, 84, 77, ...",97,97,"([[39, 39, 38, 37, 37, 37, 37, 37, 37, 36, 36,..."


In [12]:
def e(row):
    """Replace ``row['processed_img']`` by its first element.

    The entry is only unwrapped while it is not yet a NumPy array. That makes
    the function idempotent: re-running the cell on already-unwrapped rows
    leaves the image untouched instead of reducing it to its first pixel row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must contain a ``processed_img`` entry.

    Returns
    -------
    The same ``row`` object with ``processed_img`` unwrapped.
    """
    processed = row['processed_img']
    if not isinstance(processed, np.ndarray):
        row['processed_img'] = processed[0]
    return row
# Row-wise pass: e rewrites the 'processed_img' entry of each row with its first element.
feature_images = feature_images.apply(e, axis=1)

### Resize all images to the same size

In [14]:
from Process import process
def resize(x, size):
    """Return ``x`` rescaled by OpenCV to a square of ``size`` x ``size`` pixels."""
    target_shape = (size, size)
    resized = cv2.resize(x, target_shape)
    return resized

def hog(df, method , pixels_per_cell, cells_per_block):
    """Apply ``method`` to every row of a copy of ``df``.

    The ``resized_img`` column is renamed to ``window`` before ``method`` is
    called row by row with ``pixels_per_cell`` and ``cells_per_block`` passed
    as keyword arguments. The input frame is left unmodified.

    Returns
    -------
    pandas.DataFrame
        Whatever the row-wise application of ``method`` produces.
    """
    windows = df.copy(deep=True).rename(columns={'resized_img': 'window'})
    hog_kwargs = {
        'pixels_per_cell': pixels_per_cell,
        'cells_per_block': cells_per_block,
    }
    return windows.apply(method, axis=1, **hog_kwargs)

### Train SVM

In [15]:
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score
from sklearn.model_selection import cross_val_score

def train_svm(data, for_feat, random_state=None):
    """Fit an SVC that separates the features in ``for_feat`` from 'none'.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``feature`` column (class labels) and a ``hog`` column.
        Each ``hog`` entry is expected to be a 2-D array with a single row,
        shape ``(1, n_features)``, matching the "(1, 3600)" shape printed by
        the training cell.
    for_feat : str or iterable of str
        Label(s) the classifier should keep. Every other label is treated as
        'none'.
    random_state : int or None, optional
        Forwarded to ``svm.SVC``; set it to make the probability calibration
        reproducible. The default keeps the previous unseeded behaviour.

    Returns
    -------
    sklearn.svm.SVC
        The fitted classifier (with ``probability=True``).
    """
    # A bare string must be wrapped: `x in 'some string'` is a substring test,
    # not a membership test.
    keep = [for_feat] if isinstance(for_feat, str) else list(for_feat)

    # Have each SVM think that other features are actually 'none'
    labels = (
        data['feature']
        .where(data['feature'].isin(keep), 'none')
        .to_numpy(dtype=str)
    )

    # Stack the (1, n_features) descriptors into one (n_samples, n_features) matrix.
    hog_features = np.stack(data['hog'].tolist(), axis=1)[0]

    clf = svm.SVC(probability=True, random_state=random_state)
    # Features and labels are passed separately: merging them into a single
    # array would coerce the numeric features to strings.
    clf.fit(hog_features, labels)

    return clf

def save_classifiers(svms, svm_folder):
    """Pickle every classifier in ``svms`` into ``svm_folder``.

    Each file is named after the classifier's classes other than 'none',
    joined with underscores (e.g. ``arm.pkl`` or ``head_spine.pkl``). The
    folder, including missing parents, is created if necessary.

    Parameters
    ----------
    svms : iterable
        Fitted classifiers exposing a ``classes_`` attribute.
    svm_folder : str or path-like
        Destination directory.

    Notes
    -----
    Pickle files can execute arbitrary code when loaded; only load files
    written by this function from a trusted location.
    """
    import pickle
    from pathlib import Path

    # Creating the folder does not depend on the classifier, so do it once.
    out_dir = Path(svm_folder)
    out_dir.mkdir(parents=True, exist_ok=True)

    for clf in svms:
        # Filtering (instead of list.remove) also copes with a classifier that
        # was trained without a 'none' class.
        class_ = [str(name) for name in clf.classes_ if name != 'none']
        pkl_filename = '_'.join(class_)+'.pkl'

        with open(out_dir / pkl_filename, 'wb') as file:
            pickle.dump(clf, file)

### Start Training

In [16]:
import Hog

# Candidate HOG configurations. Each entry is read by the training loop as
# [image size, pixels_per_cell, cells_per_block, HOG function from the Hog module].
hog_sets = [[ 128, (16,16), (4, 4), Hog.compute_hog], 
            [ 128, (16,16), (4, 4), Hog.compute_hog1],
            [ 128, (16,16), (2, 2), Hog.compute_hog], 
            [ 128, (16,16), (2, 2), Hog.compute_hog1],
            [ 96, (16,16), (2, 2), Hog.compute_hog], 
            [ 96, (16,16), (2, 2), Hog.compute_hog1],
            [ 128, (8,8), (2, 2), Hog.compute_hog], 
            [ 128, (8,8), (2, 2), Hog.compute_hog1]]


# Train and persist one group of SVMs per selected HOG configuration.
# The slice currently selects only the second entry of hog_sets.
for hog_set in hog_sets[1:2]:

    print(hog_set, end=' ')
    size, pixels_per_cell, cells_per_block, method = hog_set

    # Bring every processed image to a common square size before HOG extraction.
    df = feature_images.copy(deep=True)
    df['resized_img'] = df['processed_img'].apply(resize, size=size)
    print('Resized', end=' ')

    df = hog(df, method, pixels_per_cell, cells_per_block)
    print(f"HOG computed {df['hog'][0].shape}", end=' ')

    # The first classifier is trained on all labels; the remaining ones are
    # trained per feature, with every other label folded into 'none'.
    print(features, end=' ')
    svms = [train_svm(df, features)]
    for f in features:
        if f != 'none':
            print(f, end=' ')
            svms.append(train_svm(df, f))

    # The folder name encodes the configuration the classifiers were trained with.
    svm_folder = f"{path}/SVM/{size}-{pixels_per_cell}-{cells_per_block}-{method.__name__}-feedback"
    save_classifiers(svms, svm_folder)
    print('\n'+svm_folder)

[128, (16, 16), (4, 4), <function compute_hog1 at 0x00000296C16E21F0>] Resized HOG computed (1, 3600) ['arm', 'foot', 'hand', 'head', 'leg', 'none', 'spine'] arm foot hand head leg spine 
C:\Users\emery\IdeaProjects\Project3-1\resources\Data - BabyWatcher\Zain/SVM/128-(16, 16)-(4, 4)-compute_hog1-feedback
