Low amount of data and high number of features, so we need a classification model that has bias/low variance such as Linear SVM.

https://www.kdnuggets.com/2020/05/guide-choose-right-machine-learning-algorithm.html

In [1]:
import os
os.chdir('..')
os.chdir('../resources')
pwd = os.getcwd()

In [3]:
%%time
import pandas as pd
import cv2

features = []
pixels = []
img_names = []
feature_names = ['head', 'spine', 'leg', 'arm', 'hand', 'foot']
for feature in feature_names+ ['none']:
    file_names = os.listdir(pwd+'/features/'+feature)
    img_names = img_names + file_names
    for file in file_names:
        features.append(feature)
        pixels.append(cv2.imread(pwd+'/features/'+feature+'/'+file, 0))

feature_images = pd.DataFrame({'image_name': img_names,'class' : features, 'image_pixels': pixels})

print('Total features: ', feature_images.shape[0])
print(feature_images.value_counts('class'))
feature_images

Total features:  2123
class
head     1365
spine     410
none      167
arm        75
hand       59
leg        29
foot       18
dtype: int64
Wall time: 1.5 s


Unnamed: 0,image_name,class,image_pixels
0,19150_2021-01-24_10.31_103052326.png,head,"[[79, 78, 82, 85, 87, 89, 93, 97, 100, 104, 10..."
1,19150_2021-01-24_19.28_191920932.png,head,"[[95, 96, 98, 100, 101, 104, 108, 108, 106, 10..."
2,19150_2021-01-24_19.28_192010654.png,head,"[[103, 101, 99, 92, 83, 83, 90, 93, 96, 97, 94..."
3,19150_2021-01-24_19.28_192723415.png,head,"[[96, 92, 88, 81, 84, 86, 83, 84, 82, 83, 83, ..."
4,19150_2021-01-24_19.28_192733399.png,head,"[[124, 131, 153, 171, 177, 153, 111, 102, 115,..."
...,...,...,...
2118,19642_2021-01-16_18.54_185155873.png,none,"[[120, 122, 139, 167, 182, 182, 177, 177, 186,..."
2119,19642_2021-01-17_10.15_10101114.png,none,"[[171, 161, 165, 155, 158, 175, 177, 167, 171,..."
2120,19642_2021-01-17_10.15_101520226.png,none,"[[142, 174, 197, 203, 198, 207, 215, 215, 171,..."
2121,19676_2021-02-04_21.33_21322923.png,none,"[[68, 66, 70, 76, 77, 74, 73, 73, 69, 70, 74, ..."


Get images ready for HOG:
1. Filter
2. Resize to (128, 128)

In [4]:
%%time
from Process import process
def resize(x):
    return cv2.resize(x,(128, 128))

#feature_images['filtered_pixels'] = feature_images['image_pixels'].apply(process)
feature_images['resized_pixels'] = feature_images['image_pixels'].apply(resize)
feature_images

Wall time: 133 ms


Unnamed: 0,image_name,class,image_pixels,resized_pixels
0,19150_2021-01-24_10.31_103052326.png,head,"[[79, 78, 82, 85, 87, 89, 93, 97, 100, 104, 10...","[[79, 79, 82, 85, 87, 91, 95, 99, 103, 106, 10..."
1,19150_2021-01-24_19.28_191920932.png,head,"[[95, 96, 98, 100, 101, 104, 108, 108, 106, 10...","[[95, 96, 98, 99, 102, 107, 108, 106, 103, 99,..."
2,19150_2021-01-24_19.28_192010654.png,head,"[[103, 101, 99, 92, 83, 83, 90, 93, 96, 97, 94...","[[103, 100, 97, 89, 83, 87, 92, 95, 97, 94, 95..."
3,19150_2021-01-24_19.28_192723415.png,head,"[[96, 92, 88, 81, 84, 86, 83, 84, 82, 83, 83, ...","[[96, 91, 85, 84, 87, 84, 84, 83, 83, 97, 115,..."
4,19150_2021-01-24_19.28_192733399.png,head,"[[124, 131, 153, 171, 177, 153, 111, 102, 115,...","[[126, 136, 158, 173, 159, 117, 109, 120, 125,..."
...,...,...,...,...
2118,19642_2021-01-16_18.54_185155873.png,none,"[[120, 122, 139, 167, 182, 182, 177, 177, 186,...","[[121, 127, 149, 174, 182, 179, 177, 186, 198,..."
2119,19642_2021-01-17_10.15_10101114.png,none,"[[171, 161, 165, 155, 158, 175, 177, 167, 171,...","[[171, 161, 165, 155, 158, 173, 177, 168, 170,..."
2120,19642_2021-01-17_10.15_101520226.png,none,"[[142, 174, 197, 203, 198, 207, 215, 215, 171,...","[[142, 175, 195, 196, 206, 215, 191, 153, 140,..."
2121,19676_2021-02-04_21.33_21322923.png,none,"[[68, 66, 70, 76, 77, 74, 73, 73, 69, 70, 74, ...","[[69, 67, 72, 76, 74, 73, 72, 69, 71, 72, 63, ..."


Compute HOG features

In [5]:
%%time
from Hog import compute_hog
feature_images['hog'] = feature_images['resized_pixels'].apply(compute_hog)
feature_images

Wall time: 6.58 s


Unnamed: 0,image_name,class,image_pixels,resized_pixels,hog
0,19150_2021-01-24_10.31_103052326.png,head,"[[79, 78, 82, 85, 87, 89, 93, 97, 100, 104, 10...","[[79, 79, 82, 85, 87, 91, 95, 99, 103, 106, 10...","[[0.04839468071532554, 0.02948810026201215, 0...."
1,19150_2021-01-24_19.28_191920932.png,head,"[[95, 96, 98, 100, 101, 104, 108, 108, 106, 10...","[[95, 96, 98, 99, 102, 107, 108, 106, 103, 99,...","[[0.04614005067862152, 0.030539446803876866, 0..."
2,19150_2021-01-24_19.28_192010654.png,head,"[[103, 101, 99, 92, 83, 83, 90, 93, 96, 97, 94...","[[103, 100, 97, 89, 83, 87, 92, 95, 97, 94, 95...","[[0.058288720644839594, 0.03386355518528862, 0..."
3,19150_2021-01-24_19.28_192723415.png,head,"[[96, 92, 88, 81, 84, 86, 83, 84, 82, 83, 83, ...","[[96, 91, 85, 84, 87, 84, 84, 83, 83, 97, 115,...","[[0.08872191762431396, 0.03263218258918781, 0...."
4,19150_2021-01-24_19.28_192733399.png,head,"[[124, 131, 153, 171, 177, 153, 111, 102, 115,...","[[126, 136, 158, 173, 159, 117, 109, 120, 125,...","[[0.1335836971083118, 0.01965405485303258, 0.0..."
...,...,...,...,...,...
2118,19642_2021-01-16_18.54_185155873.png,none,"[[120, 122, 139, 167, 182, 182, 177, 177, 186,...","[[121, 127, 149, 174, 182, 179, 177, 186, 198,...","[[0.04730741724291017, 0.02954489923818306, 0...."
2119,19642_2021-01-17_10.15_10101114.png,none,"[[171, 161, 165, 155, 158, 175, 177, 167, 171,...","[[171, 161, 165, 155, 158, 173, 177, 168, 170,...","[[0.06891262877289671, 0.06664489370117056, 0...."
2120,19642_2021-01-17_10.15_101520226.png,none,"[[142, 174, 197, 203, 198, 207, 215, 215, 171,...","[[142, 175, 195, 196, 206, 215, 191, 153, 140,...","[[0.11648649506187697, 0.06878848092414896, 0...."
2121,19676_2021-02-04_21.33_21322923.png,none,"[[68, 66, 70, 76, 77, 74, 73, 73, 69, 70, 74, ...","[[69, 67, 72, 76, 74, 73, 72, 69, 71, 72, 63, ...","[[0.022663235076007475, 0.029759945757108445, ..."


Separate into test and train data

Train Linear SVMs for each feature

In [6]:
%%time
import numpy as np
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score
feature_svm = []
fully_trained = []
accuracy = []
classification_rep = []
for feature in feature_names:
    df = feature_images[feature_images['class'].isin([feature, 'none'])]
    
    labels =  np.array(df['class']).reshape(len(df['class']),1)
    hog_features = np.stack(df['hog'], axis=1)[0]
    data = np.hstack((hog_features,labels))

    np.random.shuffle(data)
    percentage = 80
    partition = int(len(data)*percentage/100)

    x_train, x_test = data[:partition,:-1],  data[partition:,:-1]
    y_train, y_test = data[:partition,-1:].ravel() , data[partition:,-1:].ravel()

    clf = svm.SVC(probability=True)
    clf.fit(x_train,y_train)
    
    clf2 = svm.SVC(probability=True)
    clf2.fit(data[:len(data),:-1],data[:len(data),-1:].ravel())
    
    y_pred = clf.predict(x_test)
    
    fully_trained.append(clf2)
    feature_svm.append(clf)
    accuracy.append(accuracy_score(y_test, y_pred))
    classification_rep.append(classification_report(y_test, y_pred))

results = pd.DataFrame({'feature': feature_names, 'svm': feature_svm, 'accuracy': accuracy, 'classification report': classification_rep})

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Wall time: 10.1 s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Print results

In [8]:
for idx,row in results.iterrows():
    print(row['feature'])
    print('accuracy: '+str(row['accuracy']))
    print(row['classification report'])
    print('\n')

head
accuracy: 0.9609120521172638
              precision    recall  f1-score   support

        head       0.96      1.00      0.98       279
        none       1.00      0.57      0.73        28

    accuracy                           0.96       307
   macro avg       0.98      0.79      0.85       307
weighted avg       0.96      0.96      0.96       307



spine
accuracy: 0.8620689655172413
              precision    recall  f1-score   support

        none       0.89      0.65      0.75        37
       spine       0.85      0.96      0.90        79

    accuracy                           0.86       116
   macro avg       0.87      0.81      0.83       116
weighted avg       0.87      0.86      0.86       116



leg
accuracy: 0.8
              precision    recall  f1-score   support

         leg       0.00      0.00      0.00         8
        none       0.80      1.00      0.89        32

    accuracy                           0.80        40
   macro avg       0.40      0.50    

In [9]:
import pickle
from pathlib import Path

svm_folder =  pwd+"/SVM/"
for clf in fully_trained:
    class_ = clf.classes_
    class_ = np.copy(class_).tolist()
    class_.remove('none')
    pkl_filename = class_[0]+'.pkl'
    with open(svm_folder+pkl_filename, 'wb+') as file:
        pickle.dump(clf, file)

In [None]:
# Load from file
with open(pkl_filename, 'rb') as file:
    pickle_model = pickle.load(file)