In [1]:
import glob

import numpy as np

from skimage.io import imread
from skimage.feature import hog

from sklearn.svm import LinearSVC
from skimage import exposure
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

import time

import matplotlib.pyplot as plt
import os

from features import FeaturesRGB

Class mapping the name of the original and the augmented one

In [2]:
class LftToAuglft:
    def __init__(self, lft_filename, idx):
        self.lft_name = lft_filename
        self.self_idx = idx
        self.aug_lft_idx = []

HOG parameters, paths for dataset

In [3]:
hog_params = {
    'orientations': 11, # number of bins
    'pixels_per_cell': (8, 8), # normally (8,8), refers to 8x8 pixels are used to calculate hog
    'cells_per_block': (2, 2),
    'block_norm': 'L2-Hys'
}

ftrRGB = FeaturesRGB(hog_params)
lft_paths = glob.glob('../images/lateral/training/cropped/*.jpg')
auglft_paths = glob.glob('../images/lateral/training/augmented/*.jpg')

nonlft_paths = glob.glob('../images/non-lateral/training/*.jpg')


## Main

#### HOG for LFT

In [4]:
print("Load lateral flow test images, get HOG and add to the feature list ...")

lft_features, auglft_features, nonlft_features = [], [], []
lft_ftToAuglft = {}
lft_ftToAuglft_list = []

lft_filename = []

count = 0

print("Loading and generating HOG for LFT...")
for path in lft_paths: 
    # Get file name
    f_split = os.path.basename(path)
    name_tmp = os.path.splitext(f_split)[0]
    
    lftToAuglft_obj = LftToAuglft(name_tmp, count)
    
    lft_ftToAuglft[name_tmp] = lftToAuglft_obj
    lft_ftToAuglft_list.append(lftToAuglft_obj)
    
    #lft_ftToAuglft.append(LftToAuglft(os.path.splitext(f_split)[0]))
    
    lft_img = imread(path) 

    hog_flatten = ftrRGB.cal_hog(lft_img, 0)
    lft_features.append(hog_flatten)
    
    
    count = count + 1
    #if count == 20:
        #break
        
print("Finish, files loaded: ", count)




Load lateral flow test images, get HOG and add to the feature list ...
Loading and generating HOG for LFT...
Finish, files loaded:  148


#### Get the filename for augmented LFT

In [5]:
count = 0
auglft_filename = []

print("Loading files for augmented LFT...")
for path in auglft_paths: 
    # Get file name
    f_split = os.path.basename(path)
    name_tmp = os.path.splitext(f_split)[0]
    name_prefix_tmp = name_tmp.split('_')[0]

    
    
    if name_prefix_tmp in lft_ftToAuglft:
        lft_ftToAuglft[name_prefix_tmp].aug_lft_idx.append(len(auglft_filename))
    else:
        print("Cannot find the original image: ", name_prefix_tmp)
        break
        
        
    auglft_filename.append(name_tmp)
    
    count = count + 1

print("Finish, files loaded: ", count)  


for key, value in lft_ftToAuglft.items():    
    print(key, value.aug_lft_idx, value.self_idx)


Loading files for augmented LFT...
Finish, files loaded:  1114
hcng1 [80, 81, 82, 83, 84, 85, 86, 87] 0
hcng10 [0, 1, 2, 3, 4, 5, 6, 7] 1
hcng11 [8, 9, 10, 11, 12, 13, 14, 15] 2
hcng12 [16, 17, 18, 19, 20, 21, 22, 23] 3
hcng13 [24, 25, 26, 27, 28, 29, 30, 31] 4
hcng14 [32, 33, 34, 35, 36, 37, 38, 39] 5
hcng15 [40, 41, 42, 43, 44, 45, 46, 47] 6
hcng16 [48, 49, 50, 51, 52, 53, 54, 55] 7
hcng17 [56, 57, 58, 59, 60, 61, 62, 63] 8
hcng18 [64, 65, 66, 67, 68, 69, 70, 71] 9
hcng19 [72, 73, 74, 75, 76, 77, 78, 79] 10
hcng2 [120, 121, 122, 123, 124, 125, 126, 127] 11
hcng20 [88, 89, 90, 91, 92, 93, 94, 95] 12
hcng21 [96, 97, 98, 99, 100, 101, 102, 103] 13
hcng22 [104, 105, 106, 107, 108, 109, 110, 111] 14
hcng23 [112, 113, 114, 115, 116, 117, 118, 119] 15
hcng3 [128, 129, 130, 131, 132, 133, 134, 135] 16
hcng4 [136, 137, 138, 139, 140, 141, 142, 143] 17
hcng5 [144, 145, 146, 147, 148, 149, 150, 151] 18
hcng6 [152, 153, 154, 155, 156, 157, 158, 159] 19
hcng7 [160, 161, 162, 163, 164, 165, 166, 1

#### HOG for augmented LFT

In [6]:
count = 0
auglft_features = []

print("Loading and generating HOG for augmented LFT...")
for path in auglft_paths: 

    lft_img = imread(path) 
    
    hog_flatten = ftrRGB.cal_hog(lft_img, 0)
    auglft_features.append(hog_flatten)
    
    count += 1
    
    
print("Finish, files loaded: ", count)  

Loading and generating HOG for augmented LFT...
Finish, files loaded:  1114


#### HOG for non-LFT

In [7]:
print("Load non-lateral flow test images, get HOG and add to the feature list ...")

count = 0  
nonlft_features = []
print("Loading and generating HOG for non-LFT...")
  
for path in nonlft_paths: 
    nonlft_image = imread(path)

    hog_flatten = ftrRGB.cal_hog(nonlft_image, 0)
    
    nonlft_features.append(hog_flatten)
    
    count = count + 1
    #if count == 10:
        #break
        
print("Finish, files loaded: ", count)  


Load non-lateral flow test images, get HOG and add to the feature list ...
Loading and generating HOG for non-LFT...
Finish, files loaded:  893


### Convert to numpy

In [8]:

for i in range(0, len(nonlft_features)):
    print(i, len(nonlft_features[i]), nonlft_features[i])

0 93060 [0.22265636 0.2337267  0.11513022 ... 0.00694243 0.02176558 0.00833214]
1 93060 [0.         0.         0.         ... 0.13427199 0.11058181 0.08440961]
2 93060 [0.22540178 0.17688124 0.13790134 ... 0.23643786 0.2532686  0.06743551]
3 93060 [0.07191769 0.03821947 0.17934814 ... 0.18604419 0.26464905 0.24358729]
4 93060 [0.09854248 0.         0.11004115 ... 0.2457569  0.12657062 0.        ]
5 93060 [0.14186137 0.0186534  0.07727949 ... 0.10209894 0.22481513 0.0533024 ]
6 93060 [0.0771485  0.01347018 0.01765927 ... 0.03135949 0.09746596 0.01828557]
7 93060 [0.06532405 0.02337121 0.0880987  ... 0.13996008 0.02458848 0.        ]
8 93060 [0.2793024  0.2793024  0.2793024  ... 0.         0.02466193 0.10058457]
9 93060 [0.23185743 0.1819314  0.18683566 ... 0.1129082  0.03734183 0.14606224]
10 93060 [0.0569119  0.00495814 0.         ... 0.25277302 0.02626103 0.14720679]
11 93060 [0.18431113 0.0651564  0.09606354 ... 0.01687831 0.01071178 0.06341051]
12 93060 [0.32530134 0.18141383 0.1958

572 93060 [0.13442111 0.01002703 0.08688907 ... 0.03743528 0.         0.02521085]
573 93060 [0.02611535 0.00314816 0.00793645 ... 0.         0.         0.        ]
574 93060 [0.21371064 0.02006896 0.00213263 ... 0.         0.18717544 0.24305208]
575 93060 [0.21841259 0.02285698 0.04283916 ... 0.15477208 0.2041143  0.06026717]
576 93060 [0.05419932 0.03757227 0.01429408 ... 0.28949233 0.28949233 0.06921013]
577 93060 [0.26307541 0.0681357  0.10662595 ... 0.3348903  0.15116127 0.06638434]
578 93060 [0.1392945  0.2215472  0.0098343  ... 0.01617264 0.01542285 0.02260644]
579 93060 [0.12543688 0.06083688 0.13009204 ... 0.04312966 0.0072135  0.15845766]
580 93060 [0.28417373 0.02256092 0.02302455 ... 0.01048472 0.10148979 0.22607252]
581 93060 [0.07497208 0.         0.01006996 ... 0.0132338  0.0083574  0.        ]
582 93060 [0.02304747 0.005139   0.00245689 ... 0.0591937  0.03127092 0.03919649]
583 93060 [0.18704429 0.01294585 0.02046919 ... 0.00749812 0.00354616 0.00653329]
584 93060 [0.037

In [9]:
np_lft_features = np.asarray(lft_features)
np_auglft_features = np.asarray(auglft_features)
np_nonlft_features = np.asarray(nonlft_features)

Flatten the dataset

In [10]:
print("np_lft_features: ", np_lft_features.shape)
print("np_auglft_features: ", np_auglft_features.shape)
print("np_nonlft_features: ", np_nonlft_features.shape)

unscaled_x = np.vstack((np_lft_features, np_auglft_features, np_nonlft_features)).astype(np.float64)
print(unscaled_x.shape)

#print(unscaled_x)
scaler = StandardScaler().fit(unscaled_x)
x = scaler.transform(unscaled_x)

print(x.shape)
#print(x)

np_lft_features:  (148, 93060)
np_auglft_features:  (1114, 93060)
np_nonlft_features:  (893, 93060)
(2155, 93060)
(2155, 93060)


In [11]:
lft_cnt = np_lft_features.shape[0]
auglft_cnt = np_auglft_features.shape[0]
nonlft_cnt = np_nonlft_features.shape[0]



x_lft = x[:lft_cnt, :]
x_auglft = x[lft_cnt:auglft_cnt+lft_cnt, :] 
x_nonlft = x[auglft_cnt+lft_cnt:, :] 

print(x_lft.shape)
print(x_auglft.shape)
print(x_nonlft.shape)

#y = np.hstack((np.ones(lft_cnt), np.zeros(nonlft_cnt)))

(148, 93060)
(1114, 93060)
(893, 93060)


In [12]:
# Train test split, the train set will be split into 5-fold
x_lft_train_fold, x_lft_test = train_test_split(x_lft, test_size = 0.2, random_state = 0)
x_nonlft_train_fold, x_nonlft_test = train_test_split(x_nonlft, test_size = 0.2, random_state = 0)

# Test set
x_test = np.concatenate((x_lft_test, x_nonlft_test), axis = 0)
y_test = np.hstack((np.ones(x_lft_test.shape[0]), np.zeros(x_nonlft_test.shape[0])))


    
kf = KFold(n_splits=5, shuffle=True)

x_lft_train_idx_list = []
x_lft_val_idx_list = []

x_nonlft_train_idx_list = []
x_nonlft_val_idx_list = []


for train_idx, val_idx in kf.split(x_lft_train_fold):
    #print("TRAIN:", train_idx, "\nTEST:", val_idx)
    
    x_lft_train_idx_list.append(train_idx)
    x_lft_val_idx_list.append(val_idx)


for train_idx, val_idx in kf.split(x_nonlft_train_fold):
    #print("TRAIN:", train_idx.shape, "\nTEST:", val_idx.shape)
    
    x_nonlft_train_idx_list.append(train_idx)
    x_nonlft_val_idx_list.append(val_idx)


In [16]:
svc_list = []
svc_score_list = []

for i in range(0,5):
    
    x_lft_train = np.take(x_lft_train_fold, x_lft_train_idx_list[i], axis=0)

    print("Original shape for LFT", x_lft_train.shape)
    
    x_auglft_train = None
    
    for lft_idx in x_lft_train_idx_list[i]:
        
        # get the index of the augmented ones based on the originals
        auglft_idx = lft_ftToAuglft_list[lft_idx].aug_lft_idx
        
        # get the flatten HOG of augmented LFT in numpy array
        x_auglft_train_tmp = np.take(x_auglft, auglft_idx, axis=0)
        #print(x_auglft_train_tmp.shape)
        
        if x_auglft_train is None:
            x_auglft_train = np.array(x_auglft_train_tmp)
        else:
            #print("concat")
            x_auglft_train = np.concatenate((x_auglft_train, x_auglft_train_tmp), axis = 0)
        
        
            #print(x_auglft_train.shape)
            
    # Get the validation set for LFT
    lft_idx = x_lft_val_idx_list[i]
    x_lft_val = np.take(x_lft_train_fold, lft_idx, axis=0)

    # Get the non LFT
    #x_nonlft
    nonlft_idx = x_nonlft_train_idx_list[i]
    x_nonlft_train = np.take(x_nonlft_train_fold, nonlft_idx, axis=0)
    
    # Get the validation set for non-LFT
    lft_idx = x_nonlft_val_idx_list[i]
    x_nonlft_val = np.take(x_nonlft_train_fold, lft_idx, axis=0)


    # Combine the augmented dataset with the normal LFT
    x_lft_cmb_train = np.concatenate((x_lft_train, x_auglft_train), axis = 0)

    print("(Training) LFT + Augmented LFT shape: ", x_lft_cmb_train.shape, "Non-LFT shape: ", x_nonlft_train.shape)
    print("(Validate) LFT shape: ", x_lft_val.shape, "Non-LFT shape: ", x_nonlft_val.shape)
    
    
    # Splitting Training and Validation set
    x_train = np.concatenate((x_lft_cmb_train, x_nonlft_train), axis = 0)
    x_val = np.concatenate((x_lft_val, x_nonlft_val), axis = 0)

    y_train = np.hstack((np.ones(x_lft_cmb_train.shape[0]), np.zeros(x_nonlft_train.shape[0])))
    y_val = np.hstack((np.ones(x_lft_val.shape[0]), np.zeros(x_nonlft_val.shape[0])))

    #print("x_train.shape: ", x_train.shape, "x_val.shape: ", x_val.shape)
    #print("y_train.shape: ",y_train.shape, "y_val.shape: ", y_val.shape)
    
    
    print("Training classifier ", i+1, "Times")
    t_start = time.time()

    svc = LinearSVC()
    svc.fit(x_train, y_train)
    accuracy = svc.score(x_val, y_val)

    print("...Done")
    print("Time Taken:", np.round(time.time() - t_start, 2))
    print("Accuracy: ", np.round(accuracy, 4))
    
    svc_list.append(svc)
    svc_score_list.append(accuracy)


Original shape for LFT (94, 93060)
(Training) LFT + Augmented LFT shape:  (776, 93060) Non-LFT shape:  (571, 93060)
(Validate) LFT shape:  (24, 93060) Non-LFT shape:  (143, 93060)
Training classifier  1 Times
...Done
Time Taken: 10.28
Accuracy:  0.982
Original shape for LFT (94, 93060)
(Training) LFT + Augmented LFT shape:  (812, 93060) Non-LFT shape:  (571, 93060)
(Validate) LFT shape:  (24, 93060) Non-LFT shape:  (143, 93060)
Training classifier  2 Times
...Done
Time Taken: 13.8
Accuracy:  0.988
Original shape for LFT (94, 93060)
(Training) LFT + Augmented LFT shape:  (788, 93060) Non-LFT shape:  (571, 93060)
(Validate) LFT shape:  (24, 93060) Non-LFT shape:  (143, 93060)
Training classifier  3 Times
...Done
Time Taken: 12.69
Accuracy:  0.994
Original shape for LFT (95, 93060)
(Training) LFT + Augmented LFT shape:  (805, 93060) Non-LFT shape:  (571, 93060)
(Validate) LFT shape:  (23, 93060) Non-LFT shape:  (143, 93060)
Training classifier  4 Times
...Done
Time Taken: 12.84
Accuracy: 

Get the max accuracy

In [17]:
svc_max_idx = svc_score_list.index(max(svc_score_list))

accuracy = svc_list[svc_max_idx].score(x_test, y_test)
print("Accuracy: ", np.round(accuracy, 4))

Accuracy:  0.9809


Saving the model

In [18]:
print ("Saving models...")
import joblib

joblib.dump(svc, '../svc_validated.pkl')
joblib.dump(scaler, '../scaler_validated.pkl')

print("...Done")

Saving models...
...Done


## Legacy code

In [None]:
y = np.hstack((np.ones(lft_cnt + auglft_cnt), np.zeros(nonlft_cnt)))


print(" x shape: ", x.shape, " y shape: ", y.shape)

In [None]:
print("Training classifier...")
t_start = time.time()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2,
                                                    random_state =0)

svc = LinearSVC()
svc.fit(x_train, y_train)
accuracy = svc.score(x_test, y_test)

print("...Done")
print("Time Taken:", np.round(time.time() - t_start, 2))
print("Accuracy: ", np.round(accuracy, 4))

In [None]:
print ("Saving models...")
import joblib

joblib.dump(svc, '../svc2.pkl')
joblib.dump(scaler, '../scaler2.pkl')

print("...Done")

In [None]:
        """
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        
        ax[0].axis('off')
        ax[0].title.set_text('Original')
        #R_hog_image_rescaled = exposure.rescale_intensity(self.R_hog_image, in_range=(0, 10))
        #ax[0].imshow(R_hog_image_rescaled, cmap=plt.cm.gray)
        ax[0].imshow(self.RGB_img)
        
        ax[1].axis('off')
        ax[1].title.set_text('HOG (pixels_per_cell 8x8)')
        R_hog_image_rescaled = exposure.rescale_intensity(self.R_hog_image, in_range=(0, 10))
        ax[1].imshow(R_hog_image_rescaled, cmap=plt.cm.gray)
        
        #ax[2].axis('off')
        #B_hog_image_rescaled = exposure.rescale_intensity(self.B_hog_image, in_range=(0, 10))
        #ax[2].imshow(B_hog_image_rescaled, cmap=plt.cm.gray)  
        """