In [1]:
import cv2
import numpy as np
import os
import glob
import math

import mahotas as mt
import pandas as pd

from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix

from urllib.request import urlopen
exec(urlopen("https://raw.githubusercontent.com/fhebert/CascadeSVC/main/CascadeSVC.py").read())

In [2]:
# load the training dataset
path = "..\Datasets\Colombiam\\svm\\thyroid-crop-small-background-square-160\\"
path_names = os.listdir(path)

# empty list to hold feature vectors and labels
features_arr = []
labels_arr = []

In [3]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
        # calculate haralick texture features for 4 types of adjacency
        textures = mt.features.haralick(image)

        # take the mean of it and return it
        ht_mean = textures.mean(axis=0)

        #for i in range(0,13):
               #ht_mean[i] = -1* math.copysign(1.0, ht_mean[i]) *  math.log10(abs(ht_mean[i]))
        return ht_mean

In [4]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    # Calculate Hu Moments.
    feature = cv2.HuMoments(cv2.moments(image))
    #for i in range(0,7):
        #feature[i] = -1* math.copysign(1.0, feature[i]) *  math.log10(abs(feature[i]))
    return feature.flatten()

In [5]:
# loop over the training dataset
print ("[STATUS] Started extracting haralick textures and hu moments shape..")
for path_name in path_names:
        cur_path = path + "/" + path_name
        cur_label = path_name
        i = 1
        for file in glob.glob(cur_path + "/*.jpg"):
                #print ("Processing Image - {} in {}".format(i, cur_label))
                # read the training image
                image = cv2.imread(file)

                # convert the image to grayscale
                #gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                gray = cv2.imread(file,cv2.IMREAD_GRAYSCALE)

                # extract features from image
                fv_hu_moments = fd_hu_moments(gray)
                fv_haralick   = fd_haralick(gray)

                # concatened global features
                #features = np.hstack([fv_haralick])
                features = np.hstack(gray)

                # append the feature vector and label
                features_arr.append(features)
                labels_arr.append(cur_label)

                # show loop update
                i += 1
        print("[STATUS] processed folder: {}".format(cur_label))

[STATUS] Started extracting haralick textures and hu moments shape..
[STATUS] processed folder: 1
[STATUS] processed folder: 2
[STATUS] processed folder: 3
[STATUS] processed folder: 4-4a
[STATUS] processed folder: 5-4b
[STATUS] processed folder: 6-4c
[STATUS] processed folder: 7-5


In [6]:
# encode the target labels
targetNames = np.unique(labels_arr)
le          = LabelEncoder()
labels      = le.fit_transform(labels_arr)
print("[STATUS] labels encoded...")

# scale features in the range (0-1)
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(features_arr) #scaler.fit_transform(features_arr) #features_arr
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(labels))
print("[STATUS] target labels shape: {}".format(labels.shape))

data=np.array(rescaled_features)
target=np.array(labels)
df=pd.DataFrame(data)
df['Target']=target
df

[STATUS] labels encoded...
[STATUS] feature vector normalized...
[STATUS] target labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,Target
0,0.003246,0.265041,0.680229,0.147665,0.178968,0.420804,0.143620,0.627777,0.676134,0.185943,0.518131,0.590165,0.761525,0
1,0.003193,0.378474,0.407834,0.091730,0.099778,0.524050,0.080808,0.519818,0.683701,0.187433,0.635388,0.806729,0.467838,0
2,0.001432,0.619914,0.231794,0.126537,0.033474,0.860142,0.104730,0.586723,0.823374,0.044169,0.828653,0.877482,0.386063,0
3,0.001394,0.558610,0.333435,0.135810,0.043171,0.710035,0.117208,0.612195,0.823778,0.066237,0.791825,0.834781,0.477711,0
4,0.002771,0.389983,0.575185,0.161108,0.119911,0.590239,0.151318,0.621828,0.735298,0.128186,0.640629,0.681262,0.669299,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,0.001347,0.503214,0.717656,0.338695,0.120165,0.584086,0.327736,0.807701,0.862958,0.103850,0.726039,0.523632,0.846648,5
455,0.001630,0.444760,0.637941,0.229218,0.129173,0.556929,0.218429,0.715555,0.809286,0.131799,0.664959,0.606797,0.741703,5
456,0.001314,0.475834,0.610967,0.228114,0.103216,0.514228,0.215787,0.711441,0.830039,0.117128,0.705506,0.648842,0.716991,5
457,0.005560,0.278408,0.324223,0.035212,0.147011,0.394345,0.027773,0.377811,0.535130,0.250924,0.528742,0.862563,0.300231,5


In [7]:
x=df.iloc[:,:-1]
y=df.iloc[:,-1]
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=77,stratify=y)
print('Splitted Successfully')

# have a look at the size of our feature vector and labels
print ("Training features: {}".format(np.array(x_train).shape))
print ("Training labels: {}".format(np.array(y_train).shape))
print ("Test features: {}".format(np.array(x_test).shape))
print ("Test labels: {}".format(np.array(y_test).shape))

Splitted Successfully
Training features: (344, 13)
Training labels: (344,)
Test features: (115, 13)
Test labels: (115,)


In [8]:
#log_loss_score = make_scorer(score_func=log_loss,greater_is_better=True,needs_proba=True)
#
#csvm = Pipeline([("scaler",StandardScaler()),
#                 ("csvm",CascadeSVC(fold_size=10000,verbose=False,
#                                    kernel="rbf",probability=True))])
#
## Variar o grid
#model = GridSearchCV(estimator=csvm,
#                          param_grid={"csvm__C" : [120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140], #10,100,1000,10000
#                                      "csvm__gamma" : [0.0001,0.001,0.1,1]}, #0.0001,0.001,0.1,1
#                          cv=5,n_jobs=5,verbose=10,scoring=log_loss_score,refit=True)
#
#model.fit(x_train, y_train)
#
#par1 = model.best_params_
#res1 = pd.DataFrame({
#    "C" : model.cv_results_["param_csvm__C"],
#    "gamma" : model.cv_results_["param_csvm__gamma"],
#    "mean_test_score" : model.cv_results_["mean_test_score"],
#    "std_test_score" : model.cv_results_["std_test_score"]
#})
#print(par1)
#print(res1)

In [9]:
# Grid Parameters
#param_grid={'C':[9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,9915,9916,9917,9918,9919,9920,9921,9922,9923,9924,9925,9926,9927,9928,9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944,9945,9946,9947,9948,9949,9950,9951,9952,9953,9954,9955,9956,9957,9958,9959,9960,9961,9962,9963,9964,9965,9966,9967,9968,9969,9970,9971,9972,9973,9974,9975,9976,9977,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987,9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019,10020,10021,10022,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033,10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064,10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080,10081,10082,10083,10084,10085,10086,10087,10088,10089,10090,10091,10092,10093,10094,10095,10096,10097,10098,10099,10100],'gamma':[0.1,],'kernel':['rbf']} #'kernel':['rbf','poly'] [0.0001,0.001,0.1,1] #0.1,1,10,100,1000,10000
#param_grid={'C':[0.1,1,10,100,1000,10000]}
param_grid={'C':[0.1,1,10,100,1000,10000],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf']} #'kernel':['rbf','poly'] [0.0001,0.001,0.1,1] #0.1,1,10,100,1000,10000

# create the classifier
print ("[STATUS] Creating the classifier..")
#clf_svm = LinearSVC(dual=False, random_state=77)
clf_svm = SVC(probability=True, cache_size=7000)
model=GridSearchCV(clf_svm,param_grid)

# fit the training data and labels
print ("[STATUS] Fitting data/label to model..")
model.fit(x_train, y_train)

# Show the best model
print('The Model is trained well with the given images')
model.best_params_


[STATUS] Creating the classifier..
[STATUS] Fitting data/label to model..
The Model is trained well with the given images


{'C': 10, 'gamma': 1, 'kernel': 'rbf'}

In [10]:
y_pred=model.predict(x_test)
print("The predicted Data is :")
y_pred

The predicted Data is :


array([2, 3, 2, 2, 3, 2, 2, 3, 3, 4, 2, 3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 2,
       4, 3, 3, 3, 4, 2, 2, 2, 2, 3, 2, 3, 2, 5, 3, 3, 5, 2, 2, 2, 3, 3,
       2, 0, 2, 2, 2, 3, 3, 3, 2, 4, 3, 2, 3, 3, 3, 4, 2, 2, 3, 5, 2, 4,
       3, 2, 4, 2, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 4, 2, 2, 2, 3, 5,
       3, 3, 2, 2, 2, 2, 3, 3, 3, 2, 5, 3, 2, 3, 2, 4, 2, 3, 2, 4, 3, 3,
       3, 2, 2, 2, 3], dtype=int64)

In [11]:
print("The actual data is:")
np.array(y_test)

The actual data is:


array([4, 4, 5, 0, 2, 4, 3, 2, 1, 2, 2, 0, 3, 3, 2, 2, 5, 3, 2, 2, 0, 3,
       2, 2, 5, 2, 2, 3, 0, 2, 2, 4, 0, 3, 3, 3, 3, 4, 3, 5, 4, 3, 5, 3,
       5, 0, 4, 2, 2, 4, 1, 3, 3, 3, 2, 4, 1, 5, 0, 4, 3, 3, 5, 2, 2, 5,
       3, 2, 2, 3, 2, 4, 3, 3, 0, 4, 2, 2, 5, 4, 2, 4, 3, 5, 2, 2, 2, 2,
       2, 4, 4, 3, 4, 4, 1, 5, 2, 1, 3, 5, 2, 4, 2, 0, 5, 0, 3, 0, 4, 4,
       0, 4, 3, 2, 3], dtype=int64)

In [12]:
#classification_report(y_pred,y_test)
print(f"The model is {accuracy_score(y_pred,y_test)*100}% accurate")
print(confusion_matrix(y_pred,y_test))
print(classification_report(y_pred,y_test))


The model is 22.608695652173914% accurate
[[ 1  0  1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 3  1 16 15 10  5]
 [ 6  4 11  8 11  8]
 [ 2  0  4  2  1  1]
 [ 0  0  2  3  0  0]]
              precision    recall  f1-score   support

           0       0.08      0.50      0.14         2
           1       0.00      0.00      0.00         0
           2       0.47      0.32      0.38        50
           3       0.29      0.17      0.21        48
           4       0.05      0.10      0.06        10
           5       0.00      0.00      0.00         5

    accuracy                           0.23       115
   macro avg       0.15      0.18      0.13       115
weighted avg       0.33      0.23      0.26       115



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# loop over the test images
#test_path = "..\Datasets\Colombiam\\svm\\thyroid-crop-small-background-square-160-tri\\6-4c\\"
#for file in glob.glob(test_path + "/*.jpg"):
#        # read the input image
#        image = cv2.imread(file)
#
#        # convert to grayscale
#        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#
#        # extract haralick texture from the image
#        features = extract_features(gray)
#
#        # evaluate the model and predict label
#        prediction = clf_svm.predict(features.reshape(1, -1))[0]
#
#        # show the label
#        cv2.putText(image, prediction, (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
#
#        # display the output image
#        cv2.imshow("Test_Image", image)
#        cv2.waitKey(0)