<a href="https://colab.research.google.com/github/Vernalhav/image_processing_project/blob/main/dataset_creator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /drive; the dataset and model paths used later in this
# notebook are located under that mount point.
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import imutils
import mahotas as mt

In [None]:
def extract_color_histogram(image, bins=(2, 2, 2)):
    """Return a normalized, flattened 3D HSV color histogram of `image`.

    Args:
        image: image array, converted from BGR to HSV before binning.
        bins: number of histogram bins for the H, S and V channels.

    Returns:
        1-D array holding the flattened, normalized histogram.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # value ranges passed to calcHist for the H, S and V channels
    channel_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], [0, 1, 2], None, bins, channel_ranges)

    if not imutils.is_cv2():
        # OpenCV 3+: normalize writes the result into the destination array
        cv2.normalize(histogram, histogram)
    else:
        # OpenCV 2.4.X: normalize returns the normalized histogram
        histogram = cv2.normalize(histogram)

    # the flattened histogram is the feature vector
    return histogram.flatten()

def extract_sobel(image, num_filters=16):
    """Compute small Gabor-filter response maps of `image` at several orientations.

    Despite its name, this function applies Gabor kernels (not Sobel); the
    name is kept so existing callers keep working.

    Each response is scaled by its maximum, cast to uint8, converted to
    grayscale, inverted and resized to 10x10.

    Args:
        image: 3-channel image array (the filtered result is converted with
            cv2.COLOR_BGR2GRAY).
        num_filters: number of kernel orientations; the i-th kernel uses
            theta = (pi / num_filters) * i.

    Returns:
        np.ndarray stacking one 10x10 map per orientation.
    """
    filtered = []

    for i in range(num_filters):
        theta = (np.pi / num_filters) * i
        kernel = cv2.getGaborKernel((5, 5), 8.0, theta, 5.0, 0.5, 0, ktype=cv2.CV_32F)

        # ddepth 3 is cv2.CV_16S, so the response may contain negative values
        response = cv2.filter2D(image, cv2.CV_16S, kernel)

        peak = np.max(response)
        if peak == 0:
            # Avoid dividing by zero (NaN/inf followed by an undefined uint8
            # cast) when the response has no positive peak to scale by.
            scaled = np.zeros(response.shape, dtype=np.uint8)
        else:
            # NOTE(review): negative responses are not clipped before the uint8
            # cast; left unchanged so features stay comparable with earlier runs.
            scaled = ((response / peak) * 255).astype(np.uint8)

        gray = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
        inverted = cv2.bitwise_not(gray)
        filtered.append(cv2.resize(inverted, (10, 10)))

    return np.array(filtered)

def extract_haralick(image):
    """Return the Haralick texture features of `image` averaged over axis 0.

    mahotas returns one row of features per adjacency type (presumably 4 for
    a 2-D image -- verify against the mahotas documentation); the rows are
    averaged into a single feature vector.
    """
    per_adjacency = mt.features.haralick(image)
    return per_adjacency.mean(axis=0)

def characterize(img):
    """Build the feature vector for one image.

    The image is resized to 30x30, then its color histogram and its filter
    response maps (from `extract_sobel`) are flattened and joined into a
    single 1-D array.
    """
    resized = cv2.resize(img, (30, 30))

    # color features followed by shape/texture features
    color_features = extract_color_histogram(resized)
    response_maps = extract_sobel(resized)

    # The leading empty list seeds the concatenation as float64, so the
    # resulting vector has the same dtype as when it is grown from [].
    pieces = [[], color_features.flatten()]
    pieces.extend(response_map.flatten() for response_map in response_maps)

    return np.concatenate(pieces, axis=None)

## Reading images and extracting features

In [None]:
IMGS_PATH = "/drive/Shareddrives/PDI/Dataset/Segmented"

classes = ['Dead', "Alive"]

## creating dataset
# One feature vector per readable image; the sub-folder name is the class label.
dataset = []
labels = []
for label in classes:
    img_folder = os.path.join(IMGS_PATH, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible across runs and machines.
    for img_name in sorted(os.listdir(img_folder)):
        img_path = os.path.join(img_folder, img_name)
        img = cv2.imread(img_path)
        # cv2.imread returns None (instead of raising) when a file cannot be
        # read or decoded, e.g. a stray non-image file in the folder.
        if img is None:
            print("Skipping unreadable image:", img_path)
            continue
        features = characterize(img)
        dataset.append(features)
        labels.append(label)

# Stack into arrays: one row of features per image, with a parallel label array.
dataset = np.array(dataset)
labels = np.array(labels)

## Preparing training dataset

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set()

# Hold out 40% of the samples for testing; stratifying on the labels keeps the
# class proportions the same in both splits, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset,
    labels,
    test_size=0.4,
    stratify=labels,
    random_state=2,
)

## KNN

In [None]:
# Fit a 3-nearest-neighbour classifier (Euclidean distance) on the training split.
knn = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
knn.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)

# Per-class scores are arrays with one entry per class. Passing knn.classes_
# as `labels` pins their order, and printing it shows which entry belongs to
# which class.
print("Class order: ", knn.classes_)
print("Confusion Matrix: ", confusion_matrix(y_test, y_pred_knn, labels=knn.classes_))
print("Accuracy: ", accuracy_score(y_test, y_pred_knn))
print("Precision for each class: ", precision_score(y_test, y_pred_knn, labels=knn.classes_, average=None))
print("Recall for each class: ", recall_score(y_test, y_pred_knn, labels=knn.classes_, average=None))
print("F1 score for each class: ", f1_score(y_test, y_pred_knn, labels=knn.classes_, average=None))

Confustion Matrix:  [[ 5  2]
 [ 0 34]]
Accuracy:  0.9512195121951219
Precision for each class:  [1.         0.94444444]
Recall for each class:  [0.71428571 1.        ]
F1 score for each class:  [0.83333333 0.97142857]


## SVC


In [None]:
from sklearn.svm import LinearSVC

# Fit a linear support vector classifier on the training split (fixed seed).
clf_svm = LinearSVC(random_state=9)

clf_svm.fit(X_train, y_train)

y_pred_svc = clf_svm.predict(X_test)

# Report the same metrics as for the KNN model. Passing clf_svm.classes_ as
# `labels` pins the order of the per-class arrays, and printing it shows which
# entry belongs to which class.
print("Class order: ", clf_svm.classes_)
print("Confusion Matrix: ", confusion_matrix(y_test, y_pred_svc, labels=clf_svm.classes_))
print("Accuracy: ", accuracy_score(y_test, y_pred_svc))
print("Precision for each class: ", precision_score(y_test, y_pred_svc, labels=clf_svm.classes_, average=None))
print("Recall for each class: ", recall_score(y_test, y_pred_svc, labels=clf_svm.classes_, average=None))
print("F1 score for each class: ", f1_score(y_test, y_pred_svc, labels=clf_svm.classes_, average=None))

Accuracy:  0.9512195121951219
Precision for each class:  [1.         0.94444444]
Recall for each class:  [0.71428571 1.        ]
F1 score for each class:  [0.83333333 0.97142857]


## Saving the best model

In [None]:
#saving the model into a file
import pickle
MODEL_PATH = "/drive/Shareddrives/PDI/Dataset"
# The context manager closes the file even if pickle.dump raises.
with open(os.path.join(MODEL_PATH, 'knn_strat_sobel_.pickle'), 'wb') as knnPickle:
    pickle.dump(knn, knnPickle)