In [115]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from os import walk

import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split

**LOAD DATASET WITH FOLDER IMAGES**

In [109]:
def load_dataset(IMAGES_PATH: str, GT_DATA: str):
    import ast # This will convert literal lists to lists
    
    data = pd.read_csv(GT_DATA)
    
    dataset = []
    
    for image_name, datalist in zip(data["filename"], data["datalist"]):   
        
        datalist = ast.literal_eval(datalist) # This will convert literal lists to lists
        
        original_image = cv.imread(IMAGES_PATH+image_name, cv.IMREAD_COLOR) # Load general image
        gray_image = cv.cvtColor(original_image, cv.COLOR_BGR2GRAY)
        
        for segment in datalist:
            x, y, width, height = segment[:4] # Crop each segment from the image
            dataset.append( (gray_image[y:y+height, x:x+width], segment[4]))
    
    X = [image[0] for image in dataset]
    y = [label[1] for label in dataset]
    
    return X, y

In [110]:
IMAGES_PATH = "dataset\\"
GT_DATA = "image_annotations.csv"

X, y = load_dataset(IMAGES_PATH, GT_DATA)


**CREATE SIFT FEATURE EXTRACTOR AND EXTRACT ALL IMAGES' DATA**

In [111]:
sift = cv.SIFT_create()
sift_features = []

for image in X:
    if len(image.shape) == 2:
        gray_image = image 
    else:
        gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    keypoints, descriptors = sift.detectAndCompute(gray_image, None)
    
    sift_features.append(descriptors)

sift_features = np.vstack(sift_features)

**SPLIT DATA IN TRAIN TEST**

In [112]:
X_train, X_test, y_train, y_test = train_test_split(sift_features, y, test_size=0.2, random_state=42)

**TRAIN THE SVM MODEL**

In [117]:
clf = svm.SVC(kernel='rbf', C=1.0)
clf.fit(X_train, y_train)

ValueError: Input X contains NaN.
SVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

**EVALUATE THE MODEL**

In [None]:
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")