# CS 419 - ASSIGNMENT 3

Berkay Barış Turan - 28132

### Importing Libraries

In [1]:
import os
import numpy as np
import cv2
from scipy.spatial.distance import euclidean, cityblock, mahalanobis
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score
from numpy.linalg import inv
import mahotas as mh


### Helper Functions

In [2]:
def load_dataset(directory):
    data, labels = [], []
    for filename in os.listdir(directory):
        if filename.endswith('.png'):
            img = cv2.imread(os.path.join(directory, filename), cv2.IMREAD_GRAYSCALE)
            label = filename.split('-')[0]
            data.append(img)
            labels.append(label)
    return data, labels

In [3]:
# Load the data
train_data, train_labels = load_dataset('mpeg7shapeB/train')
test_data, test_labels = load_dataset('mpeg7shapeB/test')

### Step 1: Distance Calculations

In [4]:
def mahalanobis_distance(u, v, VI):
    return mahalanobis(u, v, VI)

def extract_features(image):
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return np.zeros(3)  
    largest_contour = max(contours, key=cv2.contourArea)
    moments = cv2.moments(largest_contour)
    area = moments['m00']
    perimeter = cv2.arcLength(largest_contour, True)
    aspect_ratio = float(image.shape[1]) / image.shape[0]
    return np.array([area, perimeter, aspect_ratio])

def extract_features_from_dataset(dataset):
    features = []
    for image in dataset:
        feature = extract_features(image)
        features.append(feature)
    return features

def nearest_neighbor_classify(test_sample, train_samples, train_labels, distance_func):
    closest_label = None
    min_distance = float('inf')
    for train_sample, label in zip(train_samples, train_labels):
        distance = distance_func(test_sample, train_sample)
        if distance < min_distance:
            min_distance = distance
            closest_label = label
    return closest_label

def classify_and_evaluate(train_features, train_labels, test_features, test_labels, distance_func):
    predicted_labels = []
    for test_feature in test_features:
        predicted_label = nearest_neighbor_classify(test_feature, train_features, train_labels, distance_func)
        predicted_labels.append(predicted_label)
    accuracy = accuracy_score(test_labels, predicted_labels)
    return accuracy

def classify_and_evaluate2(train_features, train_labels, test_features, test_labels, distance_func, inv_cov_matrix=None):
    predicted_labels = []
    for test_feature in test_features:
        if inv_cov_matrix is not None:
            predicted_label = nearest_neighbor_classify(test_feature, train_features, train_labels, lambda u, v: distance_func(u, v, inv_cov_matrix))
        else:
            predicted_label = nearest_neighbor_classify(test_feature, train_features, train_labels, distance_func)
        predicted_labels.append(predicted_label)
    accuracy = accuracy_score(test_labels, predicted_labels)
    return accuracy

def chi_squared_distance(vec1, vec2):
    return np.sum((vec1 - vec2)**2 / (vec1 + vec2 + 1e-10))

def fourier_descriptors(contour, num_coeffs):
    contour = contour.squeeze()
    min_length = num_coeffs * 2
    if contour.shape[0] < min_length:
        padding = np.zeros((min_length - contour.shape[0], 2))
        contour = np.vstack((contour, padding))
    contour_complex = contour[:, 0] + 1j * contour[:, 1]
    fourier_result = np.fft.fft(contour_complex)
    fourier_result = fourier_result / np.abs(fourier_result[0])
    fd = np.abs(fourier_result[:num_coeffs])
    return fd

def extract_features2(image, num_coeffs):
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return np.zeros(num_coeffs)
    largest_contour = max(contours, key=cv2.contourArea)
    fd = fourier_descriptors(largest_contour, num_coeffs)
    return fd

In [5]:
train_features = extract_features_from_dataset(train_data)
test_features = extract_features_from_dataset(test_data)

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

cov_matrix = np.cov(np.array(train_features_scaled).T)
inv_cov_matrix = inv(cov_matrix)

scaler = MinMaxScaler()
train_features_norm = scaler.fit_transform(train_features)
test_features_norm = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, lambda u, v: mahalanobis_distance(u, v, inv_cov_matrix))
accuracy_chi_squared = classify_and_evaluate(train_features_norm, train_labels, test_features_norm, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance: {accuracy_chi_squared}')


Accuracy with Euclidean distance: 0.5071428571428571
Accuracy with Manhattan distance: 0.5171428571428571
Accuracy with Mahalanobis distance: 0.5014285714285714
Accuracy with Chi-squared distance: 0.5


### Step 2: Basic Shape Descriptors

#### a. Area

In [6]:
def calculate_area_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(largest_contour)
        return area
    else:
        return 0 

In [7]:
def calculate_area_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(largest_contour)
        return area
    else:
        return 0 
        
train_features = [calculate_area_descriptor(image) for image in train_data]
test_features = [calculate_area_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with area descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with area descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with area descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with area descriptor: {accuracy_chi_squared}')

Accuracy with Euclidean distance with area descriptor: 0.18714285714285714
Accuracy with Manhattan distance with area descriptor: 0.18714285714285714
Accuracy with Mahalanobis distance with area descriptor: 0.07285714285714286
Accuracy with Chi-squared distance with area descriptor: 0.18714285714285714


#### b. Perimeter

In [8]:
def calculate_perimeter_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        perimeter = cv2.arcLength(largest_contour, True)
        return perimeter
    else:
        return 0  

In [9]:
train_features = [calculate_perimeter_descriptor(image) for image in train_data]
test_features = [calculate_perimeter_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)

accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with perimeter descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with perimeter descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with perimeter descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with perimeter descriptor: {accuracy_chi_squared}')

Accuracy with Euclidean distance with perimeter descriptor: 0.1457142857142857
Accuracy with Manhattan distance with perimeter descriptor: 0.1457142857142857
Accuracy with Mahalanobis distance with perimeter descriptor: 0.09857142857142857
Accuracy with Chi-squared distance with perimeter descriptor: 0.1457142857142857


#### c. Convexity

In [10]:
def calculate_convexity_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        object_area = cv2.contourArea(largest_contour)
        hull = cv2.convexHull(largest_contour)
        hull_area = cv2.contourArea(hull)
        if hull_area == 0:
            return 0
        convexity = object_area / hull_area
        return convexity
    else:
        return 0

In [11]:
train_features = [calculate_convexity_descriptor(image) for image in train_data]
test_features = [calculate_convexity_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)

accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)
print(f'Accuracy with Euclidean distance with convexity descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with convexity descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with convexity descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with convexity descriptor: {accuracy_chi_squared}')
    

Accuracy with Euclidean distance with convexity descriptor: 0.13
Accuracy with Manhattan distance with convexity descriptor: 0.13
Accuracy with Mahalanobis distance with convexity descriptor: 0.07285714285714286
Accuracy with Chi-squared distance with convexity descriptor: 0.13


#### d. Circularity

In [12]:
def calculate_circularity_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        object_area = cv2.contourArea(largest_contour)
        perimeter = cv2.arcLength(largest_contour, True)
        if perimeter == 0:
            return 0
        circularity = (4 * np.pi * object_area) / (perimeter ** 2)
        return circularity
    else:
        return 0

In [13]:
train_features = [calculate_circularity_descriptor(image) for image in train_data]
test_features = [calculate_circularity_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)

accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)
print(f'Accuracy with Euclidean distance with circularity descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with circularity descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with circularity descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with circularity descriptor: {accuracy_chi_squared}')
    

Accuracy with Euclidean distance with circularity descriptor: 0.13142857142857142
Accuracy with Manhattan distance with circularity descriptor: 0.13142857142857142
Accuracy with Mahalanobis distance with circularity descriptor: 0.06285714285714286
Accuracy with Chi-squared distance with circularity descriptor: 0.13142857142857142


#### e. Rectangularity

In [14]:
def calculate_rectangularity_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        object_area = cv2.contourArea(largest_contour)
        x, y, w, h = cv2.boundingRect(largest_contour)
        bounding_rect_area = w * h

        if bounding_rect_area == 0:
            return 0

        rectangularity = object_area / bounding_rect_area
        return rectangularity
    else:
        return 0

In [15]:
train_features = [calculate_rectangularity_descriptor(image) for image in train_data]
test_features = [calculate_rectangularity_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)

accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with rectangularity descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distancewith rectangularity descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with rectangularity descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with rectangularity descriptor: {accuracy_chi_squared}')

Accuracy with Euclidean distance with rectangularity descriptor: 0.11
Accuracy with Manhattan distancewith rectangularity descriptor: 0.11
Accuracy with Mahalanobis distance with rectangularity descriptor: 0.08142857142857143
Accuracy with Chi-squared distance with rectangularity descriptor: 0.11


#### f. Eccentricity

In [16]:
def calculate_eccentricity_descriptor(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        if len(largest_contour) >= 5: 
            ellipse = cv2.fitEllipse(largest_contour)
            (center, axes, orientation) = ellipse
            major_axis_length = max(axes)
            minor_axis_length = min(axes)
            eccentricity = np.sqrt(1 - (minor_axis_length / major_axis_length) ** 2)
            return eccentricity
        else:
            return 0
    else:
        return 0 

In [17]:
train_features = [calculate_eccentricity_descriptor(image) for image in train_data]
test_features = [calculate_eccentricity_descriptor(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

expanded_train_features = np.hstack([train_features_scaled, train_features_scaled + np.random.randn(*train_features_scaled.shape) * 0.01])
expanded_test_features = np.hstack([test_features_scaled, test_features_scaled + np.random.randn(*test_features_scaled.shape) * 0.01])

cov_matrix = np.cov(expanded_train_features.T)
regularized_cov_matrix = cov_matrix + 1e-5 * np.eye(cov_matrix.shape[0])
inv_cov_matrix = inv(regularized_cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(expanded_train_features, train_labels, expanded_test_features, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(np.array(train_features).reshape(-1, 1))
test_features_scaled = scaler.transform(np.array(test_features).reshape(-1, 1))
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with eccentricity descriptor: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with eccentricity descriptor: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with eccentricity descriptor: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with eccentricity descriptor: {accuracy_chi_squared}')
    

Accuracy with Euclidean distance with eccentricity descriptor: 0.12571428571428572
Accuracy with Manhattan distance with eccentricity descriptor: 0.12571428571428572
Accuracy with Mahalanobis distance with eccentricity descriptor: 0.06714285714285714
Accuracy with Chi-squared distance with eccentricity descriptor: 0.12571428571428572


### Step 3: Fourier Descriptor

In [18]:
for i in range(1,5):
    num_coeffs = 5*i
    print(f'num coefficient: {num_coeffs}')
    
    train_features = [extract_features2(image, num_coeffs) for image in train_data]
    test_features = [extract_features2(image, num_coeffs) for image in test_data]

    scaler = StandardScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    test_features_scaled = scaler.transform(test_features)

    cov_matrix = np.cov(np.array(train_features_scaled).T)
    inv_cov_matrix = inv(cov_matrix)

    scaler = MinMaxScaler()
    train_features_norm = scaler.fit_transform(train_features)
    test_features_norm = scaler.transform(test_features)

    accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
    accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)
    accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)
    accuracy_chi_squared = classify_and_evaluate(train_features_norm, train_labels, test_features_norm, test_labels, chi_squared_distance)
    
    print(f'Accuracy with Euclidean distance with fourier descriptor: {accuracy_euclidean}')
    print(f'Accuracy with Manhattan distance with fourier descriptor: {accuracy_manhattan}')
    print(f'Accuracy with Mahalanobis distance with fourier decriptor: {accuracy_mahalanobis}')
    print(f'Accuracy with Chi-squared distance with fourier descriptor: {accuracy_chi_squared}')

num coefficient: 5
Accuracy with Euclidean distance with fourier descriptor: 0.3628571428571429
Accuracy with Manhattan distance with fourier descriptor: 0.36428571428571427
Accuracy with Mahalanobis distance with fourier decriptor: 0.3142857142857143
Accuracy with Chi-squared distance with fourier descriptor: 0.36142857142857143
num coefficient: 10
Accuracy with Euclidean distance with fourier descriptor: 0.5057142857142857
Accuracy with Manhattan distance with fourier descriptor: 0.5
Accuracy with Mahalanobis distance with fourier decriptor: 0.41
Accuracy with Chi-squared distance with fourier descriptor: 0.4785714285714286
num coefficient: 15
Accuracy with Euclidean distance with fourier descriptor: 0.5057142857142857
Accuracy with Manhattan distance with fourier descriptor: 0.4928571428571429
Accuracy with Mahalanobis distance with fourier decriptor: 0.34
Accuracy with Chi-squared distance with fourier descriptor: 0.4685714285714286
num coefficient: 20
Accuracy with Euclidean dista

### Step 4: Shape Histograms

In [19]:
def calculate_shape_histogram(contour, bins=10):
    M = cv2.moments(contour)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    centroid = np.array([cx, cy])

    distances = np.sqrt(np.sum((contour - centroid) ** 2, axis=2))
    histogram, _ = np.histogram(distances, bins=bins, range=[0, np.max(distances)])
    histogram = histogram / np.sum(histogram) 

    return histogram

def extract_features(image, bins=8):
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return np.zeros(bins)
    largest_contour = max(contours, key=cv2.contourArea)
    oh = calculate_shape_histogram(largest_contour, bins=bins)
    return oh    

def regularized_inverse(cov_matrix, alpha=1e-5):
    return inv(cov_matrix + alpha * np.eye(cov_matrix.shape[0]))

def classify_and_evaluate(train_features, train_labels, test_features, test_labels, distance_func, inv_cov_matrix=None):
    predicted_labels = []
    for test_feature in test_features:
        if inv_cov_matrix is not None:
            predicted_label = nearest_neighbor_classify(test_feature, train_features, train_labels, lambda u, v: distance_func(u, v, inv_cov_matrix))
        else:
            predicted_label = nearest_neighbor_classify(test_feature, train_features, train_labels, distance_func)
        predicted_labels.append(predicted_label)
    accuracy = accuracy_score(test_labels, predicted_labels)
    return accuracy

In [20]:
for i in range(1, 5):
    bins = 4 * i
    print(f'bins = {bins}')
    train_features = [extract_features(image, bins) for image in train_data]
    test_features = [extract_features(image, bins) for image in test_data]

    scaler = StandardScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    test_features_scaled = scaler.transform(test_features)

    accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
    accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

    cov_matrix = np.cov(np.array(train_features_scaled).T)
    inv_cov_matrix = regularized_inverse(cov_matrix)
    accuracy_mahalanobis = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)
    
    scaler = MinMaxScaler()
    train_features_norm = scaler.fit_transform(train_features)
    test_features_norm = scaler.transform(test_features)
    accuracy_chi_squared = classify_and_evaluate(train_features_norm, train_labels, test_features_norm, test_labels, chi_squared_distance)

    print(f'Accuracy with Euclidean distance with shape histograms: {accuracy_euclidean}')
    print(f'Accuracy with Manhattan distance with shape histograms: {accuracy_manhattan}')
    print(f'Accuracy with Mahalanobis distance with shape histograms: {accuracy_mahalanobis}')
    print(f'Accuracy with Chi-squared distance with shape histograms: {accuracy_chi_squared}')

bins = 4
Accuracy with Euclidean distance with shape histograms: 0.25
Accuracy with Manhattan distance with shape histograms: 0.2542857142857143
Accuracy with Mahalanobis distance with shape histograms: 0.23857142857142857
Accuracy with Chi-squared distance with shape histograms: 0.24714285714285714
bins = 8
Accuracy with Euclidean distance with shape histograms: 0.4742857142857143
Accuracy with Manhattan distance with shape histograms: 0.4785714285714286
Accuracy with Mahalanobis distance with shape histograms: 0.4257142857142857
Accuracy with Chi-squared distance with shape histograms: 0.48857142857142855
bins = 12
Accuracy with Euclidean distance with shape histograms: 0.53
Accuracy with Manhattan distance with shape histograms: 0.56
Accuracy with Mahalanobis distance with shape histograms: 0.46285714285714286
Accuracy with Chi-squared distance with shape histograms: 0.5414285714285715
bins = 16
Accuracy with Euclidean distance with shape histograms: 0.49857142857142855
Accuracy wit

### Step 5: Moment Invariants

#### a. Hu's Moment

In [21]:
def extract_hu_moments(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    moments = cv2.moments(image)
    hu_moments = cv2.HuMoments(moments)
    epsilon = 1e-10
    hu_moments = -np.sign(hu_moments) * np.log10(np.abs(hu_moments) + epsilon)
    return hu_moments.flatten()

def extract_hu(image):
    hu_features = extract_hu_moments(image)
    return hu_features

train_features = [extract_hu(image) for image in train_data]
test_features = [extract_hu(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with Hu moment invariants: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with Hu moment invariants: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with Hu moment invariants: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with Hu moment invariants: {accuracy_chi_squared}')

Accuracy with Euclidean distance with Hu moment invariants: 0.45714285714285713
Accuracy with Manhattan distance with Hu moment invariants: 0.46
Accuracy with Mahalanobis distance with Hu moment invariants: 0.4657142857142857
Accuracy with Chi-squared distance with Hu moment invariants: 0.4542857142857143


#### b. Zernike's Moment

In [22]:
def extract_zernike_moments(image, radius, degree=8):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY)
    zernike_moments = mh.features.zernike_moments(binary_image, radius, degree=degree)
    return zernike_moments

def extract_zernike(image, zernike_radius):
    zernike_features = extract_zernike_moments(image, zernike_radius)
    return zernike_features

zernike_radius = 21

train_features = [extract_zernike(image, zernike_radius) for image in train_data]
test_features = [extract_zernike(image, zernike_radius) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with Zernike moment invariant: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with Zernike moment invariant: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with Zernike moment invariant: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with Zernike moment invariant: {accuracy_chi_squared}')

Accuracy with Euclidean distance with Zernike moment invariant: 0.12
Accuracy with Manhattan distance with Zernike moment invariant: 0.11285714285714285
Accuracy with Mahalanobis distance with Zernike moment invariant: 0.08142857142857143
Accuracy with Chi-squared distance with Zernike moment invariant: 0.11571428571428571


#### c. Geometric Moment

In [23]:
def extract_geometric_moments(image):
    moments = cv2.moments(image)
    return np.array([moments['m00'], moments['m01'], moments['m10'], moments['m11']])

def extract_geometric(image):
    geometric_features = extract_geometric_moments(image)
    return geometric_features

train_features = [extract_geometric(image) for image in train_data]
test_features = [extract_geometric(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with geometric moment invariant: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with geometric moment invariant: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with geometric moment invariant: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with geometric moment invariant: {accuracy_chi_squared}')

Accuracy with Euclidean distance with geometric moment invariant: 0.5242857142857142
Accuracy with Manhattan distance with geometric moment invariant: 0.5285714285714286
Accuracy with Mahalanobis distance with geometric moment invariant: 0.5471428571428572
Accuracy with Chi-squared distance with geometric moment invariant: 0.52


#### d. Flusser Moment

In [24]:
def extract_flusser_moments(image):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    moments = cv2.moments(image)
    flusser_moments = []
    if moments['mu02'] != 0:
        flusser_moments.append(moments['mu20'] / moments['mu02'])
    return np.array(flusser_moments)

def extract_flusser(image):
    flusser_features = extract_flusser_moments(image)
    return flusser_features

def classify_and_evaluate(train_features, train_labels, test_features, test_labels, distance_func, VI=None):
    predicted_labels = []
    for test_feature in test_features:
        closest_label = None
        min_distance = float('inf')
        for train_feature, label in zip(train_features, train_labels):
            if VI is not None:
                distance = distance_func(test_feature, train_feature, VI)
            else:
                distance = distance_func(test_feature, train_feature)
            if distance < min_distance:
                min_distance = distance
                closest_label = label
        predicted_labels.append(closest_label)
    return accuracy_score(test_labels, predicted_labels)

train_features = [extract_flusser(image) for image in train_data]
test_features = [extract_flusser(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)
"""
cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)
"""
scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with flusser moment invariant: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with flusser moment invariant: {accuracy_manhattan}')
#print(f'Accuracy with Mahalanobis distance with flusser moment invariant: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with flusser moment invariant: {accuracy_chi_squared}')

Accuracy with Euclidean distance with flusser moment invariant: 0.18857142857142858
Accuracy with Manhattan distance with flusser moment invariant: 0.18857142857142858
Accuracy with Chi-squared distance with flusser moment invariant: 0.18857142857142858


### Step 6: Combinations

#### a. Combination 1: Area, Perimeter, and Circularity

In [25]:
def extract_combined_features(image):
    area = calculate_area_descriptor(image)
    perimeter = calculate_perimeter_descriptor(image)
    circularity = calculate_circularity_descriptor(image)
    combined_features = np.array([area, perimeter, circularity])
    return combined_features

train_combined_features = [extract_combined_features(image) for image in train_data]
test_combined_features = [extract_combined_features(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_combined_features)
test_features_scaled = scaler.transform(test_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled_chi = scaler.fit_transform(train_combined_features)
test_features_scaled_chi = scaler.transform(test_combined_features)
accuracy_chi_squared = classify_and_evaluate2(train_features_scaled_chi, train_labels, test_features_scaled_chi, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 1: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 1: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 1: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 1: {accuracy_chi_squared}')

Accuracy with Euclidean distance with combined features 1: 0.3314285714285714
Accuracy with Manhattan distance with combined features 1: 0.33285714285714285
Accuracy with Mahalanobis distance with combined features 1: 0.3242857142857143
Accuracy with Chi-squared distance with combined features 1: 0.32142857142857145


#### b. Combination 2: Area, Perimeter, Circularity, Convexity, Rectangularity, and Eccentricity

In [26]:
def extract_all_combined_features(image):
    area = calculate_area_descriptor(image)
    perimeter = calculate_perimeter_descriptor(image)
    convexity = calculate_convexity_descriptor(image)
    circularity = calculate_circularity_descriptor(image)
    rectangularity = calculate_rectangularity_descriptor(image)
    eccentricity = calculate_eccentricity_descriptor(image)

    combined_features = np.array([area, perimeter, convexity, circularity, rectangularity, eccentricity])
    return combined_features

train_combined_features = [extract_all_combined_features(image) for image in train_data]
test_combined_features = [extract_all_combined_features(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_combined_features)
test_features_scaled = scaler.transform(test_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled_chi = scaler.fit_transform(train_combined_features)
test_features_scaled_chi = scaler.transform(test_combined_features)
accuracy_chi_squared = classify_and_evaluate2(train_features_scaled_chi, train_labels, test_features_scaled_chi, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 2: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 2 {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 2: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 2: {accuracy_chi_squared}')

Accuracy with Euclidean distance with combined features 2: 0.6728571428571428
Accuracy with Manhattan distance with combined features 2 0.6657142857142857
Accuracy with Mahalanobis distance with combined features 2: 0.6657142857142857
Accuracy with Chi-squared distance with combined features 2: 0.6442857142857142


#### c. Combination 3: Area, Perimeter, Circularity, Convexity, Rectangularity, and Eccentricity with Fourier Descriptor

In [27]:
def extract_all_combined_features(image, num_coeffs=10):
    area = calculate_area_descriptor(image)
    perimeter = calculate_perimeter_descriptor(image)
    convexity = calculate_convexity_descriptor(image)
    circularity = calculate_circularity_descriptor(image)
    rectangularity = calculate_rectangularity_descriptor(image)
    eccentricity = calculate_eccentricity_descriptor(image)

    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        fourier_features = fourier_descriptors(largest_contour, num_coeffs)
    else:
        fourier_features = np.zeros(num_coeffs)
    combined_features = np.concatenate(([area, perimeter, convexity, circularity, rectangularity, eccentricity], fourier_features))
    return combined_features

train_combined_features = [extract_all_combined_features(image, num_coeffs=10) for image in train_data]
test_combined_features = [extract_all_combined_features(image, num_coeffs=10) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_combined_features)
test_features_scaled = scaler.transform(test_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled_chi = scaler.fit_transform(train_combined_features)
test_features_scaled_chi = scaler.transform(test_combined_features)
accuracy_chi_squared = classify_and_evaluate2(train_features_scaled_chi, train_labels, test_features_scaled_chi, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 3: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 3: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 3: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 3: {accuracy_chi_squared}')

Accuracy with Euclidean distance with combined features 3: 0.7471428571428571
Accuracy with Manhattan distance with combined features 3: 0.78
Accuracy with Mahalanobis distance with combined features 3: 0.6885714285714286
Accuracy with Chi-squared distance with combined features 3: 0.7328571428571429


#### d. Combination 4: Area, Perimeter, Circularity, Convexity, Rectangularity and Eccentricity with Shape Histogram

In [28]:
def extract_all_combined_features(image, bins=12):
    area = calculate_area_descriptor(image)
    perimeter = calculate_perimeter_descriptor(image)
    convexity = calculate_convexity_descriptor(image)
    circularity = calculate_circularity_descriptor(image)
    rectangularity = calculate_rectangularity_descriptor(image)
    eccentricity = calculate_eccentricity_descriptor(image)

    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        shape_histogram = calculate_shape_histogram(largest_contour, bins)
    else:
        shape_histogram = np.zeros(bins)
    combined_features = np.concatenate(([area, perimeter, convexity, circularity, rectangularity, eccentricity], shape_histogram))
    return combined_features

train_combined_features = [extract_all_combined_features(image, bins=12) for image in train_data]
test_combined_features = [extract_all_combined_features(image, bins=12) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_combined_features)
test_features_scaled = scaler.transform(test_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled_chi = scaler.fit_transform(train_combined_features)
test_features_scaled_chi = scaler.transform(test_combined_features)
accuracy_chi_squared = classify_and_evaluate2(train_features_scaled_chi, train_labels, test_features_scaled_chi, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with with combined features 4: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with with combined features 4: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with with combined features 4: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with with combined features 4: {accuracy_chi_squared}')

Accuracy with Euclidean distance with with combined features 4: 0.7142857142857143
Accuracy with Manhattan distance with with combined features 4: 0.7742857142857142
Accuracy with Mahalanobis distance with with combined features 4: 0.7071428571428572
Accuracy with Chi-squared distance with with combined features 4: 0.7528571428571429


#### e. Combination 5: Area, Perimeter, Circularity, Convexity, Rectangularity and Eccentricity with Shape Histogram and Fourier Descriptor

In [29]:
def extract_all_combined_features(image, bins=12, num_coeffs=10):
    area = calculate_area_descriptor(image)
    perimeter = calculate_perimeter_descriptor(image)
    convexity = calculate_convexity_descriptor(image)
    circularity = calculate_circularity_descriptor(image)
    rectangularity = calculate_rectangularity_descriptor(image)
    eccentricity = calculate_eccentricity_descriptor(image)

    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        shape_histogram = calculate_shape_histogram(largest_contour, bins)
        fourier_features = fourier_descriptors(largest_contour, num_coeffs)
    else:
        shape_histogram = np.zeros(bins)
        fourier_features = np.zeros(num_coeffs)
    combined_features = np.concatenate(([area, perimeter, convexity, circularity, rectangularity, eccentricity], shape_histogram, fourier_features))
    return combined_features

train_combined_features = [extract_all_combined_features(image, bins=12, num_coeffs=10) for image in train_data]
test_combined_features = [extract_all_combined_features(image, bins=12, num_coeffs=10) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_combined_features)
test_features_scaled = scaler.transform(test_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled_chi = scaler.fit_transform(train_combined_features)
test_features_scaled_chi = scaler.transform(test_combined_features)
accuracy_chi_squared = classify_and_evaluate2(train_features_scaled_chi, train_labels, test_features_scaled_chi, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with with combined features 5: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with with combined features 5: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with with combined features 5: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with with combined features 5: {accuracy_chi_squared}')


  return np.sqrt(m)


Accuracy with Euclidean distance with with combined features 5: 0.7971428571428572
Accuracy with Manhattan distance with with combined features 5: 0.8314285714285714
Accuracy with Mahalanobis distance with with combined features 5: 0.6585714285714286
Accuracy with Chi-squared distance with with combined features 5: 0.8128571428571428


#### f. Combination 6: Hu's Moment and Geometric Moment

In [30]:
def extract_combined_features_hu_geo(image):
    hu_features = extract_hu_moments(image)
    geometric_features = extract_geometric_moments(image)
    combined_features = np.hstack([
        hu_features,
        geometric_features,
    ])
    return combined_features

train_features = [extract_combined_features_hu_geo(image) for image in train_data]
test_features = [extract_combined_features_hu_geo(image) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 6: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 6: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 6: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 6: {accuracy_chi_squared}')

Accuracy with Euclidean distance with combined features 6: 0.5357142857142857
Accuracy with Manhattan distance with combined features 6: 0.57
Accuracy with Mahalanobis distance with combined features 6: 0.5642857142857143
Accuracy with Chi-squared distance with combined features 6: 0.5557142857142857


#### g. Combination 7: Hu's, Geometric, Zernike, and Flusser Moments

In [31]:
def extract_combined_features2(image, zernike_radius):
    hu_features = extract_hu_moments(image)
    zernike_features = extract_zernike_moments(image, zernike_radius)
    geometric_features = extract_geometric_moments(image)
    flusser_features = extract_flusser_moments(image)
    combined_features = np.hstack([
        hu_features,
        zernike_features,
        geometric_features,
        flusser_features
    ])
    return combined_features

zernike_radius = 21
train_features = [extract_combined_features2(image, zernike_radius) for image in train_data]
test_features = [extract_combined_features2(image, zernike_radius) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 7: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 7: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 7: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 7: {accuracy_chi_squared}')

Accuracy with Euclidean distance with combined features 7: 0.5157142857142857
Accuracy with Manhattan distance with combined features 7: 0.5242857142857142
Accuracy with Mahalanobis distance with combined features 7: 0.45285714285714285
Accuracy with Chi-squared distance with combined features 7: 0.4685714285714286


#### h. Combination 8: Area, Perimeter, Circularity, Convexity, Rectangularity, Eccentricity, Shape Histogram, Fourier Descriptor, Hu's Moment, and Geometric Moment

In [32]:
def extract_all_features(image, bins, num_coeffs):
    combined_features1 = extract_all_combined_features(image, bins=bins, num_coeffs=num_coeffs)
    combined_features2 = extract_combined_features_hu_geo(image)
    all_combined_features = np.concatenate((combined_features1, combined_features2))
    return all_combined_features

train_all_combined_features = [extract_all_features(image, bins=12, num_coeffs=10) for image in train_data]
test_all_combined_features = [extract_all_features(image, bins=12, num_coeffs=10) for image in test_data]

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_all_combined_features)
test_features_scaled = scaler.transform(test_all_combined_features)

accuracy_euclidean = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, euclidean)
accuracy_manhattan = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, cityblock)

cov_matrix = np.cov(train_features_scaled.T)
inv_cov_matrix = inv(cov_matrix)
accuracy_mahalanobis = classify_and_evaluate2(train_features_scaled, train_labels, test_features_scaled, test_labels, mahalanobis_distance, inv_cov_matrix)

scaler = MinMaxScaler()
train_features_scaled = scaler.fit_transform(train_all_combined_features)
test_features_scaled = scaler.transform(test_all_combined_features)
accuracy_chi_squared = classify_and_evaluate(train_features_scaled, train_labels, test_features_scaled, test_labels, chi_squared_distance)

print(f'Accuracy with Euclidean distance with combined features 8: {accuracy_euclidean}')
print(f'Accuracy with Manhattan distance with combined features 8: {accuracy_manhattan}')
print(f'Accuracy with Mahalanobis distance with combined features 8: {accuracy_mahalanobis}')
print(f'Accuracy with Chi-squared distance with combined features 8: {accuracy_chi_squared}')

  return np.sqrt(m)


Accuracy with Euclidean distance with combined features 8: 0.7857142857142857
Accuracy with Manhattan distance with combined features 8: 0.8514285714285714
Accuracy with Mahalanobis distance with combined features 8: 0.6942857142857143
Accuracy with Chi-squared distance with combined features 8: 0.7028571428571428
