In [15]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from utils.elpv_reader import load_dataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline


In [16]:
# Load the dataset and print some information
images, proba, types = load_dataset()
print(images.shape)
print(proba)
print(types)

# Step 1: Classify images into "mono" and "poly" based on "types"
# You can create two separate lists for "mono" and "poly" images
mono_images = []
poly_images = []
poly_proba=[]
mono_proba=[]


for i in range(len(images)):
    if types[i] == "mono":
        mono_images.append(images[i])
        mono_proba.append((proba[i]))
    elif types[i] == "poly":
        poly_images.append(images[i])
        poly_proba.append((proba[i]))

# Convert lists to NumPy arrays
mono_images = np.array(mono_images)
poly_images = np.array(poly_images)

# Step 2: Split the dataset into train and test sets for both "mono" and "poly" images
# Adjust the test_size and random_state as needed
x_mono_train, x_mono_test, y_mono_train, y_mono_test = train_test_split(mono_images, mono_proba, test_size=0.25, random_state=42)
x_poly_train, x_poly_test, y_poly_train, y_poly_test = train_test_split(poly_images, poly_proba, test_size=0.25, random_state=42)

y_mono_train =np.array(y_mono_train)
y_poly_train=np.array(y_poly_train)


# Step 3: Convert each image to pixel histograms (histograms can be shared)
his_train_mono = []
his_test_mono = []
his_train_poly = []
his_test_poly = []

for image in x_mono_train:
    hist = cv2.calcHist([image], [0], None, [256], [0.0, 255.0])
    his_train_mono.append(((hist / 255).flatten()))

for image in x_mono_test:
    hist = cv2.calcHist([image], [0], None, [256], [0.0, 255.0])
    his_test_mono.append(((hist / 255).flatten()))

for image in x_poly_train:
    hist = cv2.calcHist([image], [0], None, [256], [0.0, 255.0])
    his_train_poly.append(((hist / 255).flatten()))

for image in x_poly_test:
    hist = cv2.calcHist([image], [0], None, [256], [0.0, 255.0])
    his_test_poly.append(((hist / 255).flatten()))

# Step 4: Preprocess, train, and evaluate SVM models separately for "mono" and "poly" images
his_train_mono = np.array(his_train_mono)
his_test_mono = np.array(his_test_mono)

his_train_poly = np.array(his_train_poly)
his_test_poly = np.array(his_test_poly)

# Create MinMaxScaler objects separately for "mono" and "poly" images
scaler_mono = MinMaxScaler()
scaler_poly = MinMaxScaler()

# Fit the scalers to the training data and transform it
his_train_normalized_mono = scaler_mono.fit_transform(his_train_mono)
his_test_normalized_mono = scaler_mono.transform(his_test_mono)

his_train_normalized_poly = scaler_poly.fit_transform(his_train_poly)
his_test_normalized_poly = scaler_poly.transform(his_test_poly)

# Create separate PCA and SVM models for "mono" and "poly" images
pipeline_mono = Pipeline([
    ('pca', PCA()),
    ('svm', SVC(kernel="linear"))
])

pipeline_poly = Pipeline([
    ('pca', PCA()),
    ('svm', SVC(kernel="linear"))
])

# Define a range of PCA components to try (can adjust separately)
param_grid = {
    'pca__n_components': range(1, his_train_normalized_mono.shape[1] + 1)
}

# Create GridSearchCV objects separately for "mono" and "poly" images
grid_search_mono = GridSearchCV(pipeline_mono, param_grid, cv=5, scoring='accuracy')
grid_search_poly = GridSearchCV(pipeline_poly, param_grid, cv=5, scoring='accuracy')

# Fit the grid searches to the normalized training data
grid_search_mono.fit(his_train_normalized_mono, y_mono_train.astype("str"))
grid_search_poly.fit(his_train_normalized_poly, y_poly_train.astype("str"))

# Get the best number of PCA components and the best scores separately for "mono" and "poly" images
best_n_components_mono = grid_search_mono.best_estimator_.named_steps['pca'].n_components
best_score_mono = grid_search_mono.best_score_
print(f"Best number of PCA components for 'mono': {best_n_components_mono}")
print(f"Best cross-validation accuracy for 'mono': {best_score_mono}")

best_n_components_poly = grid_search_poly.best_estimator_.named_steps['pca'].n_components
best_score_poly = grid_search_poly.best_score_
print(f"Best number of PCA components for 'poly': {best_n_components_poly}")
print(f"Best cross-validation accuracy for 'poly': {best_score_poly}")

# Use the best models found by the grid searches to make predictions on the test sets
best_model_mono = grid_search_mono.best_estimator_
predict_labels_mono = best_model_mono.predict(his_test_normalized_mono)

best_model_poly = grid_search_poly.best_estimator_
predict_labels_poly = best_model_poly.predict(his_test_normalized_poly)

# Print the classification reports for the test sets of "mono" and "poly" images
print("Classification report for 'mono' images:")
print(metrics.classification_report(y_mono_test.astype("str"), predict_labels_mono))

print("Classification report for 'poly' images:")
print(metrics.classification_report(y_poly_test.astype("str"), predict_labels_poly))


(2624, 300, 300)
[1. 1. 1. ... 0. 0. 0.]
['mono' 'mono' 'mono' ... 'poly' 'poly' 'poly']
Best number of PCA components for 'mono': 21
Best cross-validation accuracy for 'mono': 0.6857142857142857
Best number of PCA components for 'poly': 9
Best cross-validation accuracy for 'poly': 0.6600377386414088
Classification report for 'mono' images:


AttributeError: 'list' object has no attribute 'astype'