# In [None]:
# import cv2
# import numpy as np
# from sklearn.cluster import KMeans
# from sklearn.svm import SVC
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score
# from sklearn.preprocessing import StandardScaler
# import os

# # Function to load image dataset and corresponding labels
# def load_dataset(data_dir):
#     X = []
#     y = []
#     class_labels = os.listdir(data_dir)
    
#     for label, class_name in enumerate(class_labels):
#         class_dir = os.path.join(data_dir, class_name)
        
#         for image_file in os.listdir(class_dir):
#             image_path = os.path.join(class_dir, image_file)
#             image = cv2.imread(image_path)
#             X.append(image)
#             y.append(label)
    
#     return np.array(X), np.array(y)

# # Function to preprocess images (resize, normalize, convert to grayscale)
# def preprocess_images(images, img_size, grayscale=True):
#     processed_images = []
    
#     for image in images:
#         if grayscale:
#             image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#         if img_size is not None:
#             image = cv2.resize(image, img_size)
#         # You can add more preprocessing steps here (e.g., normalization)
#         processed_images.append(image)
    
#     return np.array(processed_images)

# # Function to extract local features from preprocessed images
# def extract_local_features(images, feature_extractor):
#     features = []
    
#     for image in images:
#         kp, des = feature_extractor.detectAndCompute(image, None)
#         if des is not None:
#             features.append(des)
    
#     return np.vstack(features)

# # Function to construct visual vocabulary using KMeans
# def construct_visual_vocabulary(features, num_clusters):
#     kmeans = KMeans(n_clusters=num_clusters)
#     kmeans.fit(features)
#     return kmeans

# # Function to quantize local features based on visual vocabulary
# def quantize_features(features, vocabulary):
#     labels = vocabulary.predict(features)
#     return labels

# # Function to encode quantized features
# def encode_features(labels, num_clusters):
#     encoded_features = []
#     for label in labels:
#         histogram, _ = np.histogram(label, bins=range(num_clusters + 1))
#         encoded_features.append(histogram)
#     return np.array(encoded_features)

# # Function to split dataset into training and testing sets
# def split_dataset(X, y, test_size=0.2, random_state=42):
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
#     return X_train, X_test, y_train, y_test

# # Function to train classification model on training set using encoded features
# def train_classification_model(X_train, y_train):
#     clf = SVC()
#     clf.fit(X_train, y_train)
#     return clf

# # Function to evaluate trained model on testing set and calculate accuracy
# def evaluate_model(model, X_test, y_test):
#     y_pred = model.predict(X_test)
#     accuracy = accuracy_score(y_test, y_pred)
#     return accuracy

# # Main function
# def main():
#     data_dir = 'data/test'
#     img_size = (128, 128)  # Resize images to a common size (adjust as needed)
#     num_clusters = 100  # Adjust as needed
#     grayscale = True  # Set to True if you want to convert images to grayscale

#     X, y = load_dataset(data_dir)
#     X = preprocess_images(X, img_size, grayscale)
#     feature_extractor = cv2.SIFT_create()
#     features = extract_local_features(X, feature_extractor)
#     vocabulary = construct_visual_vocabulary(features, num_clusters)
#     labels = quantize_features(features, vocabulary)
#     encoded_features = encode_features(labels, num_clusters)
#     X_train, X_test, y_train, y_test = split_dataset(encoded_features, y)
#     model = train_classification_model(X_train, y_train)
#     accuracy = evaluate_model(model, X_test, y_test)
    
#     print(f'Accuracy with Bag of Features: {accuracy}')

# if __name__ == '__main__':
#     main()

# # Analyze and interpret results


import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn import datasets
import os

# Function to load image dataset and corresponding labels

# def load_dataset(data_dir):
#     X = []
#     y = []
#     class_labels = os.listdir(data_dir)
    
#     for label, class_name in enumerate(class_labels):
#         class_dir = os.path.join(data_dir, class_name)
        
#         for image_file in os.listdir(class_dir):
#             image_path = os.path.join(class_dir, image_file)
#             image = cv2.imread(image_path)
#             X.append(image)
#             y.append(label)
    
#     return np.array(X), np.array(y)

def load_dataset(dataset_path):
    """Load a directory-per-class image dataset as 64x64 grayscale images.

    Every sub-directory of ``dataset_path`` is treated as one class and its
    name is used as the label of each image found inside it. Entries are
    visited in sorted order so the result does not depend on the order in
    which the filesystem happens to list them.

    Args:
        dataset_path: Directory whose sub-directories each hold the images
            of one class.

    Returns:
        A ``(images, labels)`` tuple of two parallel lists: the resized
        grayscale images and, for each one, the name of the sub-directory
        it was read from.
    """
    images = []
    labels = []
    for subdir in sorted(os.listdir(dataset_path)):
        subdir_path = os.path.join(dataset_path, subdir)
        # Stray files next to the class folders are not classes; calling
        # os.listdir on them would raise NotADirectoryError.
        if not os.path.isdir(subdir_path):
            continue
        for filename in sorted(os.listdir(subdir_path)):
            image_path = os.path.join(subdir_path, filename)
            image = cv2.imread(image_path)
            if image is None:
                print("Failed to load the image.")
                continue
            # ndarray.size is an integer element count, so it has to be
            # compared with 0 (comparing it with a tuple is never equal).
            if image.size == 0:
                print("The image has empty dimensions.")
                continue
            image = cv2.resize(image, (64, 64))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
            images.append(image)
            labels.append(subdir)  # Directory name is used as the class label
    return images, labels

# Function to preprocess images (optional resize, optional grayscale conversion)
def preprocess_images(images, img_size, grayscale=True):
    """Resize and optionally convert a collection of images to grayscale.

    Args:
        images: Iterable of image arrays.
        img_size: Target size passed to ``cv2.resize``; ``None`` skips
            resizing.
        grayscale: When true, three-channel images are converted with
            ``cv2.COLOR_BGR2GRAY``; any other image is kept as is and a
            warning is printed.

    Returns:
        ``np.array`` built from the processed images, in input order.
    """

    def _prepare(img):
        # Resize first so the channel check below sees the resized array.
        resized = img if img_size is None else cv2.resize(img, img_size)
        if not grayscale:
            return resized
        has_three_channels = len(resized.shape) == 3 and resized.shape[2] == 3
        if has_three_channels:
            return cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        print("Warning: The image is not in BGR format; conversion to grayscale may not be necessary.")
        return resized

    return np.array([_prepare(img) for img in images])

# Function to extract local features from preprocessed images
def extract_local_features(images, feature_extractor):
    """Collect the descriptors of all images into one stacked array.

    Args:
        images: Iterable of images handed to the extractor one at a time.
        feature_extractor: Object exposing
            ``detectAndCompute(image, mask)`` that returns a
            ``(keypoints, descriptors)`` pair.

    Returns:
        The descriptors of every image stacked with ``np.vstack``, or
        ``None`` when no image produced any descriptor. Images whose
        descriptors are ``None`` or empty are skipped.
    """
    descriptor_sets = []
    for img in images:
        _, descriptors = feature_extractor.detectAndCompute(img, None)
        if descriptors is None or len(descriptors) == 0:
            continue
        descriptor_sets.append(descriptors)

    if not descriptor_sets:
        return None  # Nothing was extracted from any image
    return np.vstack(descriptor_sets)

# Function to construct visual vocabulary using KMeans
def construct_visual_vocabulary(features, num_clusters, random_state=None):
    """Fit a KMeans model that serves as the visual vocabulary.

    Args:
        features: Samples passed to ``KMeans.fit``. In this file that is
            the stacked descriptor array from ``extract_local_features``,
            which is ``None`` when no descriptors were found.
        num_clusters: Number of clusters, passed as ``n_clusters``.
        random_state: Forwarded to ``KMeans``. The default ``None`` keeps
            the original unseeded behaviour; pass an int for repeatable
            clustering.

    Returns:
        The fitted ``KMeans`` instance.

    Raises:
        ValueError: If ``features`` is ``None`` or empty.
    """
    # Fail early with a clear message instead of an opaque error from fit().
    if features is None or len(features) == 0:
        raise ValueError(
            "Cannot build a visual vocabulary: no features were provided."
        )
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    kmeans.fit(features)
    return kmeans

# Function to quantize local features based on visual vocabulary
def quantize_features(features, vocabulary):
    """Map features to vocabulary entries.

    Args:
        features: Samples handed to ``vocabulary.predict``.
        vocabulary: Object exposing ``predict`` (in this file, the model
            returned by ``construct_visual_vocabulary``).

    Returns:
        Whatever ``vocabulary.predict(features)`` returns.
    """
    return vocabulary.predict(features)

# Function to encode quantized features
def encode_features(labels, num_clusters):
    """Turn cluster labels into histograms over the vocabulary.

    Each element of ``labels`` is histogrammed on its own against the
    integer bin edges ``0 .. num_clusters``. Passing one array of labels
    per image therefore gives one histogram per image, while passing a
    flat array of labels gives one one-hot row per label.

    Args:
        labels: Iterable whose elements are cluster indices or arrays of
            cluster indices.
        num_clusters: Number of histogram bins.

    Returns:
        ``np.array`` with one histogram of length ``num_clusters`` per
        element of ``labels``.
    """
    bin_edges = range(num_clusters + 1)
    return np.array(
        [np.histogram(entry, bins=bin_edges)[0] for entry in labels]
    )

# Function to split dataset into training and testing sets
def split_dataset(X, y, test_size=0.2, random_state=42):
    """Split samples and labels into training and testing parts.

    Thin wrapper around ``train_test_split``.

    Args:
        X: Samples.
        y: Labels aligned with ``X``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    features_train, features_test, labels_train, labels_test = parts
    return features_train, features_test, labels_train, labels_test

# Function to train classification model on training set using encoded features
def train_classification_model(X_train, y_train):
    """Fit a default-configured ``SVC`` on the training data.

    Args:
        X_train: Training samples.
        y_train: Training labels.

    Returns:
        The fitted ``SVC`` instance.
    """
    # fit() returns the estimator itself, so the result is the fitted model.
    return SVC().fit(X_train, y_train)

# Function to evaluate trained model on testing set and calculate accuracy
def evaluate_model(model, X_test, y_test):
    """Score a trained model on the test set.

    Args:
        model: Object exposing ``predict``.
        X_test: Test samples handed to ``model.predict``.
        y_test: True labels for ``X_test``.

    Returns:
        The value of ``accuracy_score(y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    return accuracy_score(y_test, predictions)

