In [8]:
import cv2
import numpy as np
import os
import random
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score , make_scorer, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score, cross_validate

## Define Functions for Loading and Processing Images

In [2]:
def load_and_process_images(directory, label, count=1000, image_size=(64, 64)):
    # Create a glob pattern to match all jpg images in the directory
    glob_pattern = os.path.join(directory, f"{label}.*.jpg")
    
    # Get all matching file paths
    all_files = glob(glob_pattern)
    random.shuffle(all_files)  # Shuffle to randomize which files are selected
    
    # Select a subset of files (up to the specified count)
    selected_files = all_files[:count]
    
    # Initialize lists to store image data and labels
    images = []
    labels = []
    
    # Process each selected file
    for file_path in selected_files:
        # Read an image with OpenCV
        img = cv2.imread(file_path, cv2.IMREAD_COLOR)  # Read in color mode
        if img is None:
            print(f"Image not loaded properly: {file_path}")
        else:
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert the image to grayscale
            img_resized = cv2.resize(img_gray, image_size)  # Resize the image
            img_normalized = img_resized / 255.0  # Normalize the pixel values
            img_flattened = img_normalized.flatten()  # Flatten the image
            images.append(img_flattened)
            labels.append(1 if label == 'cat' else 0)  # Assuming 'cat' is 1, 'dog' is 0
    
    return np.array(images), np.array(labels)


##  Specify the Image Directories and Load Images 

In [12]:
# Define the paths to the directories containing the images
cat_images_path = './train'
dog_images_path = './train'

# Load and process the images
cats, cat_labels = load_and_process_images(cat_images_path, 'cat', count=10000)
dogs, dog_labels = load_and_process_images(dog_images_path, 'dog', count=10000)

# Combine the datasets
X = np.concatenate((cats, dogs), axis=0)
y = np.concatenate((cat_labels, dog_labels), axis=0)


In [13]:
# Assuming X and y are your image data and labels respectively
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, shuffle=True)


## Training the KNN with k=5

In [14]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

KNN model trained successfully.


In [16]:
# Compute the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Generate a classification report
report = classification_report(y_test, y_pred, target_names=['Cat', 'Dog'])

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.55525
Classification Report:
               precision    recall  f1-score   support

         Cat       0.58      0.38      0.46      1981
         Dog       0.54      0.72      0.62      2019

    accuracy                           0.56      4000
   macro avg       0.56      0.55      0.54      4000
weighted avg       0.56      0.56      0.54      4000



## Cross Validation

In [17]:
knn = KNeighborsClassifier(n_neighbors=5)

In [18]:
scoring = {
    'accuracy': 'accuracy',
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}


In [19]:
results = cross_validate(knn, X, y, cv=10, scoring=scoring)

In [20]:
print("Average Accuracy: %0.2f (+/- %0.2f)" % (results['test_accuracy'].mean(), results['test_accuracy'].std() * 2))
print("Average Precision: %0.2f (+/- %0.2f)" % (results['test_precision'].mean(), results['test_precision'].std() * 2))
print("Average Recall: %0.2f (+/- %0.2f)" % (results['test_recall'].mean(), results['test_recall'].std() * 2))
print("Average F1-score: %0.2f (+/- %0.2f)" % (results['test_f1'].mean(), results['test_f1'].std() * 2))

Average Accuracy: 0.56 (+/- 0.02)
Average Precision: 0.57 (+/- 0.02)
Average Recall: 0.56 (+/- 0.02)
Average F1-score: 0.54 (+/- 0.02)
