# <u> Machine Learning Internship at Prodigy InfoTech - Classification Cats & Dogs Using SVM  (Tsak 3)</u>

### <i>The goal of this project is to classify images of dogs using a pre-trained Support Vector Machine (SVM) model. The SVM model has been previously trained on a dataset of images containing dogs and cats.</i>

# import required libraries

In [148]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from skimage.transform import resize 
from skimage.io import imread 
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import StratifiedKFold



# Laoding and Preprocessing Train Data

In [149]:
def load_images_from_folder(folder, target_size=(50, 50)):
    images = []
    labels = []
    for idx, class_folder in enumerate(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img_resized = cv2.resize(img, target_size)
                img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
                img_flattened = img_gray.flatten()
                images.append(img_flattened)
                labels.append(idx)  
    return images, labels


In [150]:

dataset_path = r'C:\Users\engyo\OneDrive\Documents\python apps\train'

images, labels = load_images_from_folder(dataset_path)

# Convert lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)


# Split Data into train and test data

In [151]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


# <u> Applying support vector classifier :- </u>

In [152]:
# Initialize SVC with some default parameters
svc = SVC()

# Define parameter grid for GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': [1, 0.1, 0.01, 0.001],  # Kernel coefficient
    'kernel': ['rbf', 'linear'],  # Kernel methods
    'class_weight': [None, 'balanced'],  
    'shrinking': [True, False]  
}

# Initialize GridSearchCV with StratifiedKFold for better validation
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=stratified_kfold, verbose=2, n_jobs=-1)

# Perform grid search to find the best parameters

In [153]:
grid_search.fit(X_train, y_train)

# Print the best parameters and best score found by GridSearchCV
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-validation Accuracy:", grid_search.best_score_)

Fitting 5 folds for each of 128 candidates, totalling 640 fits
Best Parameters: {'C': 0.1, 'class_weight': None, 'gamma': 1, 'kernel': 'linear', 'shrinking': True}
Best Cross-validation Accuracy: 0.5415730337078651


# Predict on test data with best model

In [154]:

best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test)

# Calculate Accuracy

In [155]:

accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)


Test Accuracy: 0.5625


# Print Classification Report

In [156]:
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.58      0.60        64
           1       0.49      0.54      0.51        48

    accuracy                           0.56       112
   macro avg       0.56      0.56      0.56       112
weighted avg       0.57      0.56      0.56       112



# Load Test Data

In [158]:
def load_images_from_folder(folder, target_size=(50, 50)):
    images = []
    labels = []
    for idx, class_folder in enumerate(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img_resized = cv2.resize(img, target_size)
                img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
                img_flattened = img_gray.flatten()
                images.append(img_flattened)
                labels.append(idx)  # Assuming folder names correspond to class labels
    return images, labels

In [159]:
# Provide path to your test dataset directory
test_dataset_path = r'C:\Users\engyo\OneDrive\Documents\python apps\test'

test_images, test_labels = load_images_from_folder(test_dataset_path)

# Convert lists to NumPy arrays
test_images = np.array(test_images)
test_labels = np.array(test_labels)

# Identifing Classes

In [None]:
# Define class names corresponding to your dataset
classes = ['cat','dog']

# train model 

In [160]:
best_svm = grid_search.best_estimator_  


# Test data

In [161]:
# Predict on test data
test_predictions = best_svm.predict(test_images)

# Predictions

In [163]:
test_predictions

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 1])