In [82]:
# Import all necessary libraries

In [83]:
import os
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import time
from sklearn.model_selection import train_test_split

# Loading images into NumPy arrays

In [84]:
def _load_flat_images(directory, files, img_size):
    """Read, resize and flatten each file in ``files``; unreadable files are reported and skipped."""
    flat_images = []
    for file in files:
        img_path = os.path.join(directory, file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so report it and move on.
            print(f"Failed to load image: {img_path}")
            continue
        flat_images.append(cv2.resize(img, img_size).flatten())
    return flat_images


def load_data(directory, num_cats, num_dogs, img_size=(32, 32)):
    """Load cat and dog images from ``directory`` as flattened pixel vectors.

    Files are selected by name: an entry whose lower-cased name contains
    ``'cat'`` is treated as a cat image, one containing ``'dog'`` as a dog
    image. Each group is sorted by name and truncated to the requested count
    *before* loading, so files that fail to load reduce the number of returned
    samples rather than being replaced by other files.

    Parameters
    ----------
    directory : str
        Folder that holds the image files.
    num_cats : int
        Maximum number of cat files to read.
    num_dogs : int
        Maximum number of dog files to read.
    img_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``(32, 32)``.

    Returns
    -------
    images : numpy.ndarray
        One row per successfully loaded image (cats first, then dogs). Each
        row is the flattened resized image. When nothing is loaded the array
        still has two dimensions, ``(0, width * height * 3)``, so downstream
        code can rely on ``images.shape[1]``.
    labels : numpy.ndarray
        ``0`` for every cat row and ``1`` for every dog row.
    """
    all_files = os.listdir(directory)

    # NOTE(review): matching is by substring, so a name containing both
    # 'cat' and 'dog' would be picked up by both groups - confirm the
    # dataset's naming scheme rules that out.
    cat_files = sorted(f for f in all_files if 'cat' in f.lower())[:num_cats]
    dog_files = sorted(f for f in all_files if 'dog' in f.lower())[:num_dogs]

    images = []
    labels = []
    for label, files in ((0, cat_files), (1, dog_files)):
        loaded = _load_flat_images(directory, files, img_size)
        images.extend(loaded)
        labels.extend([label] * len(loaded))

    if not images:
        # np.array([]) would be a 1-D float array; return an empty feature
        # matrix with the expected width instead (3 colour channels, which is
        # what cv2.imread yields with its default flag).
        width, height = img_size
        return (
            np.empty((0, width * height * 3), dtype=np.uint8),
            np.empty((0,), dtype=int),
        )

    return np.array(images), np.array(labels)

# Loading the training data with a per-class image limit

In [85]:
# The training folder can be overridden through the CATS_DOGS_TRAIN_DIR
# environment variable so the notebook is not tied to a single machine;
# the original location stays as the default.
data_dir = os.environ.get(
    "CATS_DOGS_TRAIN_DIR",
    "D:/Internship Tasks (Prodigy InfoTech)/Task 3/train",
)
X, y = load_data(data_dir, num_cats = 12500, num_dogs = 12500)

# Fail here, next to the cause, rather than later inside the split or the fit.
if X.size == 0:
    raise ValueError(f"No images were loaded from: {data_dir}")

X = X / 255.0  # Normalize data: scale pixel values from 0-255 down to 0-1

# Print the shape of X and the first few labels

In [86]:
# Quick look at what was loaded: feature-matrix shape, the leading labels,
# and which class ids are present.
for caption, value in (
    ("Shape of X:", X.shape),
    ("First 10 labels in y:", y[:10]),
    ("Unique classes in y:", np.unique(y)),
):
    print(caption, value)

Shape of X: (25000, 3072)
First 10 labels in y: [0 0 0 0 0 0 0 0 0 0]
Unique classes in y: [0 1]


In [87]:
# This cell sits above the train/test split, so on a fresh kernel
# (Restart & Run All) y_train does not exist yet and a bare print would
# raise NameError. Guard the lookup so the notebook runs top to bottom.
if "y_train" in globals():
    print("Unique classes in y_train:", np.unique(y_train))
else:
    print("y_train is not defined yet; it is created by the train/test split cell.")

Unique classes in y_train: [0 1]


# Splitting training and testing data

In [88]:
# Stratified, seeded split: 1000 rows for training and 100 for testing,
# with stratify=y keeping the class proportions of y in both parts.
split_arrays = train_test_split(
    X,
    y,
    train_size=1000,
    test_size=100,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_arrays

# Shape of the training/testing data

In [89]:
# Report the size of each split and confirm both classes appear in each.
for caption, value in (
    ("Shape of X_train:", X_train.shape),
    ("Shape of X_test:", X_test.shape),
    ("Unique classes in y_train:", np.unique(y_train)),
    ("Unique classes in y_test:", np.unique(y_test)),
):
    print(caption, value)

Shape of X_train: (1000, 3072)
Shape of X_test: (100, 3072)
Unique classes in y_train: [0 1]
Unique classes in y_test: [0 1]


# Training the SVM classifier

In [90]:
# Linear-kernel support vector classifier fitted on the training split.
svm = SVC(kernel='linear', C=1.0, random_state=42)

# time.perf_counter() is a monotonic, high-resolution clock meant for
# measuring elapsed time; time.time() follows the wall clock and can jump
# if the system clock is adjusted while the fit is running.
start_time = time.perf_counter()
svm.fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train the SVM: {end_time - start_time} seconds")

Time to train the SVM: 2.9249985218048096 seconds


# Ensure test data is not empty

In [91]:
# The test set comes from train_test_split, not from a separate folder, so
# the error message points at the loading and splitting steps.
if X_test.size == 0 or y_test.size == 0:
    raise ValueError(
        "Test data is empty. Please check that images were loaded and that "
        "train_test_split was given a non-zero test_size."
    )

# Measuring the time taken to predict

In [92]:
# Use the monotonic high-resolution clock for the duration; time.time() is a
# wall clock and is a poor fit for timing a call this short.
start_time = time.perf_counter()
y_pred = svm.predict(X_test)
end_time = time.perf_counter()
print(f"Time to predict using the SVM on limited test data: {end_time - start_time} seconds")

Time to predict using the SVM on limited test data: 0.2050001621246338 seconds


# Checking the accuracy of the model

In [93]:
# Fraction of test samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.5


In [94]:
# Per-class precision, recall and F1 for the test predictions.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)

Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.48      0.49        50
           1       0.50      0.52      0.51        50

    accuracy                           0.50       100
   macro avg       0.50      0.50      0.50       100
weighted avg       0.50      0.50      0.50       100

