In [3]:
import os
import warnings

import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define paths
# The dataset root defaults to the original local location but can be
# redirected with the PRODIGY_ML_03_DIR environment variable, so the notebook
# also runs on machines that do not have this exact drive layout.
data_root = os.environ.get('PRODIGY_ML_03_DIR', r'F:/prodigy/PRODIGY_ML_03')
train_dir = f'{data_root}/train'
test_dir = f'{data_root}/test'

# Function to load images and labels based on filename patterns
def load_images_and_labels(directory, image_size=(64, 64)):
    """Load labelled grayscale images from ``directory`` as flat feature vectors.

    Every ``.jpg``/``.png`` file whose name contains ``'cat'`` (label 0) or
    ``'dog'`` (label 1) is read in grayscale, resized to ``image_size`` and
    flattened. Files without a recognisable label, and files OpenCV cannot
    decode, are skipped with a warning so that row ``i`` of the returned images
    always corresponds to element ``i`` of the returned labels.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``(64, 64)``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``: one flattened image per row and the matching
        0/1 labels. Both arrays are empty when nothing could be loaded.
    """
    images = []
    labels = []
    skipped = []

    # Sort the listing so the sample order (and therefore any seeded split
    # made downstream) does not depend on the filesystem's listing order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(('.jpg', '.png')):
            continue

        # Infer the label from the filename *before* storing the image:
        # an image without a label must not be kept, otherwise the two
        # returned arrays end up with different lengths.
        if 'cat' in filename:
            label = 0
        elif 'dog' in filename:
            label = 1
        else:
            skipped.append(filename)
            continue

        img = cv2.imread(os.path.join(directory, filename), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports an unreadable or corrupt file by returning
            # None rather than raising; resizing it would crash.
            skipped.append(filename)
            continue

        images.append(cv2.resize(img, image_size).flatten())
        labels.append(label)

    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} unlabelled or unreadable file(s) in "
            f"{directory!r}, e.g. {skipped[:5]}"
        )

    return np.array(images), np.array(labels)

# Load every labelled image found in the training folder
X_all, y_all = load_images_and_labels(train_dir)

# Both splits below use the same hold-out fraction and the same seed
split_kwargs = dict(test_size=0.2, random_state=42)

# First split: hold out 20% of the loaded data as the validation set
X_fit, X_val, y_fit, y_val = train_test_split(X_all, y_all, **split_kwargs)

# Report the sizes after the first split
print(f"Training set size: {len(X_fit)}")
print(f"Validation set size: {len(X_val)}")

# Second split: carve a test set out of the samples left after validation.
# This is 20% of that remainder (0.2 * 0.8 = 16% of the loaded data),
# not 20% of the original set; the validation set is left untouched.
X_train, X_test, y_train, y_test = train_test_split(X_fit, y_fit, **split_kwargs)

# Report the final sizes of all three sets
print(f"New Training set size: {len(X_train)}")
print(f"New Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")


Training set size: 1174
Validation set size: 294
New Training set size: 939
New Validation set size: 294
Test set size: 235


In [4]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Build a linear-kernel SVM and train it on the training split
# (fit() returns the estimator itself, so construction and training chain)
svm_model = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Score the trained model on the held-out validation split
y_val_pred = svm_model.predict(X_val)
val_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f'Validation Accuracy: {val_acc}')


Validation Accuracy: 0.7585034013605442


In [5]:
# Evaluate the trained SVM on the held-out test split:
# compare its predictions against the true test labels
y_test_pred = svm_model.predict(X_test)
test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f'Test Accuracy: {test_acc}')


Test Accuracy: 0.7957446808510639


In [6]:
from sklearn.model_selection import GridSearchCV

# Search each kernel with its own sub-grid. `gamma` is not used by the linear
# kernel, so one combined grid would refit the same linear model once per
# gamma value without learning anything new.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1]},
]

# NOTE(review): the features look like raw, unscaled pixel intensities; the
# gamma values above may be too large for such inputs with an RBF kernel.
# Confirm the selected model is not simply predicting one class before
# trusting the scores below.

# Hand a fresh, unfitted estimator straight to the search so the linear
# `svm_model` trained earlier is not overwritten by this cell.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=3)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

# Evaluate the estimator refit with the best parameters on the test split
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {test_acc}')

Best parameters found:  {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}
Best cross-validation score: 0.83
Test Accuracy: 0.8553191489361702
