In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [3]:
# Root folder of the project data. Set the CATS_DOGS_BASE_DIR environment
# variable to point the notebook at a different location; when it is unset the
# original local path is used, so existing setups keep working unchanged.
base_dir = os.environ.get(
    'CATS_DOGS_BASE_DIR',
    r'C:\Users\gowth\OneDrive\Desktop\ProdigyInfotech\PRODIGY_ML_03',
)
# Folder scanned for the training images.
train_dir = os.path.join(base_dir, 'train')

In [4]:
# Fail fast with a clear message before any loading is attempted.
# isdir (rather than exists) also rejects a regular file at that path, which
# would otherwise only fail later inside os.listdir with a different error.
if not os.path.isdir(train_dir):
    raise FileNotFoundError(f"Train directory not found: {train_dir}")

def load_data(directory, image_size=(32, 32)):
    """Load every readable image in ``directory`` as a flattened grayscale vector.

    Each regular file directly inside ``directory`` is read with OpenCV in
    grayscale, resized to ``image_size`` and flattened into one row.
    Sub-directories and files OpenCV cannot decode are skipped. A sample is
    labelled ``'cat'`` when its file name contains "cat" (case-insensitive)
    and ``'dog'`` otherwise.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``(32, 32)``.

    Returns
    -------
    images : numpy.ndarray
        ``uint8`` array of shape ``(n_images, width * height)``. When nothing
        could be loaded the shape is ``(0, width * height)``.
    labels : numpy.ndarray
        String array of shape ``(n_images,)`` holding ``'cat'`` / ``'dog'``.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``directory`` cannot be listed.
    """
    width, height = image_size
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made downstream) reproducible.
    for filename in sorted(os.listdir(directory)):
        img_path = os.path.join(directory, filename)
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals an undecodable file by returning None.
            continue
        img = cv2.resize(img, (width, height))
        images.append(img.flatten())
        # Case-insensitive so names such as "Cat.1.jpg" are not labelled 'dog'.
        labels.append('cat' if 'cat' in filename.lower() else 'dog')
    # The explicit reshape/dtype keep the result 2-D and typed even when empty.
    features = np.array(images, dtype=np.uint8).reshape(len(images), width * height)
    return features, np.array(labels, dtype=str)

# Load the training set. Errors are deliberately left to propagate: the
# previous print-and-exit() handler hid the traceback, and calling exit() inside
# a notebook is expected to shut the kernel down, discarding all state.
x, y = load_data(train_dir)
# Stop here with a clear message instead of failing later inside the split.
if len(x) == 0:
    raise ValueError(f"No readable images found in: {train_dir}")
print(f"x shape: {x.shape}")
print(f"y shape: {y.shape}")

x shape: (25000, 1024)
y shape: (25000,)


In [5]:
# Map the string labels to integer class ids.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# Hold out half of the samples for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_encoded,
    random_state=42,
    test_size=0.5,
)


In [6]:
from sklearn.decomposition import PCA

# Apply PCA to reduce dimensionality.
# random_state is pinned because PCA may pick a randomized SVD solver for data
# of this size, which would otherwise make the projection differ between runs.
pca = PCA(n_components=100, random_state=42)  # Adjust n_components as needed
# Fit on the training split only, then reuse the same projection for the test split.
x_train_pca = pca.fit_transform(x_train)
x_test_pca = pca.transform(x_test)




In [7]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest classifier.
# n_jobs=-1 uses every available core; random_state pins the otherwise random
# bootstrap/feature sampling so the reported accuracy is reproducible.
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
clf.fit(x_train_pca, y_train)

# Predict on the test set
y_pred = clf.predict(x_test_pca)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.61


In [8]:
# Optional linear-SVC baseline. This used to be disabled by wrapping the code in
# a triple-quoted string, which is still evaluated as an expression; it is now a
# real, opt-in block. It uses its own variable names so that enabling it does not
# overwrite the random-forest results (clf / y_pred / accuracy).
RUN_SVC_BASELINE = False  # set to True to train and evaluate the SVC as well

if RUN_SVC_BASELINE:
    svc_clf = SVC(kernel='linear', C=1)

    # Train the model
    svc_clf.fit(x_train_pca, y_train)

    # Predict on the test set
    svc_pred = svc_clf.predict(x_test_pca)

    # Evaluate the model
    svc_accuracy = accuracy_score(y_test, svc_pred)
    print(f"Accuracy: {svc_accuracy:.2f}")