In [3]:
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tqdm import tqdm

# Dataset directories: images with class names in their file names (train)
# and the images to be classified (test).
train_dir = r"C:\Users\USER\OneDrive\Desktop\skillup\task_03\train"
test_dir = r"C:\Users\USER\OneDrive\Desktop\skillup\task_03\test1"

# ImageNet-pretrained VGG16 used purely as a fixed feature extractor: the
# dense classification head is dropped (include_top=False) and global average
# pooling reduces the last convolutional feature map to one vector per image.
base_model = VGG16(
    include_top=False,
    pooling="avg",
    weights="imagenet",
)

# --- FEATURE EXTRACTION ---
def extract_features(img_path):
    """Return the VGG16 feature vector for a single image file.

    The image is loaded, resized to 224x224, run through the VGG16
    preprocessing step and passed through ``base_model``.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A 1-D NumPy array holding the flattened output of ``base_model``
        for this image.
    """
    img = load_img(img_path, target_size=(224, 224))
    # The network expects a batch axis: (height, width, channels) ->
    # (1, height, width, channels).
    batch = np.expand_dims(img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    # Call the model directly instead of using ``predict``: ``predict`` is
    # designed for large batched inputs and adds noticeable per-call overhead,
    # which dominates the run time when it is invoked once per image.
    features = base_model(batch, training=False)
    return np.asarray(features).flatten()

# Prepare training data
# Labels come from the file name (dog=1, cat=0). Names are matched
# case-insensitively, and entries that mention neither class (for example
# desktop.ini / Thumbs.db files that Windows places in folders) are skipped
# rather than being silently labelled as "cat".
X, y, skipped = [], [], []
# os.listdir() returns entries in arbitrary order; sorting makes the sample
# order -- and therefore the random_state=42 split below -- reproducible.
for fname in tqdm(sorted(os.listdir(train_dir))):
    lowered = fname.lower()
    if "dog" in lowered:
        label = 1
    elif "cat" in lowered:
        label = 0
    else:
        skipped.append(fname)
        continue
    fpath = os.path.join(train_dir, fname)
    X.append(extract_features(fpath))
    y.append(label)

if skipped:
    print(f"Skipped {len(skipped)} unlabelled file(s) in training dir, e.g. {skipped[:5]}")

# Fail early with a clear message instead of a less obvious error from
# scikit-learn when a class is missing or the folder holds no usable images.
if len(set(y)) < 2:
    raise ValueError(
        f"Need both cat and dog images in {train_dir!r}; found labels {sorted(set(y))}"
    )

X, y = np.array(X), np.array(y)

# Train-test split (80% train / 20% validation, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train SVM
# NOTE: probability=True makes fitting considerably slower (internal
# cross-validation) and is only needed if predict_proba is used later; it is
# kept here so any later code relying on it keeps working.
clf = SVC(kernel="linear", probability=True)
clf.fit(X_train, y_train)

# Validate
y_pred = clf.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, y_pred))

# --- PREDICT TEST DATA ---
# Features are extracted for every readable test image first, then classified
# with a single batched clf.predict call instead of one call per image.
results = {}
test_names, test_feats, unreadable = [], [], []
# Sorted so the processing order (and the preview printed below) is
# deterministic; os.listdir() itself guarantees no particular order.
for fname in tqdm(sorted(os.listdir(test_dir))):
    fpath = os.path.join(test_dir, fname)
    try:
        feat = extract_features(fpath)
    except OSError:
        # Assumes load failures (sub-directories, non-image or corrupt files)
        # surface as OSError; such entries are reported below rather than
        # aborting the whole run.
        unreadable.append(fname)
        continue
    test_names.append(fname)
    test_feats.append(feat)

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s) in test dir, e.g. {unreadable[:5]}")

# Guard the empty case: predicting on an empty array would raise, whereas an
# empty directory should simply yield an empty result mapping.
if test_feats:
    preds = clf.predict(np.array(test_feats))
    results = {
        fname: "Dog" if pred == 1 else "Cat"
        for fname, pred in zip(test_names, preds)
    }

# Example output
print(list(results.items())[:10])  # show first 10 predictions


  0%|          | 0/25000 [00:01<?, ?it/s]


KeyboardInterrupt: 