In [2]:
import os
import cv2
import zipfile
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [5]:
%pip install kaggle

def download_dataset():
    """Download ``train.zip`` of the ``dogs-vs-cats`` Kaggle competition and unpack it.

    The archive is saved under ``data``, extracted into ``data/train`` and then
    deleted. The Kaggle client is imported lazily so that the rest of the
    notebook can be used without the ``kaggle`` package once the data exists.
    """
    from kaggle.api.kaggle_api_extended import KaggleApi

    archive_path = 'data/train.zip'

    client = KaggleApi()
    client.authenticate()

    print("[INFO] Downloading dataset from Kaggle...")
    client.competition_download_file('dogs-vs-cats', 'train.zip', path='data')

    archive = zipfile.ZipFile(archive_path, 'r')
    try:
        print("[INFO] Extracting images...")
        archive.extractall('data/train')
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()

    # The extracted images are all that is needed from here on.
    os.remove(archive_path)


Note: you may need to restart the kernel to use updated packages.


In [6]:
def load_images(image_dir, img_size=64, limit=2000, seed=42):
    """Load cat/dog JPEGs from a directory as flattened grayscale vectors.

    Labels are inferred from file names (case-insensitively): names starting
    with ``cat`` get label 0 and names starting with ``dog`` get label 1.
    ``.jpg`` files matching neither prefix are reported and skipped instead of
    being silently counted as dogs.

    Parameters
    ----------
    image_dir : str
        Directory containing the image files.
    img_size : int, default 64
        Each image is resized to ``img_size x img_size`` before flattening.
    limit : int or None, default 2000
        Maximum number of files to load; ``None`` loads every labelled file.
    seed : int or None, default 42
        Seed for the shuffle applied before ``limit`` is enforced. A fixed
        seed makes the selected subset reproducible; ``None`` draws a fresh
        random subset on every call.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature rows of length ``img_size * img_size`` and the matching
        labels. Files that cannot be decoded or resized are reported and
        left out, so fewer than ``limit`` rows may be returned.
    """
    labelled = []
    # Sort first: os.listdir order is arbitrary, which would make the seeded
    # shuffle below irreproducible across machines.
    for name in sorted(os.listdir(image_dir)):
        lowered = name.lower()
        if not lowered.endswith('.jpg'):
            continue
        if lowered.startswith('cat'):
            labelled.append((name, 0))
        elif lowered.startswith('dog'):
            labelled.append((name, 1))
        else:
            print(f"Error: {name} -> cannot infer label from file name")

    # Shuffle before truncating: the sorted listing puts every "cat" file
    # ahead of every "dog" file, so a plain [:limit] could yield one class.
    order = np.random.default_rng(seed).permutation(len(labelled))
    if limit is not None:
        order = order[:limit]
    selected = [labelled[i] for i in order]

    data = []
    labels = []
    for file, label in tqdm(selected):
        img_path = os.path.join(image_dir, file)
        try:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals failure by returning None, not by raising.
                raise ValueError("file could not be read as an image")
            img = cv2.resize(img, (img_size, img_size))
            data.append(img.flatten())
            labels.append(label)
        except Exception as e:
            # Best effort: one corrupt file should not abort the whole load.
            print(f"Error: {file} -> {e}")

    return np.array(data), np.array(labels)


In [7]:
def train_svm(X, y):
    """Train a linear SVM on the given features and print its evaluation.

    The data is split 80/20 (fixed ``random_state=42``), standardised with a
    scaler fitted on the training part only, and used to fit
    ``SVC(kernel='linear')``. Accuracy, the classification report and the
    confusion matrix for the held-out part are printed.

    Parameters
    ----------
    X : array-like
        One feature row per sample.
    y : array-like
        One class label per sample.

    Returns
    -------
    (SVC, StandardScaler)
        The fitted classifier and the scaler that must be applied to any new
        samples before calling ``predict`` on the classifier.

    Raises
    ------
    ValueError
        If ``X`` is empty, ``X`` and ``y`` differ in length, or ``y`` contains
        fewer than two distinct classes.
    """
    # Fail early with a readable message instead of a cryptic scikit-learn one.
    if len(X) == 0:
        raise ValueError("train_svm: no samples to train on")
    if len(X) != len(y):
        raise ValueError(
            f"train_svm: X has {len(X)} samples but y has {len(y)} labels"
        )
    if np.unique(y).size < 2:
        raise ValueError("train_svm: need samples from at least two classes")

    print("[INFO] Splitting and scaling data...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only so no test statistics leak in.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    print("[INFO] Training SVM classifier...")
    clf = SVC(kernel='linear')
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print(f"\nAccuracy: {accuracy_score(y_test, y_pred)*100:.2f}%")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    # Hand back the fitted objects so the model can be reused after training.
    return clf, scaler


In [None]:
if __name__ == "__main__":
    from pathlib import Path

    data_dir = 'data/train/train'
    kaggle_json = Path.home() / ".kaggle" / "kaggle.json"

    # The Kaggle client reads either kaggle.json or these two environment
    # variables; do not ask for (or overwrite) credentials that already exist.
    has_credentials = kaggle_json.exists() or (
        "KAGGLE_USERNAME" in os.environ and "KAGGLE_KEY" in os.environ
    )

    if not os.path.exists(data_dir) and not has_credentials:
        # Imported here because the widget stack is only needed on this path.
        from IPython.display import display
        import ipywidgets as widgets

        def _save_kaggle_credentials(change):
            """Write the uploaded kaggle.json to ~/.kaggle with 0600 permissions."""
            uploaded = change["new"]
            if not uploaded:
                return
            # ipywidgets 7 exposes {filename: fileinfo}; ipywidgets 8 exposes a
            # tuple of fileinfo dicts whose 'content' is a memoryview.
            if isinstance(uploaded, dict):
                entries = list(uploaded.values())
            else:
                entries = list(uploaded)
            kaggle_json.parent.mkdir(exist_ok=True)
            with open(kaggle_json, "wb") as f:
                f.write(bytes(entries[0]["content"]))
            os.chmod(kaggle_json, 0o600)
            print(f"[INFO] Saved credentials to {kaggle_json}. Re-run this cell to continue.")

        uploader = widgets.FileUpload(accept='.json', multiple=False)
        # React to the upload via a callback. Polling uploader.value in a
        # sleep loop would keep the kernel busy, so the upload message would
        # never be processed and the loop would never end.
        uploader.observe(_save_kaggle_credentials, names='value')
        display(uploader)

        print("Please upload your kaggle.json file using the widget above, then re-run this cell.")
    else:
        if not os.path.exists(data_dir):
            download_dataset()

        print("[INFO] Loading image data...")
        X, y = load_images(data_dir, img_size=64, limit=2000)

        print("[INFO] Training model...")
        train_svm(X, y)