# 🧪 Hotdog/Not Hotdog Classifier (Baseline HOG + Logistic Regression)

In [1]:
import os

import cv2
import joblib
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
from skimage.feature import hog
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Config
# Dataset layout on disk: <DATA_DIR>/<split>/<class>/<image files>
DATA_DIR = "data"
TRAIN_HOTDOG, TRAIN_NOT = (
    os.path.join(DATA_DIR, "train", class_dir)
    for class_dir in ("hot_dog", "not_hot_dog")
)
TEST_HOTDOG, TEST_NOT = (
    os.path.join(DATA_DIR, "test", class_dir)
    for class_dir in ("hot_dog", "not_hot_dog")
)

# Target size handed to cv2.resize before feature extraction.
IMAGE_SIZE = (128, 128)

# Keyword arguments forwarded unchanged to skimage.feature.hog.
HOG_PARAMS = dict(
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
)


In [2]:
def load_images(folder, label):
    """Load every readable image in ``folder`` as a HOG feature vector.

    Each entry of ``folder`` is read with ``cv2.imread``, resized to
    ``IMAGE_SIZE``, converted from BGR to grayscale and described with
    ``skimage.feature.hog`` using ``HOG_PARAMS``. Entries that OpenCV cannot
    decode (``cv2.imread`` returns ``None``) are skipped.

    Args:
        folder: Directory whose entries are treated as image files.
        label: Class label attached to every image loaded from ``folder``.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` holds one
        row per loaded image and ``labels`` holds ``label`` once per row. If no
        image could be loaded, ``features`` has shape ``(0, n_features)`` and
        ``labels`` is empty, so both can still be concatenated with the arrays
        of another folder.
    """
    features = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the assembled dataset) reproducible across runs.
    for fname in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, fname))
        if img is None:
            continue
        resized = cv2.resize(img, IMAGE_SIZE)
        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        features.append(hog(gray, **HOG_PARAMS))

    if features:
        feature_matrix = np.array(features)
    else:
        # np.array([]) would be 1-D and could not be concatenated with the 2-D
        # matrix of another class. Derive the feature length from a blank image
        # of the resized shape (cv2 sizes are (width, height), arrays are
        # (height, width)) and return a correctly shaped empty matrix instead.
        blank = np.zeros(IMAGE_SIZE[::-1], dtype=np.uint8)
        n_features = hog(blank, **HOG_PARAMS).size
        feature_matrix = np.empty((0, n_features))

    # np.full keeps the label's dtype even when there are zero rows.
    return feature_matrix, np.full(len(features), label)


In [3]:
# Training split: HOG features per class (1 = hot dog, 0 = not hot dog),
# stacked with the hot-dog rows first.
X_train_hot, y_train_hot = load_images(TRAIN_HOTDOG, 1)
X_train_not, y_train_not = load_images(TRAIN_NOT, 0)
X_train = np.concatenate((X_train_hot, X_train_not), axis=0)
y_train = np.concatenate((y_train_hot, y_train_not), axis=0)

# Test split: same labelling and row order as the training split.
X_test_hot, y_test_hot = load_images(TEST_HOTDOG, 1)
X_test_not, y_test_not = load_images(TEST_NOT, 0)
X_test = np.concatenate((X_test_hot, X_test_not), axis=0)
y_test = np.concatenate((y_test_hot, y_test_not), axis=0)


In [4]:
# Fit the baseline classifier on the HOG features; fit() returns the estimator
# itself, so construction and training can be chained.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out test split.
preds = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=preds)
print(f"Test accuracy: {accuracy:.4f}")


Test accuracy: 0.5960


In [5]:
# Persist the classifier that was already fitted in the training cell.
# A second clf.fit(X_train, y_train) on the same data only repeated the
# optimisation before saving, so it is not run again here.
# joblib.dump returns the list of written file names, which the cell displays.
joblib.dump(clf, "logreg_model.pkl")


['logreg_model.pkl']