In [5]:
import os
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib

IMG_SIZE = 128
DATASET_PATH = r"D:\Project\Datasets"   # <-- your dataset path
MODELS_PATH = r"D:\Project\Models"     # <-- your models path
os.makedirs(MODELS_PATH, exist_ok=True)

# 1. Load dataset
def load_data(flatten=True):
    X, y = [], []
    classes = [d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))]
    classes.sort()
    class_map = {cls: i for i, cls in enumerate(classes)}

    for cls in classes:
        folder = os.path.join(DATASET_PATH, cls)
        if len(os.listdir(folder)) < 2:
            print(f"Skipping {cls}: Not enough images")
            continue
        for f in os.listdir(folder):
            try:
                img = Image.open(os.path.join(folder, f)).convert("RGB")
                img = img.resize((IMG_SIZE, IMG_SIZE))
                img = np.array(img, dtype=np.float32) / 255.0
                if flatten: img = img.reshape(-1)
                X.append(img)
                y.append(class_map[cls])
            except Exception as e:
                print("Skipping", f, e)
    return np.stack(X), np.array(y), class_map

# 2. Train model
def train_model():
    X, y, class_map = load_data()
    if len(X) == 0:
        print("No data to train!")
        return

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Accuracy
    acc = model.score(X_test, y_test)
    print(f"Training done! Accuracy on test set: {acc*100:.2f}%")

    # Predictions
    y_pred = model.predict(X_test)

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

    # Classification Report
    cr = classification_report(y_test, y_pred, target_names=list(class_map.keys()))
    print("Classification Report:")
    print(cr)

    # Save model
    joblib.dump({"model": model, "class_map": class_map}, os.path.join(MODELS_PATH, "logistic_model.joblib"))
    print(f"Model saved at {MODELS_PATH}\\logistic_model.joblib")

if __name__ == "__main__":
    train_model()


Training done! Accuracy on test set: 56.36%
Confusion Matrix:
[[72 21  9  3]
 [21 58  9 10]
 [ 6  2 65 44]
 [ 2  6 35 22]]
Classification Report:
              precision    recall  f1-score   support

        Cats       0.71      0.69      0.70       105
        Dogs       0.67      0.59      0.63        98
      Humans       0.55      0.56      0.55       117
  Not-Humans       0.28      0.34      0.31        65

    accuracy                           0.56       385
   macro avg       0.55      0.54      0.55       385
weighted avg       0.58      0.56      0.57       385

Model saved at D:\Project\Models\logistic_model.joblib


In [2]:
# backend/train_models.py
import os
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

IMG_SIZE = 128
DATASET_PATH = r"D:\Project\Datasets"
MODELS_PATH = r"D:\Project\Models"
os.makedirs(MODELS_PATH, exist_ok=True)

def load_data(flatten=True):
    X, y = [], []
    classes = [d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))]
    classes.sort()
    class_map = {cls: i for i, cls in enumerate(classes)}

    for cls in classes:
        folder = os.path.join(DATASET_PATH, cls)
        if len(os.listdir(folder)) < 2:
            print(f"Skipping {cls}: Not enough images")
            continue
        for f in os.listdir(folder):
            try:
                img = Image.open(os.path.join(folder, f)).convert("RGB")
                img = img.resize((IMG_SIZE, IMG_SIZE))
                img = np.array(img, dtype=np.float32) / 255.0
                if flatten:
                    img = img.reshape(-1)
                X.append(img)
                y.append(class_map[cls])
            except Exception as e:
                print("Skipping", f, e)
    return np.stack(X), np.array(y), class_map

def train_logistic(X_train, y_train):
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model

def train_random_forest(X_train, y_train):
    model = RandomForestClassifier(n_estimators=200)
    model.fit(X_train, y_train)
    return model

def train_cnn(X_train, y_train, X_test, y_test, num_classes):
    # Reshape and one-hot encode
    X_train = X_train.reshape(-1, IMG_SIZE, IMG_SIZE, 3)
    X_test = X_test.reshape(-1, IMG_SIZE, IMG_SIZE, 3)
    y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes)
    y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes)
    
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        MaxPooling2D(2,2),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(2,2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train_cat, epochs=10, batch_size=32, verbose=1)

    # Evaluate
    loss, acc = model.evaluate(X_test, y_test_cat, verbose=0)
    print(f"CNN Accuracy: {acc:.4f}")

    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    print("CNN Confusion Matrix:\n", confusion_matrix(y_test, y_pred_classes))
    print("CNN Classification Report:\n", classification_report(y_test, y_pred_classes))

    return model

def train_models():
    X, y, class_map = load_data(flatten=False)
    if len(X) == 0: 
        print("No data to train.")
        return

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train_flat = X_train.reshape(len(X_train), -1)
    X_test_flat = X_test.reshape(len(X_test), -1)

    # Logistic Regression
    lr_model = train_logistic(X_train_flat, y_train)
    y_pred_lr = lr_model.predict(X_test_flat)
    print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
    print("Logistic Regression Confusion Matrix:\n", confusion_matrix(y_test, y_pred_lr))
    print("Logistic Regression Classification Report:\n", classification_report(y_test, y_pred_lr))
    joblib.dump({"model": lr_model, "class_map": class_map}, os.path.join(MODELS_PATH, "logistic_model.joblib"))

    # Random Forest
    rf_model = train_random_forest(X_train_flat, y_train)
    y_pred_rf = rf_model.predict(X_test_flat)
    print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
    print("Random Forest Confusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))
    print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))
    joblib.dump({"model": rf_model, "class_map": class_map}, os.path.join(MODELS_PATH, "rf_model.joblib"))

    # CNN
    cnn_model = train_cnn(X_train, y_train, X_test, y_test, len(class_map))
    cnn_model.save(os.path.join(MODELS_PATH, "cnn_model.h5"))

    print("All models trained, evaluated, and saved!")

if __name__ == "__main__":
    train_models()


Logistic Regression Accuracy: 0.5636363636363636
Logistic Regression Confusion Matrix:
 [[72 21  9  3]
 [21 58  9 10]
 [ 6  2 65 44]
 [ 2  6 35 22]]
Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.69      0.70       105
           1       0.67      0.59      0.63        98
           2       0.55      0.56      0.55       117
           3       0.28      0.34      0.31        65

    accuracy                           0.56       385
   macro avg       0.55      0.54      0.55       385
weighted avg       0.58      0.56      0.57       385

Random Forest Accuracy: 0.7220779220779221
Random Forest Confusion Matrix:
 [[91  7  3  4]
 [11 83  1  3]
 [ 0  5 87 25]
 [ 2  3 43 17]]
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.87      0.87       105
           1       0.85      0.85      0.85        98
           2       0.65      0.74 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 1s/step - accuracy: 0.5091 - loss: 1.4099
Epoch 2/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.7643 - loss: 0.5270
Epoch 3/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 1s/step - accuracy: 0.8496 - loss: 0.3427
Epoch 4/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 1s/step - accuracy: 0.9141 - loss: 0.2159
Epoch 5/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 1s/step - accuracy: 0.9622 - loss: 0.1183
Epoch 6/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 1s/step - accuracy: 0.9740 - loss: 0.0802
Epoch 7/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1s/step - accuracy: 0.9909 - loss: 0.0399
Epoch 8/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.9967 - loss: 0.0216
Epoch 9/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[



CNN Confusion Matrix:
 [[89 13  3  0]
 [ 7 91  0  0]
 [ 2  4 81 30]
 [ 1  0 28 36]]
CNN Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.85      0.87       105
           1       0.84      0.93      0.88        98
           2       0.72      0.69      0.71       117
           3       0.55      0.55      0.55        65

    accuracy                           0.77       385
   macro avg       0.75      0.76      0.75       385
weighted avg       0.77      0.77      0.77       385

All models trained, evaluated, and saved!
