In [4]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [12]:
!git clone https://github.com/laxmimerit/dog-cat-full-dataset.git /content/gdrive/MyDrive/colab_images

Cloning into '/content/gdrive/MyDrive/colab_images'...
remote: Enumerating objects: 25033, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 25033 (delta 0), reused 4 (delta 0), pack-reused 25027 (from 1)[K
Receiving objects: 100% (25033/25033), 541.85 MiB | 23.12 MiB/s, done.
Resolving deltas: 100% (5/5), done.
Updating files: 100% (24990/24990), done.


In [26]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from skimage.feature import hog
from skimage import io, transform
import os
import optuna
import pickle

In [10]:
def load_images(path, label):
    images = []
    labels = []
    for filename in os.listdir(path):
        try:
            img = io.imread(os.path.join(path, filename))
            img = transform.resize(img, (64, 64))
            images.append(img)
            labels.append(label)
        except Exception as e:
            print(e)
    return np.array(images), np.array(labels)

In [13]:
# Load datasets
cats_path = 'small/cats'
dogs_path = 'small/dogs'

cat_images, cat_labels = load_images(cats_path, label=0)
dog_images, dog_labels = load_images(dogs_path, label=1)

In [14]:
X = np.concatenate((cat_images, dog_images), axis=0)
y = np.concatenate((cat_labels, dog_labels), axis=0)

In [17]:
# Extract HOG features
def extract_hog_features(images):
    features = []
    for image in images:
        feature = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), channel_axis = -1)
        features.append(feature)
    return np.array(features)

In [18]:
X_hog = extract_hog_features(X)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_hog, y, test_size=0.2, random_state=42)

In [20]:
# Define the Optuna objective function
def objective(trial):
    model_type = trial.suggest_categorical("model_type", ["logistic_regression", "svm"])

    if model_type == "logistic_regression":
        C = trial.suggest_float("C", 1e-3, 1e3, log=True)
        model = LogisticRegression(C=C, max_iter=1000, random_state=42)
    elif model_type == "svm":
        C = trial.suggest_float("C", 1e-3, 1e3, log=True)
        kernel = trial.suggest_categorical("kernel", ["linear", "rbf", "poly"])
        model = SVC(C=C, kernel=kernel, random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return accuracy_score(y_test, y_pred)

In [21]:
# Run Optuna study
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2024-12-15 10:37:52,413] A new study created in memory with name: no-name-f3e653d5-39a0-46b6-b81a-35eb24f99232
[I 2024-12-15 10:37:54,196] Trial 0 finished with value: 0.734 and parameters: {'model_type': 'logistic_regression', 'C': 0.13006378088999365}. Best is trial 0 with value: 0.734.
[I 2024-12-15 10:37:54,575] Trial 1 finished with value: 0.72 and parameters: {'model_type': 'logistic_regression', 'C': 0.001250326807216914}. Best is trial 0 with value: 0.734.
[I 2024-12-15 10:37:55,996] Trial 2 finished with value: 0.725 and parameters: {'model_type': 'logistic_regression', 'C': 0.26602991145695176}. Best is trial 0 with value: 0.734.
[I 2024-12-15 10:38:25,725] Trial 3 finished with value: 0.682 and parameters: {'model_type': 'svm', 'C': 2.894870808206561, 'kernel': 'linear'}. Best is trial 0 with value: 0.734.
[I 2024-12-15 10:38:26,408] Trial 4 finished with value: 0.732 and parameters: {'model_type': 'logistic_regression', 'C': 0.15473646661462342}. Best is trial 0 with val

In [22]:
# Output the best hyperparameters
print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)

[I 2024-12-15 12:02:13,399] A new study created in memory with name: no-name-d0a9a9a4-e5d5-41b6-9d04-a26e9e550988


ValueError: No trials are completed yet.

In [23]:
def train_best_model():
    params = study.best_params
    if params["model_type"] == "logistic_regression":
        best_model = LogisticRegression(C=params["C"], max_iter=1000, random_state=42)
    elif params["model_type"] == "svm":
        best_model = SVC(C=params["C"], kernel=params["kernel"], random_state=42)

    best_model.fit(X_train, y_train)
    return best_model


In [27]:
# Save the best model using pickle
def save_model(model, filename="best_model.pkl"):
    with open(filename, "wb") as file:
        pickle.dump(model, file)

In [28]:
# Load the model from a file
def load_model(filename="best_model.pkl"):
    with open(filename, "rb") as file:
        return pickle.load(file)

In [29]:
# Predict a single image
def predict_image(model, image_path):
    image = io.imread(image_path)
    image = transform.resize(image, (64, 64))
    feature = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), channel_axis=-1)
    feature = np.array(feature).reshape(1, -1)
    prediction = model.predict(feature)
    return "Cat" if prediction[0] == 0 else "Dog"

In [31]:
# Display model performance
def display_performance(model):
    y_pred = model.predict(X_test)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

In [30]:
best_model = train_best_model()
save_model(best_model)

 # Load and evaluate the model
loaded_model = load_model()
display_performance(loaded_model)

# Predict a single image
image_path = ".jpg"  # Replace with an actual image path
prediction = predict_image(loaded_model, image_path)
print(f"Prediction for the image: {prediction}")

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.76      0.76       500
           1       0.76      0.76      0.76       500

    accuracy                           0.76      1000
   macro avg       0.76      0.76      0.76      1000
weighted avg       0.76      0.76      0.76      1000

Prediction for the image: Cat


In [32]:
!wget https://t4.ftcdn.net/jpg/02/66/72/41/360_F_266724172_Iy8gdKgMa7XmrhYYxLCxyhx6J7070Pr8.jpg

--2024-12-15 11:03:56--  https://t4.ftcdn.net/jpg/02/66/72/41/360_F_266724172_Iy8gdKgMa7XmrhYYxLCxyhx6J7070Pr8.jpg
Resolving t4.ftcdn.net (t4.ftcdn.net)... 151.101.1.91, 151.101.65.91, 151.101.129.91, ...
Connecting to t4.ftcdn.net (t4.ftcdn.net)|151.101.1.91|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41304 (40K) [image/jpeg]
Saving to: ‘360_F_266724172_Iy8gdKgMa7XmrhYYxLCxyhx6J7070Pr8.jpg’


2024-12-15 11:03:57 (820 KB/s) - ‘360_F_266724172_Iy8gdKgMa7XmrhYYxLCxyhx6J7070Pr8.jpg’ saved [41304/41304]

