Importing the Dataset using kagglehub

In [None]:
import kagglehub

# Download the Kaggle dataset and keep the returned local directory in `path`;
# the preprocessing cell below starts from this variable.
path = kagglehub.dataset_download("buraktaci/cerebrovascular-lesions")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'cerebrovascular-lesions' dataset.
Path to dataset files: /kaggle/input/cerebrovascular-lesions


Preprocessing the Images (grayscale conversion and resize to 128x128)

In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

# The class folders may sit one directory level below the download root;
# step into that folder when it exists.
nested_folder_name = 'cerebrovascular lesions'

if nested_folder_name in os.listdir(path):
    path = os.path.join(path, nested_folder_name)
    print(f"Path corrected to nested folder: {path}")
else:
    print(f"Could not find expected subfolder '{nested_folder_name}'.")

output_root = '/content/preprocessed_cerebrovascular_lesions'
os.makedirs(output_root, exist_ok=True)

target_size = (128, 128)  # (width, height), the order cv2.resize expects
image_extensions = ('.jpg', '.png', '.jpeg')

print("\nStarting Preprocessing...")

# Files that could not be decoded or written; reported once at the end so
# silent data loss is visible.
skipped_files = []

for class_name in sorted(os.listdir(path)):
    class_in = os.path.join(path, class_name)
    class_out = os.path.join(output_root, class_name)

    # Only sub-directories are classes; ignore stray files at this level.
    if not os.path.isdir(class_in):
        continue

    os.makedirs(class_out, exist_ok=True)

    print(f'Processing: {class_name}')
    for filename in tqdm(os.listdir(class_in)):
        if not filename.lower().endswith(image_extensions):
            continue

        img_path = os.path.join(class_in, filename)

        # cv2.imread returns None (it does not raise) for unreadable files.
        img = cv2.imread(img_path)
        if img is None:
            skipped_files.append(img_path)
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, target_size)

        # `resized` is already uint8 in [0, 255]. Scaling to [0, 1] and
        # straight back with a truncating cast cannot change what is stored
        # except by losing gray levels, so the image is written as is.
        save_path = os.path.join(class_out, filename)
        if not cv2.imwrite(save_path, resized):
            skipped_files.append(img_path)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that could not be read or written, "
          f"e.g. {skipped_files[0]}")

Path corrected to nested folder: /kaggle/input/cerebrovascular-lesions/cerebrovascular lesions

Starting Preprocessing...
Processing: Acute ischemic infarction


100%|██████████| 1119/1119 [00:26<00:00, 42.71it/s]


Processing: Chronic ischemic infarction


100%|██████████| 1008/1008 [00:23<00:00, 43.54it/s]


Processing: Epidural hemorrhage


100%|██████████| 1011/1011 [00:25<00:00, 39.17it/s]


Processing: Healthy Control


100%|██████████| 1016/1016 [00:25<00:00, 39.17it/s]


Processing: Mixed cerebrovascular disease


100%|██████████| 1191/1191 [00:29<00:00, 40.12it/s]


Processing: Parenchymal hemorrhage


100%|██████████| 1077/1077 [00:30<00:00, 35.00it/s]


Processing: Subarachnoid arachnoid hemorrhage


100%|██████████| 1166/1166 [00:28<00:00, 40.74it/s]


Processing: Subdural effusion


100%|██████████| 1080/1080 [00:25<00:00, 41.64it/s]


Processing: Subdural hemorrhage


100%|██████████| 1177/1177 [00:31<00:00, 37.93it/s]


Processing: Ventricular hemorrhage


100%|██████████| 1020/1020 [00:25<00:00, 39.46it/s]


Verifying that the preprocessed images are grayscale, 128x128, and stored as uint8

In [None]:
import os
import cv2
import numpy as np

preprocessed_root = '/content/preprocessed_cerebrovascular_lesions'

print("Verifying preprocessed images...")

# Each flag stays True only if every checked image passes that test.
all_grayscale = True
all_correct_size = True
all_normalized = True
target_size = (128, 128)  # (width, height)

for class_name in sorted(os.listdir(preprocessed_root)):
    class_path = os.path.join(preprocessed_root, class_name)

    if not os.path.isdir(class_path):
        continue

    print(f"Checking class: {class_name}")
    for filename in os.listdir(class_path):
        if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue

        img_path = os.path.join(class_path, filename)

        # IMREAD_UNCHANGED keeps the stored channel count and bit depth, so
        # the checks below see the file as it was written.
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)

        if img is None:
            print(f"Could not read image: {img_path}")
            continue

        # A single-channel image decodes to a 2-D array. Anything else
        # (3-channel BGR, 4-channel BGRA, ...) is not grayscale.
        if img.ndim != 2:
            all_grayscale = False
            print(f"Image is not grayscale: {img_path}")

        # img.shape is (height, width[, channels]); reverse to compare with (width, height).
        if img.shape[:2][::-1] != target_size:
            all_correct_size = False
            print(f"Image has incorrect dimensions: {img_path} - {img.shape[:2][::-1]}")

        # A uint8 array is always within 0-255, so the dtype is the only
        # thing that needs checking.
        if img.dtype != np.uint8:
            all_normalized = False
            print(f"Image is not normalized (0-255 uint8): {img_path}")

print("\nVerification Results:")
print(f"All images are grayscale: {all_grayscale}")
print(f"All images have correct dimensions ({target_size[0]}x{target_size[1]}): {all_correct_size}")
print(f"All images are normalized (0-255 uint8): {all_normalized}")

Verifying preprocessed images...
Checking class: Acute ischemic infarction
Checking class: Chronic ischemic infarction
Checking class: Epidural hemorrhage
Checking class: Healthy Control
Checking class: Mixed cerebrovascular disease
Checking class: Parenchymal hemorrhage
Checking class: Subarachnoid arachnoid hemorrhage
Checking class: Subdural effusion
Checking class: Subdural hemorrhage
Checking class: Ventricular hemorrhage

Verification Results:
All images are grayscale: True
All images have correct dimensions (128x128): True
All images are normalized (0-255 uint8): True


In [None]:
# Mount Google Drive at /content/drive; later cells read and write files
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog, local_binary_pattern, graycomatrix, graycoprops
from skimage.filters import gabor

Radiomics Feature Extraction

In [None]:
def extract_radiomics_features(img):
    """Build one flat texture/shape feature vector for a single image.

    The image is converted to grayscale if it has a channel axis, resized to
    128x128 and min-max stretched to uint8 [0, 255] before any descriptor is
    computed.

    Parameters
    ----------
    img : numpy.ndarray
        2-D grayscale image or 3-D BGR image as returned by ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        1-D vector, concatenated in this order (lengths for the 128x128 input):
        HOG (8100), LBP histogram (58), GLCM properties (48),
        GLCM summary statistics (6), Gabor response statistics (24),
        for 8236 values in total.
    """
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (128, 128))
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')

    # 1. HOG
    # visualize=False: only the descriptor is needed, so skip rendering the
    # HOG visualisation image that was previously computed and discarded.
    hog_features = hog(img, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), block_norm='L2-Hys',
                       visualize=False, feature_vector=True)

    # 2. LBP
    lbp = local_binary_pattern(img, P=8, R=1, method='uniform')
    # NOTE(review): 'uniform' LBP with P=8 yields codes 0..9 only, so bins
    # 10..57 should always be zero. The 58-bin layout is kept on purpose so
    # the feature vector stays compatible with already extracted CSVs.
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 59), range=(0, 58))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-6)  # epsilon guards against division by zero

    # 3. GLCM: 3 distances x 4 angles, four properties each
    glcm = graycomatrix(img, distances=[1, 2, 3],
                        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
                        levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast').ravel()
    correlation = graycoprops(glcm, 'correlation').ravel()
    energy = graycoprops(glcm, 'energy').ravel()
    homogeneity = graycoprops(glcm, 'homogeneity').ravel()
    glcm_features = np.hstack([contrast, correlation, energy, homogeneity])

    # 4. Haralick-like descriptors (summary stats from GLCM)
    # NOTE(review): with normed=True every (distance, angle) matrix sums to 1,
    # so glcm.mean() looks constant across images; kept to preserve the layout.
    haralick_features = [
        glcm.mean(), glcm.var(), contrast.mean(),
        correlation.mean(), energy.mean(), homogeneity.mean()
    ]

    # 5. Gabor filters: mean and variance of the real response for every
    # (orientation, frequency) pair; the imaginary response is not used.
    gabor_features = []
    frequencies = (0.1, 0.3, 0.5)
    for theta in (0, np.pi/4, np.pi/2, 3*np.pi/4):
        for freq in frequencies:
            filt_real, _ = gabor(img, frequency=freq, theta=theta)
            gabor_features.append(filt_real.mean())
            gabor_features.append(filt_real.var())
    gabor_features = np.array(gabor_features)

    # Combine all
    return np.hstack([hog_features, lbp_hist, glcm_features,
                      haralick_features, gabor_features])

Saving the Extracted Features into CSV Part Files

In [None]:
def process_dataset(dataset_path, output_csv='radiomics_features.csv', save_every=2500):
  """Extract features for every image under ``dataset_path`` and save them in CSV parts.

  Each image's label is the name of the directory that contains it. Rows are
  buffered in memory and written as ``<stem>_pt<k>.csv`` every ``save_every``
  images; whatever is left over is written once, after the walk has finished.

  Parameters
  ----------
  dataset_path : str
      Root directory, walked recursively for .png/.jpg/.jpeg files.
  output_csv : str
      Base file name; its extension is stripped and ``_pt<k>.csv`` appended.
  save_every : int
      Number of images per part file.
  """
  # splitext only strips the final extension, so dots elsewhere in the path
  # (e.g. "./out/features.csv") no longer truncate the part-file name.
  part_stem = os.path.splitext(output_csv)[0]

  data, labels = [], []
  count = 0
  file_index = 1

  def save_part(rows, row_labels, index):
    """Write one buffered chunk (features + label column) and return its file name."""
    df = pd.DataFrame(rows)
    df['label'] = row_labels
    part_name = f"{part_stem}_pt{index}.csv"
    df.to_csv(part_name, index=False)
    return part_name

  for root, dirs, files in os.walk(dataset_path):
    # Sorting in place fixes the traversal order, which os.walk otherwise
    # leaves to the filesystem; this keeps part contents reproducible.
    dirs.sort()
    for file in sorted(files):
      if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

      img_path = os.path.join(root, file)
      label = os.path.basename(root)

      img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

      if img is None:
        print(f"Could not read image at {img_path}. Skipping file.")
        continue

      features = extract_radiomics_features(img)

      data.append(features)
      labels.append(label)
      count += 1

      if count % 500 == 0:
        print(f"Processed {count} images...")

      # Save partial CSV every N images, then start a fresh buffer
      if count % save_every == 0:
        part_name = save_part(data, labels, file_index)
        print(f"Saved {part_name} after {count} images.")
        data, labels = [], []
        file_index += 1

  # Save remaining rows once, after the whole tree has been walked. Previously
  # this ran inside the walk loop and rewrote the pending buffer after every
  # directory.
  if data:
    part_name = save_part(data, labels, file_index)
    print(f"Saved final part: {part_name}")


In [None]:
# Extract features from the preprocessed images stored on Drive. output_csv has
# no directory component, so the part files are written relative to the current
# working directory.
dataset_path = "/content/drive/MyDrive/paneer/preprocessed_cerebrovascular_lesions"
process_dataset(dataset_path, output_csv="radiomics_features.csv")

Combining the part CSV files into a single shuffled CSV file

In [None]:
import pandas as pd
import glob

path = "/content/drive/MyDrive/paneer/csv_files/part_csvs"

# glob returns files in filesystem order; sorting fixes the concatenation
# order so the seeded shuffle below gives the same result on every run.
csv_files = sorted(glob.glob(path + "/*.csv"))
if not csv_files:
    raise FileNotFoundError(f"No part CSV files found in {path}")

combined_df = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)
combined_shuff_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

# index=False: without it the row index is written as an unnamed first column,
# which comes back from read_csv as 'Unnamed: 0' and ends up among the features.
combined_shuff_df.to_csv(
    '/content/drive/MyDrive/paneer/csv_files/radiomics_features_full_shuffled.csv',
    index=False,
)

Verifying the count

In [None]:
# Rows per class in the combined table.
combined_shuff_df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
Mixed cerebrovascular disease,1191
Subdural hemorrhage,1177
Subarachnoid arachnoid hemorrhage,1166
Acute ischemic infarction,1119
Subdural effusion,1080
Parenchymal hemorrhage,1077
Ventricular hemorrhage,1020
Healthy Control,1016
Epidural hemorrhage,1011
Chronic ischemic infarction,1008


In [None]:
import pandas as pd

# Reload the combined, shuffled feature table from Drive so the following
# cells can run without repeating feature extraction.
shuffled_df = pd.read_csv('/content/drive/MyDrive/paneer/csv_files/radiomics_features_full_shuffled.csv')

In [None]:
from sklearn.preprocessing import LabelEncoder

# 'Unnamed: 0' shows up when the CSV was saved together with its row index.
# It is a row id, not a feature, so it must not reach the models.
# errors='ignore' keeps this working when the column is absent.
X = shuffled_df.drop(columns=['label', 'Unnamed: 0'], errors='ignore').values
y = shuffled_df['label']

Saving the Label Encoder as a .pkl file

In [None]:
from sklearn.preprocessing import LabelEncoder
import joblib

# Learn the class-name -> integer mapping from the label column, then apply it.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# Persist the fitted encoder so the same mapping can be restored later.
joblib.dump(le, '/content/drive/MyDrive/paneer/label_encoder.pkl')

['/content/drive/MyDrive/paneer/label_encoder.pkl']

In [None]:
# Show which integer code the encoder assigned to each class name;
# the position in le.classes_ is the encoded label.
for i, class_name in enumerate(le.classes_):
    print(f"Class {class_name} -> Label {i}")

Class Acute ischemic infarction -> Label 0
Class Chronic ischemic infarction -> Label 1
Class Epidural hemorrhage -> Label 2
Class Healthy Control -> Label 3
Class Mixed cerebrovascular disease -> Label 4
Class Parenchymal hemorrhage -> Label 5
Class Subarachnoid arachnoid hemorrhage -> Label 6
Class Subdural effusion -> Label 7
Class Subdural hemorrhage -> Label 8
Class Ventricular hemorrhage -> Label 9


Loading the saved Label Encoder (for use in a later session)

In [None]:
from sklearn.preprocessing import LabelEncoder
import joblib

# Restore the previously saved encoder so class codes match the ones used
# when it was fitted. joblib.load unpickles the file, which can execute code:
# only load encoder files you created yourself.
le = joblib.load('/content/drive/MyDrive/paneer/label_encoder.pkl')

# Encode the string labels with the restored mapping; `y` must already be
# defined by the cell that builds X and y.
y_encoded = le.transform(y)
#y_pred_original = le.inverse_transform(y_pred)

In [None]:
!pip install optuna --quiet

Automated search (Optuna) that reduces the ~8,000-feature dataset to an optimized feature set via scaling, tree-based feature selection, and Incremental PCA.

In [None]:
import numpy as np
import optuna
import warnings
import time
import gc
import pandas as pd
import os
import csv

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import IncrementalPCA
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Silence library warnings for the whole search.
warnings.filterwarnings('ignore')

# Shared Drive folder that receives every artefact of this cell.
SHARED_PATH = "/content/drive/MyDrive/paneer"
os.makedirs(f"{SHARED_PATH}/csv_files", exist_ok=True)

# Per-trial log. The header is written only when the file does not exist yet,
# so rows from earlier runs are kept and new rows are appended after them.
CSV_LOG = f"{SHARED_PATH}/csv_files/optuna_trials_log.csv"
if not os.path.exists(CSV_LOG):
    log_header = [
        "trial_number", "selector_threshold", "learning_rate", "max_depth",
        "fold1", "fold2", "fold3", "mean_cv",
    ]
    with open(CSV_LOG, "w", newline="") as log_file:
        csv.writer(log_file).writerow(log_header)

# Stratified 80/20 hold-out split with a fixed seed.
split_parts = train_test_split(
    X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)
X_train, X_test, y_train, y_test = split_parts
gc.collect()
print(f"Data split: X_train={X_train.shape}, X_test={X_test.shape}")

# Upper bound on the number of components kept by IncrementalPCA.
N_COMPONENTS_IPCA = 256

def objective_pipeline(trial):
    """Optuna objective: mean 3-fold CV accuracy of scale -> select -> IPCA -> XGBoost.

    The trial samples the SelectFromModel threshold ("median" or "mean"), the
    XGBoost learning rate and the XGBoost max depth. Scaler, selector and PCA
    are re-fitted on the training part of every fold, so the validation part
    never influences the preprocessing. One row per finished trial is appended
    to ``CSV_LOG``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyper-parameters.

    Returns
    -------
    float
        Mean validation accuracy over the three folds.

    Raises
    ------
    optuna.TrialPruned
        If feature selection keeps no feature in a fold.
    """
    selector_threshold = trial.suggest_categorical("selector_threshold", ["median", "mean"])
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.2)
    max_depth = trial.suggest_int("max_depth", 4, 8)

    xgb_params = {
        "n_estimators": 100,
        "learning_rate": learning_rate,
        "max_depth": max_depth,
        "objective": "multi:softmax",
        "num_class": len(np.unique(y_train)),
        "random_state": 42,
        "n_jobs": 1,
        "use_label_encoder": False,
        "eval_metric": "mlogloss"
    }

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    cv_scores = []

    print(f"\n=== Trial {trial.number} started: selector_threshold={selector_threshold}, "
          f"learning_rate={learning_rate:.3f}, max_depth={max_depth} ===")

    for fold, (train_idx, val_idx) in enumerate(cv.split(X_train, y_train), start=1):
        print(f"  --> Fold {fold}/3 processing...")
        X_tr, X_val = X_train[train_idx], X_train[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]

        # Scale (statistics come from the training part of the fold only)
        scaler = StandardScaler().fit(X_tr)
        X_tr_scaled = scaler.transform(X_tr)
        X_val_scaled = scaler.transform(X_val)

        # Feature selection by ExtraTrees importance
        selector = SelectFromModel(
            ExtraTreesClassifier(n_estimators=50, random_state=42, n_jobs=1),
            threshold=selector_threshold
        ).fit(X_tr_scaled, y_tr)
        X_tr_sel = selector.transform(X_tr_scaled)
        X_val_sel = selector.transform(X_val_scaled)

        if X_tr_sel.shape[1] == 0:
            print("    - WARNING: No features selected in this fold. Pruning trial.")
            # Actually prune: returning 0.0 here would record a completed
            # trial with a made-up score instead of a pruned one.
            raise optuna.TrialPruned()

        # Incremental PCA (component count capped by the surviving features)
        ipca = IncrementalPCA(n_components=min(N_COMPONENTS_IPCA, X_tr_sel.shape[1]), batch_size=256).fit(X_tr_sel)
        X_tr_pca = ipca.transform(X_tr_sel)
        X_val_pca = ipca.transform(X_val_sel)

        # Train XGBoost
        model = xgb.XGBClassifier(**xgb_params).fit(X_tr_pca, y_tr)
        fold_acc = accuracy_score(y_val, model.predict(X_val_pca))
        cv_scores.append(fold_acc)
        print(f"    - Fold {fold} accuracy: {fold_acc:.4f}")

        # Free memory before the next fold
        del X_tr, X_val, X_tr_scaled, X_val_scaled, X_tr_sel, X_val_sel, X_tr_pca, X_val_pca
        gc.collect()

    mean_cv = np.mean(cv_scores)
    print(f"=== Trial {trial.number} finished. Mean CV accuracy: {mean_cv:.4f} ===")

    # Append this trial to the persistent log (columns match the header row).
    with open(CSV_LOG, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([trial.number, selector_threshold, learning_rate, max_depth,
                         cv_scores[0], cv_scores[1], cv_scores[2], mean_cv])

    return mean_cv

# Seed the sampler so the sequence of suggested hyper-parameters, and with it
# the selected best trial, is reproducible across runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective_pipeline, n_trials=30)

print("\nOptuna finished!")
print("Best CV accuracy:", study.best_value)
print("Best params:", study.best_params)
best_params = study.best_params

# Re-fit the preprocessing chain with the best selector threshold. Every
# transformer is fitted on X_train only and then applied to X_test.
print("\n--- Building final pipeline on full dataset ---")
final_scaler = StandardScaler().fit(X_train)
X_train_scaled = final_scaler.transform(X_train)
X_test_scaled = final_scaler.transform(X_test)

final_selector = SelectFromModel(
    ExtraTreesClassifier(n_estimators=50, random_state=42),
    threshold=best_params["selector_threshold"]
).fit(X_train_scaled, y_train)
X_train_sel = final_selector.transform(X_train_scaled)
X_test_sel = final_selector.transform(X_test_scaled)

final_ipca = IncrementalPCA(n_components=min(N_COMPONENTS_IPCA, X_train_sel.shape[1]), batch_size=256).fit(X_train_sel)
X_train_final = final_ipca.transform(X_train_sel)
X_test_final = final_ipca.transform(X_test_sel)

# Persist the reduced matrices and their encoded labels for the model-training cells.
pd.DataFrame(X_train_final).to_csv(f"{SHARED_PATH}/csv_files/X_train_final.csv", index=False)
pd.DataFrame(X_test_final).to_csv(f"{SHARED_PATH}/csv_files/X_test_final.csv", index=False)
pd.DataFrame(y_train, columns=['label']).to_csv(f"{SHARED_PATH}/csv_files/y_train.csv", index=False)
pd.DataFrame(y_test, columns=['label']).to_csv(f"{SHARED_PATH}/csv_files/y_test.csv", index=False)

print("\n✅ Final datasets saved!")
print(f"X_train_final shape: {X_train_final.shape}, X_test_final shape: {X_test_final.shape}")
print(f"Trial log saved at: {CSV_LOG}")


[I 2025-10-14 13:00:20,383] A new study created in memory with name: no-name-790e6034-cf60-4e9b-9149-5ccaef2c8e31


Data split: X_train=(8692, 8237), X_test=(2173, 8237)

=== Trial 0 started: selector_threshold=mean, learning_rate=0.095, max_depth=8 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8761
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8985
  --> Fold 3/3 processing...


[I 2025-10-14 13:09:15,542] Trial 0 finished with value: 0.8888647886936972 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.09460147454354628, 'max_depth': 8}. Best is trial 0 with value: 0.8888647886936972.


    - Fold 3 accuracy: 0.8920
=== Trial 0 finished. Mean CV accuracy: 0.8889 ===

=== Trial 1 started: selector_threshold=mean, learning_rate=0.058, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8489
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8740
  --> Fold 3/3 processing...


[I 2025-10-14 13:17:38,379] Trial 1 finished with value: 0.8643598134525781 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.057712122939594226, 'max_depth': 7}. Best is trial 0 with value: 0.8888647886936972.


    - Fold 3 accuracy: 0.8702
=== Trial 1 finished. Mean CV accuracy: 0.8644 ===

=== Trial 2 started: selector_threshold=mean, learning_rate=0.110, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8775
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8951
  --> Fold 3/3 processing...


[I 2025-10-14 13:23:41,779] Trial 2 finished with value: 0.8872537680675033 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.11004321844002904, 'max_depth': 6}. Best is trial 0 with value: 0.8888647886936972.


    - Fold 3 accuracy: 0.8892
=== Trial 2 finished. Mean CV accuracy: 0.8873 ===

=== Trial 3 started: selector_threshold=mean, learning_rate=0.145, max_depth=8 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8816
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9006
  --> Fold 3/3 processing...


[I 2025-10-14 13:30:55,129] Trial 3 finished with value: 0.8930063695188037 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.1453075484593495, 'max_depth': 8}. Best is trial 3 with value: 0.8930063695188037.


    - Fold 3 accuracy: 0.8968
=== Trial 3 finished. Mean CV accuracy: 0.8930 ===

=== Trial 4 started: selector_threshold=mean, learning_rate=0.089, max_depth=4 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8175
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8257
  --> Fold 3/3 processing...


[I 2025-10-14 13:34:23,686] Trial 4 finished with value: 0.8260479277048142 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.08927251896813199, 'max_depth': 4}. Best is trial 3 with value: 0.8930063695188037.


    - Fold 3 accuracy: 0.8350
=== Trial 4 finished. Mean CV accuracy: 0.8260 ===

=== Trial 5 started: selector_threshold=median, learning_rate=0.196, max_depth=4 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8703
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8764
  --> Fold 3/3 processing...


[I 2025-10-14 13:38:08,891] Trial 5 finished with value: 0.8745978304742245 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.19600468767115226, 'max_depth': 4}. Best is trial 3 with value: 0.8930063695188037.


    - Fold 3 accuracy: 0.8771
=== Trial 5 finished. Mean CV accuracy: 0.8746 ===

=== Trial 6 started: selector_threshold=median, learning_rate=0.194, max_depth=8 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8892
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9016
  --> Fold 3/3 processing...


[I 2025-10-14 13:44:47,275] Trial 6 finished with value: 0.8958820349839544 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.194390559146668, 'max_depth': 8}. Best is trial 6 with value: 0.8958820349839544.


    - Fold 3 accuracy: 0.8968
=== Trial 6 finished. Mean CV accuracy: 0.8959 ===

=== Trial 7 started: selector_threshold=mean, learning_rate=0.147, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8851
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8985
  --> Fold 3/3 processing...


[I 2025-10-14 13:50:24,342] Trial 7 finished with value: 0.8943867111761935 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.14665664638577774, 'max_depth': 6}. Best is trial 6 with value: 0.8958820349839544.


    - Fold 3 accuracy: 0.8996
=== Trial 7 finished. Mean CV accuracy: 0.8944 ===

=== Trial 8 started: selector_threshold=median, learning_rate=0.049, max_depth=4 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.7692
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.7777
  --> Fold 3/3 processing...


[I 2025-10-14 13:54:12,657] Trial 8 finished with value: 0.7696733228467706 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.04889310057876216, 'max_depth': 4}. Best is trial 6 with value: 0.8958820349839544.


    - Fold 3 accuracy: 0.7622
=== Trial 8 finished. Mean CV accuracy: 0.7697 ===

=== Trial 9 started: selector_threshold=median, learning_rate=0.139, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8892
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9040
  --> Fold 3/3 processing...


[I 2025-10-14 14:01:09,268] Trial 9 finished with value: 0.8976079583529569 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.13927469865877376, 'max_depth': 7}. Best is trial 9 with value: 0.8976079583529569.


    - Fold 3 accuracy: 0.8996
=== Trial 9 finished. Mean CV accuracy: 0.8976 ===

=== Trial 10 started: selector_threshold=median, learning_rate=0.014, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.7429
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.7518
  --> Fold 3/3 processing...


[I 2025-10-14 14:08:08,345] Trial 10 finished with value: 0.7406808277349016 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.013556308775940315, 'max_depth': 6}. Best is trial 9 with value: 0.8976079583529569.


    - Fold 3 accuracy: 0.7273
=== Trial 10 finished. Mean CV accuracy: 0.7407 ===

=== Trial 11 started: selector_threshold=median, learning_rate=0.198, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8954
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9099
  --> Fold 3/3 processing...


[I 2025-10-14 14:14:05,971] Trial 11 finished with value: 0.9034753831395035 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.19843366032826454, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9051
=== Trial 11 finished. Mean CV accuracy: 0.9035 ===

=== Trial 12 started: selector_threshold=median, learning_rate=0.150, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8916
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9020
  --> Fold 3/3 processing...


[I 2025-10-14 14:20:49,942] Trial 12 finished with value: 0.8992185422375574 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.15009436913121876, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9040
=== Trial 12 finished. Mean CV accuracy: 0.8992 ===

=== Trial 13 started: selector_threshold=median, learning_rate=0.170, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8916
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9061
  --> Fold 3/3 processing...


[I 2025-10-14 14:27:19,440] Trial 13 finished with value: 0.8992185422375574 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.17017620854635046, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8999
=== Trial 13 finished. Mean CV accuracy: 0.8992 ===

=== Trial 14 started: selector_threshold=median, learning_rate=0.175, max_depth=5 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8803
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9009
  --> Fold 3/3 processing...


[I 2025-10-14 14:32:05,147] Trial 14 finished with value: 0.893351713007729 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.17502001414060775, 'max_depth': 5}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8989
=== Trial 14 finished. Mean CV accuracy: 0.8934 ===

=== Trial 15 started: selector_threshold=median, learning_rate=0.164, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8851
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9075
  --> Fold 3/3 processing...


[I 2025-10-14 14:38:36,101] Trial 15 finished with value: 0.8977234963562649 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.16388292546402716, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9006
=== Trial 15 finished. Mean CV accuracy: 0.8977 ===

=== Trial 16 started: selector_threshold=median, learning_rate=0.124, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8844
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9020
  --> Fold 3/3 processing...


[I 2025-10-14 14:44:46,884] Trial 16 finished with value: 0.893236175004421 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.12365711889147805, 'max_depth': 6}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8933
=== Trial 16 finished. Mean CV accuracy: 0.8932 ===

=== Trial 17 started: selector_threshold=median, learning_rate=0.199, max_depth=5 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8861
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9040
  --> Fold 3/3 processing...


[I 2025-10-14 14:49:33,438] Trial 17 finished with value: 0.8984137465925223 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.19902093091055686, 'max_depth': 5}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9051
=== Trial 17 finished. Mean CV accuracy: 0.8984 ===

=== Trial 18 started: selector_threshold=median, learning_rate=0.173, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8892
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9023
  --> Fold 3/3 processing...


[I 2025-10-14 14:55:53,605] Trial 18 finished with value: 0.8974928967950234 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.17337118878362792, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9009
=== Trial 18 finished. Mean CV accuracy: 0.8975 ===

=== Trial 19 started: selector_threshold=median, learning_rate=0.155, max_depth=5 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8785
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8916
  --> Fold 3/3 processing...


[I 2025-10-14 15:00:41,427] Trial 19 finished with value: 0.8882892029775613 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.1551081386018172, 'max_depth': 5}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8947
=== Trial 19 finished. Mean CV accuracy: 0.8883 ===

=== Trial 20 started: selector_threshold=median, learning_rate=0.130, max_depth=8 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8872
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8989
  --> Fold 3/3 processing...


[I 2025-10-14 15:08:35,105] Trial 20 finished with value: 0.8953069654169742 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.1300631990183858, 'max_depth': 8}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8999
=== Trial 20 finished. Mean CV accuracy: 0.8953 ===

=== Trial 21 started: selector_threshold=median, learning_rate=0.179, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8882
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9051
  --> Fold 3/3 processing...


[I 2025-10-14 15:14:51,893] Trial 21 finished with value: 0.8977231390222341 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.1788018131877688, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8999
=== Trial 21 finished. Mean CV accuracy: 0.8977 ===

=== Trial 22 started: selector_threshold=median, learning_rate=0.184, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8906
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9099
  --> Fold 3/3 processing...


[I 2025-10-14 15:21:05,522] Trial 22 finished with value: 0.9012897693917039 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.18435063551604042, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9033
=== Trial 22 finished. Mean CV accuracy: 0.9013 ===

=== Trial 23 started: selector_threshold=median, learning_rate=0.186, max_depth=7 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8903
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9106
  --> Fold 3/3 processing...


[I 2025-10-14 15:27:18,881] Trial 23 finished with value: 0.8999090704002831 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.18614347604180848, 'max_depth': 7}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8989
=== Trial 23 finished. Mean CV accuracy: 0.8999 ===

=== Trial 24 started: selector_threshold=median, learning_rate=0.186, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8934
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9065
  --> Fold 3/3 processing...


[I 2025-10-14 15:32:57,854] Trial 24 finished with value: 0.9010593286455872 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.18633788852872785, 'max_depth': 6}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9033
=== Trial 24 finished. Mean CV accuracy: 0.9011 ===

=== Trial 25 started: selector_threshold=median, learning_rate=0.163, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8923
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9037
  --> Fold 3/3 processing...


[I 2025-10-14 15:38:39,606] Trial 25 finished with value: 0.8994485859458621 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.16269143522089455, 'max_depth': 6}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9023
=== Trial 25 finished. Mean CV accuracy: 0.8994 ===

=== Trial 26 started: selector_threshold=median, learning_rate=0.187, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8923
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9078
  --> Fold 3/3 processing...


[I 2025-10-14 15:44:10,363] Trial 26 finished with value: 0.9012895708727978 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.18702436286840027, 'max_depth': 6}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.9037
=== Trial 26 finished. Mean CV accuracy: 0.9013 ===

=== Trial 27 started: selector_threshold=median, learning_rate=0.070, max_depth=5 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8402
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8578
  --> Fold 3/3 processing...


[I 2025-10-14 15:49:12,763] Trial 27 finished with value: 0.8496329266316209 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.07016987685331269, 'max_depth': 5}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8509
=== Trial 27 finished. Mean CV accuracy: 0.8496 ===

=== Trial 28 started: selector_threshold=median, learning_rate=0.200, max_depth=6 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8879
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.9082
  --> Fold 3/3 processing...


[I 2025-10-14 15:54:36,168] Trial 28 finished with value: 0.8985286096315498 and parameters: {'selector_threshold': 'median', 'learning_rate': 0.19958865737541653, 'max_depth': 6}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8996
=== Trial 28 finished. Mean CV accuracy: 0.8985 ===

=== Trial 29 started: selector_threshold=mean, learning_rate=0.110, max_depth=8 ===
  --> Fold 1/3 processing...
    - Fold 1 accuracy: 0.8834
  --> Fold 2/3 processing...
    - Fold 2 accuracy: 0.8975
  --> Fold 3/3 processing...


[I 2025-10-14 16:02:46,240] Trial 29 finished with value: 0.8923158016522966 and parameters: {'selector_threshold': 'mean', 'learning_rate': 0.11048379809432742, 'max_depth': 8}. Best is trial 11 with value: 0.9034753831395035.


    - Fold 3 accuracy: 0.8961
=== Trial 29 finished. Mean CV accuracy: 0.8923 ===

Optuna finished!
Best CV accuracy: 0.9034753831395035
Best params: {'selector_threshold': 'median', 'learning_rate': 0.19843366032826454, 'max_depth': 7}

--- Building final pipeline on full dataset ---

✅ Final datasets saved!
X_train_final shape: (8692, 256), X_test_final shape: (2173, 256)
Trial log saved at: /content/drive/MyDrive/paneer/csv_files/optuna_trials_log.csv


**Model Training** (without Naive Bayes and Bayesian Network)

In [None]:
import pandas as pd
import numpy as np
import optuna
import warnings
import time
import joblib
import os

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

from sklearn.model_selection import cross_val_score

# Install a blanket "ignore" filter so no Python warning is shown by this kernel process.
# NOTE(review): this also hides useful diagnostics (e.g. solver convergence or
# deprecation warnings) — consider restricting the filter to specific categories.
warnings.filterwarnings('ignore')

def tune_and_save_model(
    model_name: str,
    n_trials: int = 20,
    cv: int = 5,
    data_dir: str = "/content/drive/MyDrive/paneer/csv_files",
    model_dir: str = "/content/drive/MyDrive/paneer/models",
    sampler_seed: int = 42,
) -> None:
    """Tune one classifier with Optuna, refit it on the full training set, and save it.

    The training matrix is read from ``X_train_final.csv`` and the labels from the
    ``label`` column of ``y_train.csv`` (both inside ``data_dir``). Each Optuna trial
    scores a candidate configuration by its mean ``cross_val_score`` over ``cv`` folds.
    The best configuration is then refit on all training rows and dumped with joblib to
    ``<model_dir>/<model_name with spaces replaced by underscores>_best_model.pkl``.

    Args:
        model_name: One of "Logistic Regression", "KNN", "SVM", "Decision Tree",
            "Random Forest", "AdaBoost", "MLP", "XGBoost".
        n_trials: Number of Optuna trials to run.
        cv: Number of cross-validation folds used to score each trial.
        data_dir: Directory containing ``X_train_final.csv`` and ``y_train.csv``.
        model_dir: Directory the fitted model is written to (created if missing).
        sampler_seed: Seed for Optuna's TPE sampler, so the sequence of suggested
            hyperparameters is repeatable between runs.

    Returns:
        None. If the data files are missing, an error message is printed and the
        function returns without tuning or saving anything.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Factories are lazy: only the requested estimator is ever instantiated, and every
    # call yields a fresh, unshared instance (no state leaks between trials or into the
    # final fit).
    estimator_factories = {
        "Logistic Regression": lambda: LogisticRegression(max_iter=2000, random_state=42),
        "KNN": lambda: KNeighborsClassifier(),
        "SVM": lambda: SVC(random_state=42),
        "Decision Tree": lambda: DecisionTreeClassifier(random_state=42),
        "Random Forest": lambda: RandomForestClassifier(random_state=42, n_jobs=-1),
        "AdaBoost": lambda: AdaBoostClassifier(random_state=42),
        "MLP": lambda: MLPClassifier(max_iter=500, random_state=42, early_stopping=True),
        # n_estimators is fixed (not tuned), so it lives in the constructor. That way the
        # final model gets it too, even though it never appears in study.best_params.
        "XGBoost": lambda: xgb.XGBClassifier(
            random_state=42, n_jobs=-1, use_label_encoder=False,
            eval_metric='mlogloss', n_estimators=500,
        ),
    }

    # One search space per model: maps an Optuna trial to the tuned hyperparameters.
    search_spaces = {
        "Logistic Regression": lambda trial: {
            'C': trial.suggest_float('C', 1e-3, 1e3, log=True),
        },
        "KNN": lambda trial: {
            'n_neighbors': trial.suggest_int('n_neighbors', 3, 31),
        },
        "SVM": lambda trial: {
            'C': trial.suggest_float('C', 1e-2, 1e2, log=True),
            'gamma': trial.suggest_float('gamma', 1e-3, 1e-1, log=True),
        },
        "Decision Tree": lambda trial: {
            'max_depth': trial.suggest_int('max_depth', 3, 50),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 32),
        },
        "Random Forest": lambda trial: {
            'n_estimators': trial.suggest_int('n_estimators', 100, 800),
            'max_depth': trial.suggest_int('max_depth', 10, 50),
        },
        "AdaBoost": lambda trial: {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        },
        "MLP": lambda trial: {
            'alpha': trial.suggest_float('alpha', 1e-5, 1e-1, log=True),
            'learning_rate_init': trial.suggest_float('learning_rate_init', 1e-4, 1e-2, log=True),
        },
        "XGBoost": lambda trial: {
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
        },
    }

    # Fail fast with a readable message instead of a KeyError raised from inside the
    # Optuna objective after the data has already been loaded.
    if model_name not in estimator_factories:
        valid_names = ", ".join(estimator_factories)
        raise ValueError(f"Unknown model_name {model_name!r}. Expected one of: {valid_names}")

    print(f"--- 1. Loading Optimized Data for [{model_name}] ---")
    try:
        X_train_final = pd.read_csv(os.path.join(data_dir, "X_train_final.csv")).values
        y_train = pd.read_csv(os.path.join(data_dir, "y_train.csv"))['label'].values
        print("Data loaded successfully.\n")
    except FileNotFoundError:
        print("Error: Could not find data files. Please run Script 1 first.")
        return

    print(f"--- 2. Preparing Tuning Objective for [{model_name}] ---")
    build_estimator = estimator_factories[model_name]
    suggest_params = search_spaces[model_name]

    def objective_model_tuning(trial):
        # Score a fresh estimator configured with this trial's suggestions.
        model = build_estimator().set_params(**suggest_params(trial))
        return cross_val_score(model, X_train_final, y_train, n_jobs=-1, cv=cv).mean()

    def print_callback(study, trial):
        # A trial that did not complete (e.g. NaN score) has value None, which would
        # break the float formatting below.
        if trial.state != optuna.trial.TrialState.COMPLETE:
            print(f"  > Trial {trial.number} did not complete (state: {trial.state.name}).")
            return
        print(f"  > Trial {trial.number} finished. Score: {trial.value:.4f}. Best score: {study.best_value:.4f}")

    print(f"--- 3. Starting Optuna Study for [{model_name}] ---")
    start_time = time.time()
    # Seed the sampler so the suggested hyperparameters are reproducible across runs.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=sampler_seed),
    )

    study.optimize(
        objective_model_tuning,
        n_trials=n_trials,
        callbacks=[print_callback]
    )
    print(f"Study finished in {time.time() - start_time:.1f} seconds.")

    print("\n--- 4. Training and Saving the Best Model ---")
    best_params = study.best_params
    print(f"Best parameters found: {best_params}")

    # Refit a fresh estimator with the winning hyperparameters on all training rows.
    best_model = build_estimator().set_params(**best_params)
    best_model.fit(X_train_final, y_train)

    os.makedirs(model_dir, exist_ok=True)
    save_path = os.path.join(model_dir, f"{model_name.replace(' ', '_')}_best_model.pkl")
    joblib.dump(best_model, save_path)
    print(f"[{model_name}] saved successfully at: {save_path}\n")


In [None]:
# Tune Logistic Regression (regularization strength C) and save the refit best model.
tune_and_save_model("Logistic Regression")

--- 1. Loading Optimized Data for [Logistic Regression] ---


[I 2025-10-15 10:43:24,515] A new study created in memory with name: no-name-747ecb36-9a69-4f17-80b1-8d96dc58662e


Data loaded successfully.

--- 2. Preparing Tuning Objective for [Logistic Regression] ---
--- 3. Starting Optuna Study for [Logistic Regression] ---


[I 2025-10-15 10:45:08,905] Trial 0 finished with value: 0.7356170067185419 and parameters: {'C': 43.90162235960686}. Best is trial 0 with value: 0.7356170067185419.


  > Trial 0 finished. Score: 0.7356. Best score: 0.7356


[I 2025-10-15 10:45:18,759] Trial 1 finished with value: 0.7736985596129146 and parameters: {'C': 0.004626444834674163}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 1 finished. Score: 0.7737. Best score: 0.7737


[I 2025-10-15 10:47:01,840] Trial 2 finished with value: 0.7366526799061137 and parameters: {'C': 70.4173135062354}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 2 finished. Score: 0.7367. Best score: 0.7737


[I 2025-10-15 10:47:13,121] Trial 3 finished with value: 0.7715124693040126 and parameters: {'C': 0.01057647770060889}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 3 finished. Score: 0.7715. Best score: 0.7737


[I 2025-10-15 10:48:37,391] Trial 4 finished with value: 0.7384933473002421 and parameters: {'C': 7.002028541078502}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 4 finished. Score: 0.7385. Best score: 0.7737


[I 2025-10-15 10:48:43,425] Trial 5 finished with value: 0.7634590862438964 and parameters: {'C': 0.0012951719745161819}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 5 finished. Score: 0.7635. Best score: 0.7737


[I 2025-10-15 10:50:28,424] Trial 6 finished with value: 0.7349266240997995 and parameters: {'C': 245.94445166563344}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 6 finished. Score: 0.7349. Best score: 0.7737


[I 2025-10-15 10:50:53,595] Trial 7 finished with value: 0.7556363159918236 and parameters: {'C': 0.16236490173628096}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 7 finished. Score: 0.7556. Best score: 0.7737


[I 2025-10-15 10:52:35,353] Trial 8 finished with value: 0.7352717823226845 and parameters: {'C': 832.828233635322}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 8 finished. Score: 0.7353. Best score: 0.7737


[I 2025-10-15 10:54:15,076] Trial 9 finished with value: 0.7350420297632795 and parameters: {'C': 482.3743949676494}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 9 finished. Score: 0.7350. Best score: 0.7737


[I 2025-10-15 10:54:39,326] Trial 10 finished with value: 0.7558663332431175 and parameters: {'C': 0.1521606155679862}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 10 finished. Score: 0.7559. Best score: 0.7737


[I 2025-10-15 10:54:45,770] Trial 11 finished with value: 0.7647245119908734 and parameters: {'C': 0.0013588565430886163}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 11 finished. Score: 0.7647. Best score: 0.7737


[I 2025-10-15 10:54:59,905] Trial 12 finished with value: 0.7692117674072966 and parameters: {'C': 0.01570378721096641}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 12 finished. Score: 0.7692. Best score: 0.7737


[I 2025-10-15 10:55:13,382] Trial 13 finished with value: 0.7704773255002181 and parameters: {'C': 0.013584128516313866}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 13 finished. Score: 0.7705. Best score: 0.7737


[I 2025-10-15 10:55:27,473] Trial 14 finished with value: 0.7695568594572095 and parameters: {'C': 0.01611100221643221}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 14 finished. Score: 0.7696. Best score: 0.7737


[I 2025-10-15 10:56:00,795] Trial 15 finished with value: 0.7527596445452627 and parameters: {'C': 0.3308896099752765}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 15 finished. Score: 0.7528. Best score: 0.7737


[I 2025-10-15 10:57:00,363] Trial 16 finished with value: 0.7421752114722759 and parameters: {'C': 2.4527821199336755}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 16 finished. Score: 0.7422. Best score: 0.7737


[I 2025-10-15 10:57:10,603] Trial 17 finished with value: 0.7731233179657634 and parameters: {'C': 0.005461907560199394}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 17 finished. Score: 0.7731. Best score: 0.7737


[I 2025-10-15 10:57:18,202] Trial 18 finished with value: 0.7732384589373547 and parameters: {'C': 0.0030405677124782453}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 18 finished. Score: 0.7732. Best score: 0.7737


[I 2025-10-15 10:57:38,939] Trial 19 finished with value: 0.7623089999874271 and parameters: {'C': 0.06340705397721486}. Best is trial 1 with value: 0.7736985596129146.


  > Trial 19 finished. Score: 0.7623. Best score: 0.7737

--- 4. Training and Saving the Best Model ---
Best parameters found: {'C': 0.004626444834674163}
✅ [Logistic Regression] saved successfully at: /content/drive/MyDrive/paneer/models/Logistic_Regression_best_model.pkl



In [None]:
# Tune KNN (n_neighbors) and save the refit best model.
tune_and_save_model("KNN")

--- 1. Loading Optimized Data for [KNN] ---


[I 2025-10-15 11:02:18,550] A new study created in memory with name: no-name-932d6215-85ef-4a81-9804-ed2bab596290


Data loaded successfully.

--- 2. Preparing Tuning Objective for [KNN] ---
--- 3. Starting Optuna Study for [KNN] ---


[I 2025-10-15 11:02:20,106] Trial 0 finished with value: 0.9389091120844419 and parameters: {'n_neighbors': 4}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 0 finished. Score: 0.9389. Best score: 0.9389


[I 2025-10-15 11:02:21,623] Trial 1 finished with value: 0.9389091120844419 and parameters: {'n_neighbors': 4}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 1 finished. Score: 0.9389. Best score: 0.9389


[I 2025-10-15 11:02:23,096] Trial 2 finished with value: 0.8691879451373122 and parameters: {'n_neighbors': 12}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 2 finished. Score: 0.8692. Best score: 0.9389


[I 2025-10-15 11:02:24,479] Trial 3 finished with value: 0.8430716567263834 and parameters: {'n_neighbors': 16}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 3 finished. Score: 0.8431. Best score: 0.9389


[I 2025-10-15 11:02:26,078] Trial 4 finished with value: 0.9186599840787828 and parameters: {'n_neighbors': 6}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 4 finished. Score: 0.9187. Best score: 0.9389


[I 2025-10-15 11:02:28,366] Trial 5 finished with value: 0.8297254284865382 and parameters: {'n_neighbors': 18}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 5 finished. Score: 0.8297. Best score: 0.9389


[I 2025-10-15 11:02:30,660] Trial 6 finished with value: 0.8430716567263834 and parameters: {'n_neighbors': 16}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 6 finished. Score: 0.8431. Best score: 0.9389


[I 2025-10-15 11:02:32,227] Trial 7 finished with value: 0.7452793194242158 and parameters: {'n_neighbors': 31}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 7 finished. Score: 0.7453. Best score: 0.9389


[I 2025-10-15 11:02:33,781] Trial 8 finished with value: 0.7680588357130237 and parameters: {'n_neighbors': 27}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 8 finished. Score: 0.7681. Best score: 0.9389


[I 2025-10-15 11:02:35,263] Trial 9 finished with value: 0.7954404836979574 and parameters: {'n_neighbors': 23}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 9 finished. Score: 0.7954. Best score: 0.9389


[I 2025-10-15 11:02:36,741] Trial 10 finished with value: 0.8946133877186935 and parameters: {'n_neighbors': 9}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 10 finished. Score: 0.8946. Best score: 0.9389


[I 2025-10-15 11:02:38,082] Trial 11 finished with value: 0.9303948342730999 and parameters: {'n_neighbors': 5}. Best is trial 0 with value: 0.9389091120844419.


  > Trial 11 finished. Score: 0.9304. Best score: 0.9389


[I 2025-10-15 11:02:39,513] Trial 12 finished with value: 0.9500682574208025 and parameters: {'n_neighbors': 3}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 12 finished. Score: 0.9501. Best score: 0.9501


[I 2025-10-15 11:02:41,066] Trial 13 finished with value: 0.9500682574208025 and parameters: {'n_neighbors': 3}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 13 finished. Score: 0.9501. Best score: 0.9501


[I 2025-10-15 11:02:43,476] Trial 14 finished with value: 0.8787367711956993 and parameters: {'n_neighbors': 11}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 14 finished. Score: 0.8787. Best score: 0.9501


[I 2025-10-15 11:02:45,705] Trial 15 finished with value: 0.9020922570343524 and parameters: {'n_neighbors': 8}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 15 finished. Score: 0.9021. Best score: 0.9501


[I 2025-10-15 11:02:47,325] Trial 16 finished with value: 0.8600986903707076 and parameters: {'n_neighbors': 13}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 16 finished. Score: 0.8601. Best score: 0.9501


[I 2025-10-15 11:02:48,784] Trial 17 finished with value: 0.9500682574208025 and parameters: {'n_neighbors': 3}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 17 finished. Score: 0.9501. Best score: 0.9501


[I 2025-10-15 11:02:50,139] Trial 18 finished with value: 0.9020922570343524 and parameters: {'n_neighbors': 8}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 18 finished. Score: 0.9021. Best score: 0.9501


[I 2025-10-15 11:02:51,646] Trial 19 finished with value: 0.8066008201478171 and parameters: {'n_neighbors': 21}. Best is trial 12 with value: 0.9500682574208025.


  > Trial 19 finished. Score: 0.8066. Best score: 0.9501

--- 4. Training and Saving the Best Model ---
Best parameters found: {'n_neighbors': 3}
✅ [KNN] saved successfully at: /content/drive/MyDrive/paneer/models/KNN_best_model.pkl



In [None]:
# Tune SVM (C and gamma) and save the refit best model.
tune_and_save_model("SVM")

--- 1. Loading Optimized Data for [SVM] ---


[I 2025-10-15 11:05:43,550] A new study created in memory with name: no-name-97991f90-1843-4712-85b2-333431a35ad5


Data loaded successfully.

--- 2. Preparing Tuning Objective for [SVM] ---
--- 3. Starting Optuna Study for [SVM] ---


[I 2025-10-15 11:07:11,743] Trial 0 finished with value: 0.9389085827006646 and parameters: {'C': 16.916515572790665, 'gamma': 0.002294206871076437}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 0 finished. Score: 0.9389. Best score: 0.9389


[I 2025-10-15 11:08:43,113] Trial 1 finished with value: 0.2596636692516035 and parameters: {'C': 15.118141328479531, 'gamma': 0.01755346718140355}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 1 finished. Score: 0.2597. Best score: 0.9389


[I 2025-10-15 11:10:12,377] Trial 2 finished with value: 0.12505771937498306 and parameters: {'C': 0.6396486231470222, 'gamma': 0.02188307389194149}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 2 finished. Score: 0.1251. Best score: 0.9389


[I 2025-10-15 11:11:37,685] Trial 3 finished with value: 0.10964107118160445 and parameters: {'C': 0.015805732382171, 'gamma': 0.027811221131908887}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 3 finished. Score: 0.1096. Best score: 0.9389


[I 2025-10-15 11:13:09,386] Trial 4 finished with value: 0.10964107118160445 and parameters: {'C': 0.04140661904044152, 'gamma': 0.08585156312752527}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 4 finished. Score: 0.1096. Best score: 0.9389


[I 2025-10-15 11:14:34,683] Trial 5 finished with value: 0.10964107118160445 and parameters: {'C': 0.015911627202650654, 'gamma': 0.009358228853951031}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 5 finished. Score: 0.1096. Best score: 0.9389


[I 2025-10-15 11:16:00,928] Trial 6 finished with value: 0.10964107118160445 and parameters: {'C': 0.19182868826042432, 'gamma': 0.052992127688307925}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 6 finished. Score: 0.1096. Best score: 0.9389


[I 2025-10-15 11:17:28,654] Trial 7 finished with value: 0.10964107118160445 and parameters: {'C': 0.20602211013860144, 'gamma': 0.03317474440476826}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 7 finished. Score: 0.1096. Best score: 0.9389


[I 2025-10-15 11:18:53,230] Trial 8 finished with value: 0.11757931327012931 and parameters: {'C': 0.08091701945025977, 'gamma': 0.002809282595521858}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 8 finished. Score: 0.1176. Best score: 0.9389


[I 2025-10-15 11:20:26,310] Trial 9 finished with value: 0.1256329610221342 and parameters: {'C': 57.57856824208531, 'gamma': 0.07169294913683398}. Best is trial 0 with value: 0.9389085827006646.


  > Trial 9 finished. Score: 0.1256. Best score: 0.9389


[I 2025-10-15 11:21:53,482] Trial 10 finished with value: 0.963529229594406 and parameters: {'C': 4.775510758722773, 'gamma': 0.0010226747698128193}. Best is trial 10 with value: 0.963529229594406.


  > Trial 10 finished. Score: 0.9635. Best score: 0.9635


[I 2025-10-15 11:23:20,400] Trial 11 finished with value: 0.9619185132785993 and parameters: {'C': 5.840868934954009, 'gamma': 0.0011699535156842716}. Best is trial 10 with value: 0.963529229594406.


  > Trial 11 finished. Score: 0.9619. Best score: 0.9635


[I 2025-10-15 11:24:49,784] Trial 12 finished with value: 0.9615734874016587 and parameters: {'C': 3.2562279665908873, 'gamma': 0.0012478956114049973}. Best is trial 10 with value: 0.963529229594406.


  > Trial 12 finished. Score: 0.9616. Best score: 0.9635


[I 2025-10-15 11:26:17,514] Trial 13 finished with value: 0.9616883636813613 and parameters: {'C': 3.2024993609944206, 'gamma': 0.0011816812494691792}. Best is trial 10 with value: 0.963529229594406.


  > Trial 13 finished. Score: 0.9617. Best score: 0.9635


[I 2025-10-15 11:27:45,879] Trial 14 finished with value: 0.8389307506463446 and parameters: {'C': 2.509017327171957, 'gamma': 0.00560835829213599}. Best is trial 10 with value: 0.963529229594406.


  > Trial 14 finished. Score: 0.8389. Best score: 0.9635


[I 2025-10-15 11:29:17,203] Trial 15 finished with value: 0.9370676506146477 and parameters: {'C': 11.221983565033154, 'gamma': 0.0023458178143435913}. Best is trial 10 with value: 0.963529229594406.


  > Trial 15 finished. Score: 0.9371. Best score: 0.9635


[I 2025-10-15 11:30:46,052] Trial 16 finished with value: 0.8615959862121996 and parameters: {'C': 55.327038867980114, 'gamma': 0.004494943758666932}. Best is trial 10 with value: 0.963529229594406.


  > Trial 16 finished. Score: 0.8616. Best score: 0.9635


[I 2025-10-15 11:32:12,473] Trial 17 finished with value: 0.9619185132785993 and parameters: {'C': 1.2700072889290555, 'gamma': 0.001072153546229187}. Best is trial 10 with value: 0.963529229594406.


  > Trial 17 finished. Score: 0.9619. Best score: 0.9635


[I 2025-10-15 11:33:42,790] Trial 18 finished with value: 0.953979675633325 and parameters: {'C': 9.33167462986442, 'gamma': 0.0017233436204535791}. Best is trial 10 with value: 0.963529229594406.


  > Trial 18 finished. Score: 0.9540. Best score: 0.9635


[I 2025-10-15 11:35:12,148] Trial 19 finished with value: 0.7986644970754855 and parameters: {'C': 0.7414858795964644, 'gamma': 0.004446200508174477}. Best is trial 10 with value: 0.963529229594406.


  > Trial 19 finished. Score: 0.7987. Best score: 0.9635

--- 4. Training and Saving the Best Model ---
Best parameters found: {'C': 4.775510758722773, 'gamma': 0.0010226747698128193}
✅ [SVM] saved successfully at: /content/drive/MyDrive/paneer/models/SVM_best_model.pkl



In [None]:
# Tune Decision Tree (max_depth and min_samples_split) and save the refit best model.
tune_and_save_model("Decision Tree")

--- 1. Loading Optimized Data for [Decision Tree] ---


[I 2025-10-15 14:16:50,606] A new study created in memory with name: no-name-63d00692-21aa-416d-a2f0-8f7339901cb5


Data loaded successfully.

--- 2. Preparing Tuning Objective for [Decision Tree] ---
--- 3. Starting Optuna Study for [Decision Tree] ---


[I 2025-10-15 14:17:11,695] Trial 0 finished with value: 0.5011538581158834 and parameters: {'max_depth': 13, 'min_samples_split': 7}. Best is trial 0 with value: 0.5011538581158834.


  > Trial 0 finished. Score: 0.5012. Best score: 0.5012


[I 2025-10-15 14:17:27,648] Trial 1 finished with value: 0.4570894082879001 and parameters: {'max_depth': 11, 'min_samples_split': 2}. Best is trial 0 with value: 0.5011538581158834.


  > Trial 1 finished. Score: 0.4571. Best score: 0.5012


[I 2025-10-15 14:17:47,248] Trial 2 finished with value: 0.5606326400832191 and parameters: {'max_depth': 25, 'min_samples_split': 18}. Best is trial 2 with value: 0.5606326400832191.


  > Trial 2 finished. Score: 0.5606. Best score: 0.5606


[I 2025-10-15 14:18:08,093] Trial 3 finished with value: 0.5916945640888543 and parameters: {'max_depth': 21, 'min_samples_split': 2}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 3 finished. Score: 0.5917. Best score: 0.5917


[I 2025-10-15 14:18:25,712] Trial 4 finished with value: 0.5017300261846451 and parameters: {'max_depth': 15, 'min_samples_split': 26}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 4 finished. Score: 0.5017. Best score: 0.5917


[I 2025-10-15 14:18:44,979] Trial 5 finished with value: 0.53865758861719 and parameters: {'max_depth': 39, 'min_samples_split': 29}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 5 finished. Score: 0.5387. Best score: 0.5917


[I 2025-10-15 14:19:05,470] Trial 6 finished with value: 0.5877812268601387 and parameters: {'max_depth': 27, 'min_samples_split': 7}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 6 finished. Score: 0.5878. Best score: 0.5917


[I 2025-10-15 14:19:25,629] Trial 7 finished with value: 0.5612077493844259 and parameters: {'max_depth': 26, 'min_samples_split': 19}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 7 finished. Score: 0.5612. Best score: 0.5917


[I 2025-10-15 14:19:37,860] Trial 8 finished with value: 0.3720675943676213 and parameters: {'max_depth': 8, 'min_samples_split': 7}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 8 finished. Score: 0.3721. Best score: 0.5917


[I 2025-10-15 14:19:53,740] Trial 9 finished with value: 0.4685943735768675 and parameters: {'max_depth': 12, 'min_samples_split': 16}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 9 finished. Score: 0.4686. Best score: 0.5917


[I 2025-10-15 14:20:13,459] Trial 10 finished with value: 0.5786940234556717 and parameters: {'max_depth': 50, 'min_samples_split': 13}. Best is trial 3 with value: 0.5916945640888543.


  > Trial 10 finished. Score: 0.5787. Best score: 0.5917


[I 2025-10-15 14:20:34,229] Trial 11 finished with value: 0.602967328418446 and parameters: {'max_depth': 34, 'min_samples_split': 2}. Best is trial 11 with value: 0.602967328418446.


  > Trial 11 finished. Score: 0.6030. Best score: 0.6030


[I 2025-10-15 14:20:55,196] Trial 12 finished with value: 0.6023922852902114 and parameters: {'max_depth': 36, 'min_samples_split': 2}. Best is trial 11 with value: 0.602967328418446.


  > Trial 12 finished. Score: 0.6024. Best score: 0.6030


[I 2025-10-15 14:21:15,180] Trial 13 finished with value: 0.5797300275081045 and parameters: {'max_depth': 37, 'min_samples_split': 10}. Best is trial 11 with value: 0.602967328418446.


  > Trial 13 finished. Score: 0.5797. Best score: 0.6030


[I 2025-10-15 14:21:36,462] Trial 14 finished with value: 0.6023922852902114 and parameters: {'max_depth': 37, 'min_samples_split': 2}. Best is trial 11 with value: 0.602967328418446.


  > Trial 14 finished. Score: 0.6024. Best score: 0.6030


[I 2025-10-15 14:21:56,223] Trial 15 finished with value: 0.5808791211699911 and parameters: {'max_depth': 45, 'min_samples_split': 11}. Best is trial 11 with value: 0.602967328418446.


  > Trial 15 finished. Score: 0.5809. Best score: 0.6030


[I 2025-10-15 14:22:15,675] Trial 16 finished with value: 0.5484370274836204 and parameters: {'max_depth': 29, 'min_samples_split': 23}. Best is trial 11 with value: 0.602967328418446.


  > Trial 16 finished. Score: 0.5484. Best score: 0.6030


[I 2025-10-15 14:22:35,779] Trial 17 finished with value: 0.5361263400854028 and parameters: {'max_depth': 32, 'min_samples_split': 32}. Best is trial 11 with value: 0.602967328418446.


  > Trial 17 finished. Score: 0.5361. Best score: 0.6030


[I 2025-10-15 14:22:56,025] Trial 18 finished with value: 0.5992877803004385 and parameters: {'max_depth': 43, 'min_samples_split': 5}. Best is trial 11 with value: 0.602967328418446.


  > Trial 18 finished. Score: 0.5993. Best score: 0.6030


[I 2025-10-15 14:23:15,348] Trial 19 finished with value: 0.5616683794437632 and parameters: {'max_depth': 20, 'min_samples_split': 14}. Best is trial 11 with value: 0.602967328418446.


  > Trial 19 finished. Score: 0.5617. Best score: 0.6030

--- 4. Training and Saving the Best Model ---
Best parameters found: {'max_depth': 34, 'min_samples_split': 2}
✅ [Decision Tree] saved successfully at: /content/drive/MyDrive/paneer/models/Decision_Tree_best_model.pkl



In [None]:
# Tune Random Forest (n_estimators and max_depth) and save the refit best model.
tune_and_save_model("Random Forest")

--- 1. Loading Optimized Data for [Random Forest] ---


[I 2025-10-15 12:55:21,679] A new study created in memory with name: no-name-241b2884-e7f4-483f-b74d-03f038acd183


Data loaded successfully.

--- 2. Preparing Tuning Objective for [Random Forest] ---
--- 3. Starting Optuna Study for [Random Forest] ---


[I 2025-10-15 13:00:11,719] Trial 0 finished with value: 0.9164738937698808 and parameters: {'n_estimators': 433, 'max_depth': 12}. Best is trial 0 with value: 0.9164738937698808.


  > Trial 0 finished. Score: 0.9165. Best score: 0.9165


[I 2025-10-15 13:10:31,003] Trial 1 finished with value: 0.9397134445612766 and parameters: {'n_estimators': 774, 'max_depth': 41}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 1 finished. Score: 0.9397. Best score: 0.9397


[I 2025-10-15 13:16:05,269] Trial 2 finished with value: 0.938103059110331 and parameters: {'n_estimators': 416, 'max_depth': 36}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 2 finished. Score: 0.9381. Best score: 0.9397


[I 2025-10-15 13:24:41,191] Trial 3 finished with value: 0.9323510396766524 and parameters: {'n_estimators': 684, 'max_depth': 16}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 3 finished. Score: 0.9324. Best score: 0.9397


[I 2025-10-15 13:32:18,965] Trial 4 finished with value: 0.9395985021086017 and parameters: {'n_estimators': 574, 'max_depth': 26}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 4 finished. Score: 0.9396. Best score: 0.9397


[I 2025-10-15 13:37:20,431] Trial 5 finished with value: 0.9321202283496923 and parameters: {'n_estimators': 392, 'max_depth': 17}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 5 finished. Score: 0.9321. Best score: 0.9397


[I 2025-10-15 13:46:44,325] Trial 6 finished with value: 0.9367217644890685 and parameters: {'n_estimators': 730, 'max_depth': 17}. Best is trial 1 with value: 0.9397134445612766.


  > Trial 6 finished. Score: 0.9367. Best score: 0.9397


[I 2025-10-15 13:54:05,429] Trial 7 finished with value: 0.9402890170732885 and parameters: {'n_estimators': 552, 'max_depth': 46}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 7 finished. Score: 0.9403. Best score: 0.9403


[I 2025-10-15 14:00:25,874] Trial 8 finished with value: 0.9386777713737045 and parameters: {'n_estimators': 479, 'max_depth': 22}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 8 finished. Score: 0.9387. Best score: 0.9403


[I 2025-10-15 14:03:01,511] Trial 9 finished with value: 0.9300495437042704 and parameters: {'n_estimators': 195, 'max_depth': 37}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 9 finished. Score: 0.9300. Best score: 0.9403


[I 2025-10-15 14:05:54,883] Trial 10 finished with value: 0.9321201621767201 and parameters: {'n_estimators': 218, 'max_depth': 50}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 10 finished. Score: 0.9321. Best score: 0.9403


[I 2025-10-15 14:16:35,564] Trial 11 finished with value: 0.9390231281155061 and parameters: {'n_estimators': 800, 'max_depth': 49}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 11 finished. Score: 0.9390. Best score: 0.9403


[I 2025-10-15 14:24:39,620] Trial 12 finished with value: 0.9394835596559268 and parameters: {'n_estimators': 606, 'max_depth': 42}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 12 finished. Score: 0.9395. Best score: 0.9403


[I 2025-10-15 14:32:27,764] Trial 13 finished with value: 0.9400586689571337 and parameters: {'n_estimators': 587, 'max_depth': 41}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 13 finished. Score: 0.9401. Best score: 0.9403


[I 2025-10-15 14:40:08,257] Trial 14 finished with value: 0.9395985682815737 and parameters: {'n_estimators': 574, 'max_depth': 44}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 14 finished. Score: 0.9396. Best score: 0.9403


[I 2025-10-15 14:44:29,678] Trial 15 finished with value: 0.9347664193341544 and parameters: {'n_estimators': 325, 'max_depth': 32}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 15 finished. Score: 0.9348. Best score: 0.9403


[I 2025-10-15 14:51:23,432] Trial 16 finished with value: 0.9392536747505776 and parameters: {'n_estimators': 518, 'max_depth': 44}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 16 finished. Score: 0.9393. Best score: 0.9403


[I 2025-10-15 15:00:05,643] Trial 17 finished with value: 0.939368153992447 and parameters: {'n_estimators': 657, 'max_depth': 32}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 17 finished. Score: 0.9394. Best score: 0.9403


[I 2025-10-15 15:04:13,200] Trial 18 finished with value: 0.93534179332725 and parameters: {'n_estimators': 311, 'max_depth': 47}. Best is trial 7 with value: 0.9402890170732885.


  > Trial 18 finished. Score: 0.9353. Best score: 0.9403


[I 2025-10-15 15:11:12,013] Trial 19 finished with value: 0.9402890832462608 and parameters: {'n_estimators': 525, 'max_depth': 36}. Best is trial 19 with value: 0.9402890832462608.


  > Trial 19 finished. Score: 0.9403. Best score: 0.9403

--- 4. Training and Saving the Best Model ---
Best parameters found: {'n_estimators': 525, 'max_depth': 36}
✅ [Random Forest] saved successfully at: /content/drive/MyDrive/paneer/models/Random_Forest_best_model.pkl



In [None]:
# Tune AdaBoost (n_estimators and learning_rate) and save the refit best model.
tune_and_save_model("AdaBoost")

--- 1. Loading Optimized Data for [AdaBoost] ---


[I 2025-10-15 14:48:38,745] A new study created in memory with name: no-name-14d21633-8155-4b82-ace8-f54863c7e0b8


Data loaded successfully.

--- 2. Preparing Tuning Objective for [AdaBoost] ---
--- 3. Starting Optuna Study for [AdaBoost] ---


[I 2025-10-15 14:55:09,944] Trial 0 finished with value: 0.3115491026614108 and parameters: {'n_estimators': 216, 'learning_rate': 0.30240424717722986}. Best is trial 0 with value: 0.3115491026614108.


  > Trial 0 finished. Score: 0.3115. Best score: 0.3115


[I 2025-10-15 14:57:43,518] Trial 1 finished with value: 0.19696100625268415 and parameters: {'n_estimators': 86, 'learning_rate': 0.08239185903237159}. Best is trial 0 with value: 0.3115491026614108.


  > Trial 1 finished. Score: 0.1970. Best score: 0.3115


[I 2025-10-15 15:12:15,943] Trial 2 finished with value: 0.34479910216511345 and parameters: {'n_estimators': 486, 'learning_rate': 0.23192930281330612}. Best is trial 2 with value: 0.34479910216511345.


  > Trial 2 finished. Score: 0.3448. Best score: 0.3448


[I 2025-10-15 15:14:03,151] Trial 3 finished with value: 0.28083061638138396 and parameters: {'n_estimators': 59, 'learning_rate': 0.7334975904323715}. Best is trial 2 with value: 0.34479910216511345.


  > Trial 3 finished. Score: 0.2808. Best score: 0.3448


[I 2025-10-15 15:23:24,250] Trial 4 finished with value: 0.3878262906541926 and parameters: {'n_estimators': 313, 'learning_rate': 0.5581526869991507}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 4 finished. Score: 0.3878. Best score: 0.3878


[I 2025-10-15 15:32:24,114] Trial 5 finished with value: 0.38288138296218016 and parameters: {'n_estimators': 302, 'learning_rate': 0.5445465521783219}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 5 finished. Score: 0.3829. Best score: 0.3878


[I 2025-10-15 15:36:40,471] Trial 6 finished with value: 0.32247703963297825 and parameters: {'n_estimators': 143, 'learning_rate': 0.49503303956522415}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 6 finished. Score: 0.3225. Best score: 0.3878


[I 2025-10-15 15:43:08,979] Trial 7 finished with value: 0.32339724098409794 and parameters: {'n_estimators': 217, 'learning_rate': 0.34393687452848515}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 7 finished. Score: 0.3234. Best score: 0.3878


[I 2025-10-15 15:48:41,455] Trial 8 finished with value: 0.36481761736272916 and parameters: {'n_estimators': 186, 'learning_rate': 0.8686597113037874}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 8 finished. Score: 0.3648. Best score: 0.3878


[I 2025-10-15 16:00:34,845] Trial 9 finished with value: 0.28589159146659815 and parameters: {'n_estimators': 398, 'learning_rate': 0.11142673918123373}. Best is trial 4 with value: 0.3878262906541926.


  > Trial 9 finished. Score: 0.2859. Best score: 0.3878


[I 2025-10-15 16:10:29,040] Trial 10 finished with value: 0.40945671328111405 and parameters: {'n_estimators': 330, 'learning_rate': 0.9535371932363237}. Best is trial 10 with value: 0.40945671328111405.


  > Trial 10 finished. Score: 0.4095. Best score: 0.4095


[I 2025-10-15 16:20:30,334] Trial 11 finished with value: 0.4148632436270465 and parameters: {'n_estimators': 333, 'learning_rate': 0.9942149156227372}. Best is trial 11 with value: 0.4148632436270465.


  > Trial 11 finished. Score: 0.4149. Best score: 0.4149


[I 2025-10-15 16:31:57,524] Trial 12 finished with value: 0.42165292143746225 and parameters: {'n_estimators': 380, 'learning_rate': 0.9918149341690835}. Best is trial 12 with value: 0.42165292143746225.


  > Trial 12 finished. Score: 0.4217. Best score: 0.4217


[I 2025-10-15 16:44:08,160] Trial 13 finished with value: 0.42372142237480237 and parameters: {'n_estimators': 404, 'learning_rate': 0.9978140833991707}. Best is trial 13 with value: 0.42372142237480237.


  > Trial 13 finished. Score: 0.4237. Best score: 0.4237


[I 2025-10-15 16:56:55,335] Trial 14 finished with value: 0.4339618883384032 and parameters: {'n_estimators': 426, 'learning_rate': 0.7598121093413158}. Best is trial 14 with value: 0.4339618883384032.


  > Trial 14 finished. Score: 0.4340. Best score: 0.4340


[I 2025-10-15 17:11:21,884] Trial 15 finished with value: 0.44293395077127906 and parameters: {'n_estimators': 480, 'learning_rate': 0.7503341904031118}. Best is trial 15 with value: 0.44293395077127906.


  > Trial 15 finished. Score: 0.4429. Best score: 0.4429


[I 2025-10-15 17:25:48,053] Trial 16 finished with value: 0.4385640200345291 and parameters: {'n_estimators': 483, 'learning_rate': 0.7417684954075124}. Best is trial 15 with value: 0.44293395077127906.


  > Trial 16 finished. Score: 0.4386. Best score: 0.4429


[I 2025-10-15 17:40:29,381] Trial 17 finished with value: 0.4382185309467831 and parameters: {'n_estimators': 492, 'learning_rate': 0.692882881011584}. Best is trial 15 with value: 0.44293395077127906.


  > Trial 17 finished. Score: 0.4382. Best score: 0.4429


[I 2025-10-15 17:54:01,746] Trial 18 finished with value: 0.4348810309219682 and parameters: {'n_estimators': 453, 'learning_rate': 0.8525927101961053}. Best is trial 15 with value: 0.44293395077127906.


  > Trial 18 finished. Score: 0.4349. Best score: 0.4429


[I 2025-10-15 18:07:28,787] Trial 19 finished with value: 0.42130471925785684 and parameters: {'n_estimators': 450, 'learning_rate': 0.6527667206649479}. Best is trial 15 with value: 0.44293395077127906.


  > Trial 19 finished. Score: 0.4213. Best score: 0.4429

--- 4. Training and Saving the Best Model ---
Best parameters found: {'n_estimators': 480, 'learning_rate': 0.7503341904031118}
✅ [AdaBoost] saved successfully at: /content/drive/MyDrive/paneer/models/AdaBoost_best_model.pkl



In [None]:
# Run the tune-and-save pipeline for the MLP (tune_and_save_model is presumably defined in an earlier cell).
# Per the logged run below, the study samples 'alpha' and 'learning_rate_init' and the best model is saved as a .pkl.
tune_and_save_model("MLP")

--- 1. Loading Optimized Data for [MLP] ---


[I 2025-10-15 14:31:33,338] A new study created in memory with name: no-name-5fc384fa-21cf-46f9-888a-3cea9457706a


Data loaded successfully.

--- 2. Preparing Tuning Objective for [MLP] ---
--- 3. Starting Optuna Study for [MLP] ---


[I 2025-10-15 14:32:10,960] Trial 0 finished with value: 0.9129066411856608 and parameters: {'alpha': 0.0010223115894471173, 'learning_rate_init': 0.00025054117563278}. Best is trial 0 with value: 0.9129066411856608.


  > Trial 0 finished. Score: 0.9129. Best score: 0.9129


[I 2025-10-15 14:32:22,314] Trial 1 finished with value: 0.9200400214135739 and parameters: {'alpha': 0.0002758443359595196, 'learning_rate_init': 0.00308787102778192}. Best is trial 1 with value: 0.9200400214135739.


  > Trial 1 finished. Score: 0.9200. Best score: 0.9200


[I 2025-10-15 14:32:54,804] Trial 2 finished with value: 0.918084874777576 and parameters: {'alpha': 1.580356165555417e-05, 'learning_rate_init': 0.000491296878833124}. Best is trial 1 with value: 0.9200400214135739.


  > Trial 2 finished. Score: 0.9181. Best score: 0.9200


[I 2025-10-15 14:33:16,447] Trial 3 finished with value: 0.9202703695297284 and parameters: {'alpha': 0.00014293195746528404, 'learning_rate_init': 0.0009726321723199619}. Best is trial 3 with value: 0.9202703695297284.


  > Trial 3 finished. Score: 0.9203. Best score: 0.9203


[I 2025-10-15 14:33:41,405] Trial 4 finished with value: 0.917394227466945 and parameters: {'alpha': 0.014106366416281256, 'learning_rate_init': 0.0006518031126151203}. Best is trial 3 with value: 0.9202703695297284.


  > Trial 4 finished. Score: 0.9174. Best score: 0.9203


[I 2025-10-15 14:33:53,008] Trial 5 finished with value: 0.9207309995890658 and parameters: {'alpha': 0.0353310865480401, 'learning_rate_init': 0.002764446665494366}. Best is trial 5 with value: 0.9207309995890658.


  > Trial 5 finished. Score: 0.9207. Best score: 0.9207


[I 2025-10-15 14:34:04,978] Trial 6 finished with value: 0.9195799207380141 and parameters: {'alpha': 0.0014396375457648605, 'learning_rate_init': 0.001689622393188119}. Best is trial 5 with value: 0.9207309995890658.


  > Trial 6 finished. Score: 0.9196. Best score: 0.9207


[I 2025-10-15 14:34:17,120] Trial 7 finished with value: 0.9210757607741179 and parameters: {'alpha': 0.00032799411482358973, 'learning_rate_init': 0.0020480452449642243}. Best is trial 7 with value: 0.9210757607741179.


  > Trial 7 finished. Score: 0.9211. Best score: 0.9211


[I 2025-10-15 14:34:56,298] Trial 8 finished with value: 0.9102614427957816 and parameters: {'alpha': 3.143645723909671e-05, 'learning_rate_init': 0.00018234117551433704}. Best is trial 7 with value: 0.9210757607741179.


  > Trial 8 finished. Score: 0.9103. Best score: 0.9211


[I 2025-10-15 14:35:28,802] Trial 9 finished with value: 0.9088806113853245 and parameters: {'alpha': 0.0001058745825254229, 'learning_rate_init': 0.0002179964704782168}. Best is trial 7 with value: 0.9210757607741179.


  > Trial 9 finished. Score: 0.9089. Best score: 0.9211


[I 2025-10-15 14:35:36,814] Trial 10 finished with value: 0.9223405909643454 and parameters: {'alpha': 0.005481055699619951, 'learning_rate_init': 0.008164608435441536}. Best is trial 10 with value: 0.9223405909643454.


  > Trial 10 finished. Score: 0.9223. Best score: 0.9223


[I 2025-10-15 14:35:45,834] Trial 11 finished with value: 0.9253315431338593 and parameters: {'alpha': 0.006725414039263305, 'learning_rate_init': 0.009498497652431997}. Best is trial 11 with value: 0.9253315431338593.


  > Trial 11 finished. Score: 0.9253. Best score: 0.9253


[I 2025-10-15 14:35:56,664] Trial 12 finished with value: 0.9237216870666911 and parameters: {'alpha': 0.00793018080830928, 'learning_rate_init': 0.008919393579439348}. Best is trial 11 with value: 0.9253315431338593.


  > Trial 12 finished. Score: 0.9237. Best score: 0.9253


[I 2025-10-15 14:36:04,609] Trial 13 finished with value: 0.9285519831708898 and parameters: {'alpha': 0.0944155595028503, 'learning_rate_init': 0.008504385013621538}. Best is trial 13 with value: 0.9285519831708898.


  > Trial 13 finished. Score: 0.9286. Best score: 0.9286


[I 2025-10-15 14:36:15,209] Trial 14 finished with value: 0.93142984573095 and parameters: {'alpha': 0.09913461163046518, 'learning_rate_init': 0.004983375773243675}. Best is trial 14 with value: 0.93142984573095.


  > Trial 14 finished. Score: 0.9314. Best score: 0.9314


[I 2025-10-15 14:36:28,272] Trial 15 finished with value: 0.9299351968083454 and parameters: {'alpha': 0.08516239939506985, 'learning_rate_init': 0.004306097960195881}. Best is trial 14 with value: 0.93142984573095.


  > Trial 15 finished. Score: 0.9299. Best score: 0.9314


[I 2025-10-15 14:36:41,602] Trial 16 finished with value: 0.9336154728290469 and parameters: {'alpha': 0.08447161591584045, 'learning_rate_init': 0.005133416724000536}. Best is trial 16 with value: 0.9336154728290469.


  > Trial 16 finished. Score: 0.9336. Best score: 0.9336


[I 2025-10-15 14:36:53,452] Trial 17 finished with value: 0.9264833498876051 and parameters: {'alpha': 0.026599728160300103, 'learning_rate_init': 0.004377713730492036}. Best is trial 16 with value: 0.9336154728290469.


  > Trial 17 finished. Score: 0.9265. Best score: 0.9336


[I 2025-10-15 14:37:10,798] Trial 18 finished with value: 0.9224570553953801 and parameters: {'alpha': 0.038316797283696274, 'learning_rate_init': 0.0014014690532320974}. Best is trial 16 with value: 0.9336154728290469.


  > Trial 18 finished. Score: 0.9225. Best score: 0.9336


[I 2025-10-15 14:37:21,106] Trial 19 finished with value: 0.9226861462250635 and parameters: {'alpha': 0.003571104038414152, 'learning_rate_init': 0.004646715701858819}. Best is trial 16 with value: 0.9336154728290469.


  > Trial 19 finished. Score: 0.9227. Best score: 0.9336

--- 4. Training and Saving the Best Model ---
Best parameters found: {'alpha': 0.08447161591584045, 'learning_rate_init': 0.005133416724000536}
✅ [MLP] saved successfully at: /content/drive/MyDrive/paneer/models/MLP_best_model.pkl



In [None]:
# Run the tune-and-save pipeline for XGBoost (tune_and_save_model is presumably defined in an earlier cell).
# NOTE(review): the logged run below spans roughly five hours (05:18 -> 10:16) for 20 trials;
# consider caching the result before re-running this cell.
tune_and_save_model("XGBoost")

--- 1. Loading Optimized Data for [XGBoost] ---


[I 2025-10-16 05:18:50,539] A new study created in memory with name: no-name-e87b40d4-7845-4e7b-883e-f701c49520da


Data loaded successfully.

--- 2. Preparing Tuning Objective for [XGBoost] ---
--- 3. Starting Optuna Study for [XGBoost] ---


[I 2025-10-16 05:28:49,962] Trial 0 finished with value: 0.9268286404564348 and parameters: {'learning_rate': 0.1643228615169814, 'max_depth': 4}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 0 finished. Score: 0.9268. Best score: 0.9268


[I 2025-10-16 05:39:46,127] Trial 1 finished with value: 0.9229168913790513 and parameters: {'learning_rate': 0.2338044226720043, 'max_depth': 9}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 1 finished. Score: 0.9229. Best score: 0.9268


[I 2025-10-16 05:48:10,757] Trial 2 finished with value: 0.9241823832990006 and parameters: {'learning_rate': 0.27400573213773094, 'max_depth': 5}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 2 finished. Score: 0.9242. Best score: 0.9268


[I 2025-10-16 06:01:05,788] Trial 3 finished with value: 0.9225715346372498 and parameters: {'learning_rate': 0.17634756268073107, 'max_depth': 8}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 3 finished. Score: 0.9226. Best score: 0.9268


[I 2025-10-16 06:10:22,707] Trial 4 finished with value: 0.9260228521742123 and parameters: {'learning_rate': 0.25959184686865566, 'max_depth': 6}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 4 finished. Score: 0.9260. Best score: 0.9268


[I 2025-10-16 06:18:19,135] Trial 5 finished with value: 0.9234917359883694 and parameters: {'learning_rate': 0.2963476702085078, 'max_depth': 5}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 5 finished. Score: 0.9235. Best score: 0.9268


[I 2025-10-16 06:32:47,471] Trial 6 finished with value: 0.8054501383345982 and parameters: {'learning_rate': 0.011576854828098209, 'max_depth': 4}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 6 finished. Score: 0.8055. Best score: 0.9268


[I 2025-10-16 07:08:49,850] Trial 7 finished with value: 0.9216512009401855 and parameters: {'learning_rate': 0.04551472113137741, 'max_depth': 9}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 7 finished. Score: 0.9217. Best score: 0.9268


[I 2025-10-16 07:22:00,879] Trial 8 finished with value: 0.9171645410805119 and parameters: {'learning_rate': 0.0718077225951311, 'max_depth': 4}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 8 finished. Score: 0.9172. Best score: 0.9268


[I 2025-10-16 07:38:03,043] Trial 9 finished with value: 0.9199252113068435 and parameters: {'learning_rate': 0.13513479364611147, 'max_depth': 9}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 9 finished. Score: 0.9199. Best score: 0.9268


[I 2025-10-16 07:45:38,912] Trial 10 finished with value: 0.9146330940298084 and parameters: {'learning_rate': 0.15435564614764677, 'max_depth': 3}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 10 finished. Score: 0.9146. Best score: 0.9268


[I 2025-10-16 07:56:23,237] Trial 11 finished with value: 0.9241825818179171 and parameters: {'learning_rate': 0.21497998677995997, 'max_depth': 7}. Best is trial 0 with value: 0.9268286404564348.


  > Trial 11 finished. Score: 0.9242. Best score: 0.9268


[I 2025-10-16 08:12:11,700] Trial 12 finished with value: 0.9270589223996172 and parameters: {'learning_rate': 0.10855983328259305, 'max_depth': 6}. Best is trial 12 with value: 0.9270589223996172.


  > Trial 12 finished. Score: 0.9271. Best score: 0.9271


[I 2025-10-16 08:28:32,099] Trial 13 finished with value: 0.9270585253617842 and parameters: {'learning_rate': 0.10317006313861614, 'max_depth': 6}. Best is trial 12 with value: 0.9270589223996172.


  > Trial 13 finished. Score: 0.9271. Best score: 0.9271


[I 2025-10-16 08:45:45,827] Trial 14 finished with value: 0.9256779586432158 and parameters: {'learning_rate': 0.10705315055692333, 'max_depth': 7}. Best is trial 12 with value: 0.9270589223996172.


  > Trial 14 finished. Score: 0.9257. Best score: 0.9271


[I 2025-10-16 09:02:54,241] Trial 15 finished with value: 0.9271736001604033 and parameters: {'learning_rate': 0.09576639896717229, 'max_depth': 6}. Best is trial 15 with value: 0.9271736001604033.


  > Trial 15 finished. Score: 0.9272. Best score: 0.9272


[I 2025-10-16 09:27:00,193] Trial 16 finished with value: 0.9226868741277574 and parameters: {'learning_rate': 0.0732947430068756, 'max_depth': 8}. Best is trial 15 with value: 0.9271736001604033.


  > Trial 16 finished. Score: 0.9227. Best score: 0.9272


[I 2025-10-16 09:40:29,229] Trial 17 finished with value: 0.9269439137739702 and parameters: {'learning_rate': 0.11986703785944279, 'max_depth': 5}. Best is trial 15 with value: 0.9271736001604033.


  > Trial 17 finished. Score: 0.9269. Best score: 0.9272


[I 2025-10-16 09:51:25,038] Trial 18 finished with value: 0.9259082405863985 and parameters: {'learning_rate': 0.19534989534517103, 'max_depth': 6}. Best is trial 15 with value: 0.9271736001604033.


  > Trial 18 finished. Score: 0.9259. Best score: 0.9272


[I 2025-10-16 10:16:57,257] Trial 19 finished with value: 0.9211906370538205 and parameters: {'learning_rate': 0.07726752440143321, 'max_depth': 10}. Best is trial 15 with value: 0.9271736001604033.


  > Trial 19 finished. Score: 0.9212. Best score: 0.9272

--- 4. Training and Saving the Best Model ---
Best parameters found: {'learning_rate': 0.09576639896717229, 'max_depth': 6}
✅ [XGBoost] saved successfully at: /content/drive/MyDrive/paneer/models/XGBoost_best_model.pkl



Naive Bayes Training

In [None]:
import pandas as pd
import numpy as np
import optuna
import warnings
import joblib
import os
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Training features as a 2-D NumPy array (all CSV columns are used as features).
X_train = pd.read_csv(
    "/content/drive/MyDrive/paneer/csv_files/X_train_final.csv"
).to_numpy()

# Training targets: only the 'label' column of the label CSV, as a 1-D array.
train_label_frame = pd.read_csv("/content/drive/MyDrive/paneer/csv_files/y_train.csv")
y_train = train_label_frame['label'].to_numpy()

def objective(trial):
    """Optuna objective: mean 5-fold cross-validation score of a GaussianNB model.

    Args:
        trial: Optuna trial used to sample ``var_smoothing`` on a log scale.

    Returns:
        Mean of the five fold scores returned by ``cross_val_score`` (the
        estimator's default scorer) on the module-level ``X_train`` / ``y_train``.
    """
    # The previous upper bound of 1e-6 made the search a no-op: every one of the
    # 20 logged trials produced the identical score. var_smoothing is a fraction
    # of the largest feature variance, so values up to 1.0 are needed before the
    # added variance can change the model at all.
    var_smoothing = trial.suggest_float('var_smoothing', 1e-12, 1.0, log=True)
    model = GaussianNB(var_smoothing=var_smoothing)
    return cross_val_score(model, X_train, y_train, cv=5, n_jobs=-1).mean()

def print_callback(study, trial):
    """Print a one-line progress summary after each Optuna trial.

    Args:
        study: object exposing ``best_value`` (best score so far).
        trial: object exposing ``number`` and ``value`` for the trial that just ended.
    """
    trial_no = trial.number
    score = trial.value
    best_so_far = study.best_value
    print(f"Trial {trial_no} finished. Score: {score:.4f}. Best score: {best_so_far:.4f}")

# Seed the sampler so the sequence of sampled var_smoothing values (and therefore the
# selected "best" value) is reproducible across kernel restarts. 42 matches the
# random_state used for the StratifiedKFold split elsewhere in this notebook.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20, callbacks=[print_callback])

print("\nBest var_smoothing:", study.best_params)

# Refit on the full training set with the winning hyperparameter.
best_model = GaussianNB(var_smoothing=study.best_params['var_smoothing'])
best_model.fit(X_train, y_train)

# Persist the fitted model next to the other tuned models.
os.makedirs("/content/drive/MyDrive/paneer/models", exist_ok=True)
save_path = "/content/drive/MyDrive/paneer/models/Naive_Bayes_best_model.pkl"
joblib.dump(best_model, save_path)
print(f"Naive Bayes model saved at: {save_path}")

[I 2025-10-16 10:24:30,835] A new study created in memory with name: no-name-f6adf4ba-0015-4653-9a2b-1e6a951a53b2
[I 2025-10-16 10:24:33,885] Trial 0 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 2.9240467333093715e-09}. Best is trial 0 with value: 0.5761558267618058.


Trial 0 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:34,271] Trial 1 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.306049753319959e-11}. Best is trial 0 with value: 0.5761558267618058.


Trial 1 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:34,657] Trial 2 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 9.52264243503519e-12}. Best is trial 0 with value: 0.5761558267618058.


Trial 2 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:35,031] Trial 3 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 9.033098977899526e-12}. Best is trial 0 with value: 0.5761558267618058.


Trial 3 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:35,406] Trial 4 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.3040239954786832e-10}. Best is trial 0 with value: 0.5761558267618058.


Trial 4 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:35,791] Trial 5 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 6.925889967528684e-10}. Best is trial 0 with value: 0.5761558267618058.


Trial 5 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:36,166] Trial 6 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 4.837036111591765e-11}. Best is trial 0 with value: 0.5761558267618058.


Trial 6 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:36,542] Trial 7 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 7.895277005239315e-07}. Best is trial 0 with value: 0.5761558267618058.


Trial 7 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:36,939] Trial 8 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.0276834635899185e-08}. Best is trial 0 with value: 0.5761558267618058.


Trial 8 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:37,318] Trial 9 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 7.305811385042971e-11}. Best is trial 0 with value: 0.5761558267618058.


Trial 9 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:37,690] Trial 10 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 2.2538698358950895e-08}. Best is trial 0 with value: 0.5761558267618058.


Trial 10 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:38,099] Trial 11 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.105560738658117e-12}. Best is trial 0 with value: 0.5761558267618058.


Trial 11 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:38,474] Trial 12 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 3.2760032531956337e-09}. Best is trial 0 with value: 0.5761558267618058.


Trial 12 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:38,753] Trial 13 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 4.979948223466058e-08}. Best is trial 0 with value: 0.5761558267618058.


Trial 13 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:39,155] Trial 14 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 7.198018534670384e-10}. Best is trial 0 with value: 0.5761558267618058.


Trial 14 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:39,434] Trial 15 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 2.7413177271222676e-07}. Best is trial 0 with value: 0.5761558267618058.


Trial 15 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:39,811] Trial 16 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.0030756002281737e-12}. Best is trial 0 with value: 0.5761558267618058.


Trial 16 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:40,277] Trial 17 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 2.4254141372035638e-09}. Best is trial 0 with value: 0.5761558267618058.


Trial 17 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:40,789] Trial 18 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 1.0508194251547628e-11}. Best is trial 0 with value: 0.5761558267618058.


Trial 18 finished. Score: 0.5762. Best score: 0.5762


[I 2025-10-16 10:24:41,338] Trial 19 finished with value: 0.5761558267618058 and parameters: {'var_smoothing': 4.153348405098605e-10}. Best is trial 0 with value: 0.5761558267618058.


Trial 19 finished. Score: 0.5762. Best score: 0.5762

Best var_smoothing: {'var_smoothing': 2.9240467333093715e-09}
Naive Bayes model saved at: /content/drive/MyDrive/paneer/models/Naive_Bayes_best_model.pkl


Bayesian Network Training

In [None]:
!pip install bnlearn

Collecting bnlearn
  Downloading bnlearn-0.12.0-py3-none-any.whl.metadata (15 kB)
Collecting pgmpy==0.1.25 (from bnlearn)
  Downloading pgmpy-0.1.25-py3-none-any.whl.metadata (6.4 kB)
Collecting ismember (from bnlearn)
  Downloading ismember-1.1.0-py3-none-any.whl.metadata (4.8 kB)
Collecting funcsigs (from bnlearn)
  Downloading funcsigs-1.0.2-py2.py3-none-any.whl.metadata (14 kB)
Collecting df2onehot (from bnlearn)
  Downloading df2onehot-1.0.8-py3-none-any.whl.metadata (3.3 kB)
Collecting pypickle (from bnlearn)
  Downloading pypickle-2.0.1-py3-none-any.whl.metadata (7.2 kB)
Collecting datazets>=1.1.2 (from bnlearn)
  Downloading datazets-1.1.3-py3-none-any.whl.metadata (13 kB)
Collecting setgraphviz>=1.0.3 (from bnlearn)
  Downloading setgraphviz-1.0.3-py3-none-any.whl.metadata (5.1 kB)
Collecting lingam (from bnlearn)
  Downloading lingam-1.11.0-py3-none-any.whl.metadata (9.3 kB)
Collecting pygam (from lingam->bnlearn)
  Downloading pygam-0.10.1-py3-none-any.whl.metadata (9.7 kB)


In [None]:
# BAYESIAN NETWORK ANALYSIS
import pandas as pd
import numpy as np
import warnings
import time

try:
    import bnlearn as bn
except ImportError:
    print("Error: bnlearn not found. Please install it first: pip install bnlearn")
    # Re-raise rather than calling exit(): in a notebook exit() ends the session instead of
    # showing the error, whereas raising stops this cell with a visible traceback.
    raise

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, balanced_accuracy_score, accuracy_score

warnings.filterwarnings('ignore')

# --- 1. Load the Pre-processed Data ---
print("--- 1. Loading Optimized Data from Script 1 ---")
try:
    # Features are kept as DataFrames; labels are reduced to the 'label' column as arrays.
    X_train_final = pd.read_csv("/content/drive/MyDrive/paneer/csv_files/X_train_final.csv")
    X_test_final = pd.read_csv("/content/drive/MyDrive/paneer/csv_files/X_test_final.csv")
    y_train = pd.read_csv("/content/drive/MyDrive/paneer/csv_files/y_train.csv")['label'].values
    y_test = pd.read_csv("/content/drive/MyDrive/paneer/csv_files/y_test.csv")['label'].values

    # Combine into a single training DataFrame for bnlearn
    train_df = X_train_final.copy()
    train_df['label'] = y_train

    # bnlearn requires all column names to be strings
    train_df.columns = train_df.columns.astype(str)
    X_test_final.columns = X_test_final.columns.astype(str)

    print("Data loaded successfully.\n")
except FileNotFoundError:
    print("Error: Could not find data files. Please run Script 1 first.")
    # Re-raise rather than calling exit(): the cell then fails with a visible traceback
    # (same handling as the benchmarking cell) instead of ending the notebook session.
    raise

# --- 2. Main Structure Learning (The Slowest Step, Done Once) ---
print("--- 2. Starting Main Structure Learning (this will take a long time) ---")
print("The script will learn the network graph from the entire training dataset.")
start_time_structure = time.time()

# Hill-climbing search ('hc') scored with BIC yields the DAG reused by every later step.
# NOTE(review): the captured output for this cell stops right after bnlearn reports the
# scoring type, so this call may not have completed in the recorded run — confirm.
main_structure = bn.structure_learning.fit(
    train_df,
    methodtype='hc',
    scoretype='bic',
)

structure_minutes = (time.time() - start_time_structure) / 60
print(f"Structure learning finished in {structure_minutes:.2f} minutes.\n")

# --- 3. Main Parameter Learning ---
print("--- 3. Learning Parameters for the Final Model ---")
# Estimate the conditional probability distributions (CPDs) of the learned graph
# from the full training frame using maximum likelihood.
final_model_bn = bn.parameter_learning.fit(
    main_structure,
    train_df,
    methodtype='maximumlikelihood',
)
print("Final model created.\n")

# --- 4. Efficient Stability Calculation (CV) ---
print("--- 4. Calculating Stability (CV Standard Deviation) ---")
print("This will reuse the learned structure for speed.")
start_time_cv = time.time()

cv_scores_bn = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# NOTE: main_structure was learned on the full training set, so every fold's validation rows
# already influenced the graph. The fold scores below therefore reflect the stability of
# parameter learning only, not of the whole structure + parameter pipeline.
for fold, (train_idx, test_idx) in enumerate(skf.split(X_train_final, y_train)):
    print(f"  - Processing Fold {fold+1}/5...")

    # Get the data for this fold
    bn_fold_train = train_df.iloc[train_idx]
    bn_fold_test = train_df.iloc[test_idx]

    # EFFICIENT STEP: Relearn parameters only, using the main structure. This is fast.
    # methodtype is passed explicitly so each fold uses the same estimator as the final
    # model; without it bnlearn falls back to its default estimator, which differs.
    model_fold = bn.parameter_learning.fit(main_structure, bn_fold_train, methodtype='maximumlikelihood')

    # Predict on the validation part of the fold
    preds_fold = bn.predict(model_fold, bn_fold_test.drop('label', axis=1), variables=['label'])['label']
    cv_scores_bn.append(accuracy_score(bn_fold_test['label'], preds_fold))

# Spread of the per-fold accuracies; reported later as 'CV_Std_Dev'.
cv_std_dev = np.std(cv_scores_bn)
print(f"Stability calculation finished in {(time.time() - start_time_cv) / 60:.2f} minutes.\n")


# --- 5. Final Evaluation on the Test Set ---
print("--- 5. Evaluating Final Model on Unseen Test Data ---")
# bn.predict returns a frame; only its 'label' column (the predicted class) is kept.
bn_prediction_frame = bn.predict(final_model_bn, X_test_final, variables=['label'])
bn_preds = bn_prediction_frame['label']

report_bn = classification_report(
    y_test,
    bn_preds,
    output_dict=True,
    zero_division=0,
)
bal_acc_bn = balanced_accuracy_score(y_test, bn_preds)

# Same four metrics the benchmarking cell records for every other model.
results = dict(
    Accuracy=report_bn['accuracy'],
    Macro_F1_Score=report_bn['macro avg']['f1-score'],
    Balanced_Accuracy=bal_acc_bn,
    CV_Std_Dev=cv_std_dev,
)

# --- 6. Display Final Results ---
print("\n--- BAYESIAN NETWORK RESULTS ---")
print("Performance on the unseen test set & stability on the training set:\n")
summary_fields = [
    f"{'Bayesian Network':<20}",
    f"Acc: {results['Accuracy']:.4f}",
    f"Macro F1: {results['Macro_F1_Score']:.4f}",
    f"Bal Acc: {results['Balanced_Accuracy']:.4f}",
    f"CV Std: {results['CV_Std_Dev']:.4f}",
]
print(" | ".join(summary_fields))

--- 1. Loading Optimized Data from Script 1 ---
Data loaded successfully.

--- 2. Starting Main Structure Learning (this will take a long time) ---
The script will learn the network graph from the entire training dataset.
[bnlearn] >Computing best DAG using [hc]
[bnlearn] >Set scoring type at [bic]


Benchmarking Models (without the Bayesian Network)

In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, balanced_accuracy_score
from sklearn.model_selection import cross_val_score
import os

# Silence every warning category for the rest of the session so the benchmark
# log stays readable.
warnings.filterwarnings('ignore')

# --- 1. Load Train/Test Data ---
# All four splits live in the same Drive folder; name it once so relocating the
# data means editing a single line instead of four.
CSV_DIR = "/content/drive/MyDrive/paneer/csv_files"

print("--- 1. Loading Train/Test Data ---")
try:
    # Feature files are used whole; target files contribute only their 'label'
    # column. `.values` turns each into a plain NumPy array.
    X_train_final = pd.read_csv(os.path.join(CSV_DIR, "X_train_final.csv")).values
    y_train = pd.read_csv(os.path.join(CSV_DIR, "y_train.csv"))['label'].values
    X_test_final = pd.read_csv(os.path.join(CSV_DIR, "X_test_final.csv")).values
    y_test = pd.read_csv(os.path.join(CSV_DIR, "y_test.csv"))['label'].values
    print("Train and Test data loaded successfully.\n")
except FileNotFoundError as e:
    # Report which file is missing, then re-raise: nothing below can run
    # without all four arrays.
    print("Error loading CSV files:", e)
    raise

# --- 2. Load Models and Evaluate ---
def _summarize_metrics(y_true, y_pred, fold_scores):
    """Build one benchmark entry from test-set predictions and CV fold scores.

    Args:
        y_true: Ground-truth labels of the test samples.
        y_pred: Labels predicted for those same samples.
        fold_scores: Per-fold score array as returned by ``cross_val_score``.

    Returns:
        dict with the keys 'Accuracy', 'Macro_F1_Score', 'Balanced_Accuracy'
        and 'CV_Std_Dev' (standard deviation of ``fold_scores``).
    """
    # output_dict=True exposes the report as a nested dict; zero_division=0
    # reports undefined scores as 0.
    report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    return {
        'Accuracy': report['accuracy'],
        'Macro_F1_Score': report['macro avg']['f1-score'],
        'Balanced_Accuracy': balanced_accuracy_score(y_true, y_pred),
        'CV_Std_Dev': fold_scores.std()
    }


print("--- 2. Loading Saved Models and Evaluating ---")
model_names = [
    "Logistic Regression", "KNN", "SVM", "Decision Tree",
    "Random Forest", "AdaBoost", "MLP" , "XGBoost"
]

# Maps model name -> metrics dict, or -> an explanatory string when the saved
# model file is missing.
results = {}
model_folder = "/content/drive/MyDrive/paneer/models"

for name in model_names:
    # Saved files follow "<Name_With_Underscores>_best_model.pkl".
    filename = os.path.join(model_folder, f"{name.replace(' ', '_')}_best_model.pkl")
    print(f"Checking {name}: {filename}")
    if not os.path.exists(filename):
        # Keep a placeholder so the model still shows up in the final table.
        results[name] = "Model file not found. Please run the tuner script first."
        print(f"{name} model file not found.\n")
        continue

    print(f"Loading {name}...")
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load model files produced by a trusted source.
    model = joblib.load(filename)
    # cross_val_score fits clones of the estimator on each fold, so the loaded
    # model itself is left as-is for the test-set prediction below.
    cv_scores = cross_val_score(model, X_train_final, y_train, cv=5, n_jobs=-1)
    preds = model.predict(X_test_final)
    results[name] = _summarize_metrics(y_test, preds, cv_scores)
    print(f"{name} loaded and evaluated.\n")

# --- 3. Evaluate Naive Bayes (train fresh) ---
# No saved file is loaded for this model: it is fitted here on the training
# split, and a separate unfitted instance is cross-validated for the spread.
print("Evaluating Naive Bayes...")
nb_model = GaussianNB().fit(X_train_final, y_train)
nb_preds = nb_model.predict(X_test_final)
nb_cv_scores = cross_val_score(GaussianNB(), X_train_final, y_train, cv=5, n_jobs=-1)
results["Naive Bayes"] = _summarize_metrics(y_test, nb_preds, nb_cv_scores)

# --- 4. Display Final Sorted Results ---
print("\n--- FINAL BENCHMARK RESULTS ---")
# Rank by macro F1, best first. Entries that hold a message instead of a
# metrics dict get the key -1.
sorted_results = sorted(
    results.items(),
    key=lambda entry: entry[1]['Macro_F1_Score'] if isinstance(entry[1], dict) else -1,
    reverse=True,
)
for name, metrics in sorted_results:
    if not isinstance(metrics, dict):
        # No scores available: print the stored message in place of a metrics row.
        print(f"{name:<20} | {metrics}")
        continue
    print(
        f"{name:<20} | Acc: {metrics['Accuracy']:.4f} | Macro F1: {metrics['Macro_F1_Score']:.4f} | "
        f"Bal Acc: {metrics['Balanced_Accuracy']:.4f} | CV Std: {metrics['CV_Std_Dev']:.4f}"
    )


--- 1. Loading Train/Test Data ---
Train and Test data loaded successfully.

--- 2. Loading Saved Models and Evaluating ---
Checking Logistic Regression: /content/drive/MyDrive/paneer/models/Logistic_Regression_best_model.pkl
Loading Logistic Regression...
Logistic Regression loaded and evaluated.

Checking KNN: /content/drive/MyDrive/paneer/models/KNN_best_model.pkl
Loading KNN...
KNN loaded and evaluated.

Checking SVM: /content/drive/MyDrive/paneer/models/SVM_best_model.pkl
Loading SVM...
SVM loaded and evaluated.

Checking Decision Tree: /content/drive/MyDrive/paneer/models/Decision_Tree_best_model.pkl
Loading Decision Tree...
Decision Tree loaded and evaluated.

Checking Random Forest: /content/drive/MyDrive/paneer/models/Random_Forest_best_model.pkl
Loading Random Forest...
Random Forest loaded and evaluated.

Checking AdaBoost: /content/drive/MyDrive/paneer/models/AdaBoost_best_model.pkl
Loading AdaBoost...
AdaBoost loaded and evaluated.

Checking MLP: /content/drive/MyDrive/pan