<a href="https://colab.research.google.com/github/falishak/project-5/blob/main/Copy_of_HP_tuning_recap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Full example: hyperparameter tuning on the Iris dataset

import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# 1. Load the dataset as a feature matrix X and a label vector y
X, y = load_iris(return_X_y=True)

# 2. Define models and their parameter grids
# `models` maps a display name to the estimator *class*; a fresh instance is
# built for every candidate value in the loop below.
models = {
    'LogisticRegression': LogisticRegression,
    'RandomForestClassifier': RandomForestClassifier,
    'SVC': SVC
}

# Candidate values to try, keyed by model name and then by hyperparameter name.
param_grid = {
    'LogisticRegression': {'C': [0.01, 0.1, 1, 10]},
    'RandomForestClassifier': {'n_estimators': [10, 50, 100]},
    'SVC': {'kernel': ['linear', 'rbf']}
}

# Keyword arguments held constant for every candidate of a model (not tuned).
fixed_params = {
    # Raise the solver's iteration cap above scikit-learn's default so the fit
    # has room to converge instead of relying on the warning filter to hide
    # ConvergenceWarning.
    'LogisticRegression': {'max_iter': 1000},
    # Seed the forest so the reported CV accuracies are identical on re-runs.
    'RandomForestClassifier': {'random_state': 42},
    'SVC': {}
}

# 3. Hyperparameter tuning loop
# Each hyperparameter is swept on its own (one value at a time), not as a
# cross-product with other hyperparameters.
for name, Model in models.items():
    print(f"\n=== {name} ===")
    for param_name, values in param_grid[name].items():
        for v in values:
            # instantiate the model with its fixed settings plus the current
            # candidate value for the hyperparameter being swept
            model = Model(**fixed_params[name], **{param_name: v})
            # evaluate with 5-fold CV; `scores` holds one accuracy per fold
            scores = cross_val_score(model, X, y, cv=5)
            print(f"{param_name}={v:<6}  Mean CV Accuracy: {scores.mean():.3f}")

# 4. Note: warnings are still silenced globally by the filter at the top of
#    this cell; the max_iter setting above is what addresses LogisticRegression
#    convergence rather than the filter.


=== LogisticRegression ===
C=0.01    Mean CV Accuracy: 0.860
C=0.1     Mean CV Accuracy: 0.947
C=1       Mean CV Accuracy: 0.973
C=10      Mean CV Accuracy: 0.980

=== RandomForestClassifier ===
n_estimators=10      Mean CV Accuracy: 0.947
n_estimators=50      Mean CV Accuracy: 0.953
n_estimators=100     Mean CV Accuracy: 0.967

=== SVC ===
kernel=linear  Mean CV Accuracy: 0.980
kernel=rbf     Mean CV Accuracy: 0.967


In [6]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# 1. Load the dataset as a feature matrix X and a label vector y
X, y = load_iris(return_X_y=True)

# 2. Define models and their parameter grids
# `models` maps a display name to a base estimator instance; GridSearchCV fits
# copies of it with each candidate from the matching `param_grid` entry.
models = {
    # Raise the solver's iteration cap above scikit-learn's default; any
    # ConvergenceWarning would otherwise be hidden by the warning filter at
    # the top of this cell.
    'LogisticRegression': LogisticRegression(max_iter=1000),
    # Seed the forest so best_params_ / best_score_ are identical on re-runs.
    'RandomForestClassifier': RandomForestClassifier(random_state=42),
    'SVC': SVC()
}

param_grid = {
    'LogisticRegression': {'C': [0.01, 0.1, 1, 10]},
    'RandomForestClassifier': {'n_estimators': [10, 50, 100]},
    'SVC': {'kernel': ['linear', 'rbf']}
}

# 3. Grid search loop
for name, estimator in models.items():
    print(f"\n=== {name} ===")
    # 5-fold cross-validated search over this model's grid, scored by accuracy
    grid = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid[name],
        cv=5,
        scoring='accuracy',
        return_train_score=False
    )
    grid.fit(X, y)
    print(f"Best params: {grid.best_params_}")
    print(f"Best CV accuracy: {grid.best_score_:.3f}")
    print("All results:")
    # cv_results_ stores one entry per candidate; walk the mean score, its
    # standard deviation across folds, and the parameter dict in lockstep.
    for mean, std, params in zip(
            grid.cv_results_['mean_test_score'],
            grid.cv_results_['std_test_score'],
            grid.cv_results_['params']
        ):
        print(f"  {params} → {mean:.3f} (±{std:.3f})")



=== LogisticRegression ===


KeyboardInterrupt: 

In [4]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting numpy<1.27,>=1.21 (from pycaret)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas<2.2.0 (from pycaret)
  Downloading pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.4.tar.gz (169 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# 0. Install PyCaret (run this once in your Jupyter environment)
!pip install -q pycaret

# 1. Suppress warnings and imports
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.datasets import load_iris
from IPython.display import display

from pycaret.classification import (
    setup,
    create_model,
    tune_model,
    pull
)

# 2. Fetch Iris as a single pandas DataFrame and rename its label column
#    from 'target' to 'species'
data = load_iris(as_frame=True)
df = data.frame.rename(columns={'target': 'species'})

# 3. Configure the PyCaret classification experiment
#    - target:     name of the label column in `df`
#    - session_id: fixed seed value handed to PyCaret
#    - verbose:    False keeps setup from printing its log
clf_setup = setup(data=df, target='species', session_id=42, verbose=False)

# 4. Define and tune each model with one iteration
# Display name -> PyCaret estimator ID passed to create_model().
model_ids = {
    'Logistic Regression': 'lr',
    'Random Forest':      'rf',
    # 'rbfsvm' is PyCaret's radial-kernel SVM. The bare 'svm' ID selects the
    # linear-kernel SVM, which would not match this label.
    'SVM (RBF kernel)':   'rbfsvm'
}

results = []
for name, m_id in model_ids.items():
    # create base model
    base_model = create_model(m_id, verbose=False)
    # tune with exactly one iteration (a single sampled candidate), optimizing
    # for accuracy
    tuned_model = tune_model(
        estimator=base_model,
        n_iter=1,
        optimize='Accuracy',
        verbose=False
    )
    # grab the score grid of the tune_model call just made and annotate it.
    # The grid's row labels are moved into a 'Fold' column first, because the
    # concat below resets the index and would otherwise throw them away.
    # NOTE(review): in PyCaret 3.x these labels are the fold numbers plus
    # 'Mean' and 'Std' summary rows — confirm against the installed version.
    res = pull().rename_axis('Fold').reset_index().assign(Model=name)
    results.append(res)

# 5. Combine into one DataFrame and display
# Each row is identified by its 'Model' and 'Fold' columns.
df_results = pd.concat(results, ignore_index=True)
display(df_results)
