In [4]:
import numpy as np  
import pandas as pd  
from sklearn.pipeline import Pipeline  
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, cross_val_score  
from sklearn.preprocessing import StandardScaler, MinMaxScaler  
from sklearn.neighbors import KNeighborsClassifier  
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier  
from sklearn.svm import SVC  
from sklearn.linear_model import LogisticRegression  
from sklearn.neural_network import MLPClassifier  
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA  
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score  
from scipy.stats import uniform, randint  
from sklearn.decomposition import PCA  

In [5]:
def Grid_search(features, target):  
    # Split features and target variable  
    X = features  
    y = target  

    # Initialize Stratified K-Fold
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)  

    # Hyperparameter grids using lists for RandomizedSearchCV  
    param_grids = {  
        "KNN": {  
            #'lda__n_components': [1, 2],  
            'classifier__n_neighbors': list(range(1, 30)),  # List of integers from 1 to 29  
            'classifier__weights': ['uniform', 'distance']  
        },  
        "Random Forest": {  
            #'lda__n_components': [1, 2],  
            'classifier__n_estimators': list(range(50, 300)),  # List of integers from 50 to 299  
            'classifier__max_depth': [None] + list(range(5, 31, 5)),  # [None, 5, 10, 15, 20, 25, 30]  
            'classifier__min_samples_split': [2, 5, 10, 15]  
        },  
        "SVM": {  
           # 'lda__n_components': [1, 2],  
            'classifier__C': uniform(loc=0.001, scale=99.999),  # Uniform distribution from 0.001 to 100  
            'classifier__kernel': ['linear', 'rbf']  
        },  
        "Logistic Regression": {  
            #'lda__n_components': [1, 2],  
            'classifier__C': uniform(loc=0.001, scale=99.999),  # To explore a range of regularization strengths  
            'classifier__solver': ['lbfgs', 'liblinear']  
        },  
        "ANN": {  
            #'lda__n_components': [1, 2],  
            'classifier__hidden_layer_sizes': [(50,), (100,), (150,), (50, 50), (100, 100)],  
            'classifier__activation': ['relu', 'tanh', 'logistic'],  
            'classifier__alpha': uniform(loc=1e-4, scale=1e-1),  # Uniform distribution between 0.0001 and 0.1  
            'classifier__max_iter': [1000]  # Fixed max_iter for ANN  
        }  
    }  

    # Models to test  
    models = {  
        "KNN": KNeighborsClassifier(),  
        "Random Forest": RandomForestClassifier(),  
        "SVM": SVC(probability=True),  # Enable probability for ROC AUC  
        "Logistic Regression": LogisticRegression(),  
        "ANN": MLPClassifier()  
    }  

    # Define multiple metrics for evaluation  
    metrics = {  
        'accuracy': make_scorer(accuracy_score),  
        'precision': make_scorer(precision_score, average='weighted'),  
        'recall': make_scorer(recall_score, average='weighted'),  
        'f1': make_scorer(f1_score, average='weighted')  
    }  

    # Results dictionary  
    results = {}  
    for model_name, model in models.items():  
        # Create pipeline with scaling, LDA, and classifier  
        pipeline = Pipeline([  
            ('scaler', StandardScaler()),  
            #('lda', LDA()),  
            ('classifier', model)  
        ])  

        # Choose search strategy based on model  
        if model_name in ["Random Forest", "SVM", "Logistic Regression", "ANN"]:  
            search = RandomizedSearchCV(pipeline, param_grids[model_name], n_iter=50,  
                                        cv=skf, scoring='accuracy', n_jobs=-1, random_state=42)  
        else:  
            search = GridSearchCV(pipeline, param_grids[model_name], cv=skf, scoring='accuracy', n_jobs=-1)  

        # Fit the model to the data  
        search.fit(X, y)  

        best_params = search.best_params_  
        best_score = search.best_score_  # The best cross-validated score  
        results[model_name] = (best_params, best_score)  

        # Evaluate the model with other metrics  
        for metric_name, scorer in metrics.items():  
            score = cross_val_score(search.best_estimator_, X, y, cv=skf, scoring=scorer)  
            results[model_name] += (metric_name, score.mean())  # Append the mean score for each metric  

    # Print out the results  
    for model_name, metrics in results.items():  
        best_params, best_score = metrics[:2]  
        print(f"{model_name}: Best Parameters = {best_params}, Best CV Accuracy = {best_score:.4f}")  
        for i in range(2, len(metrics), 2):  
            print(f"    {metrics[i]}: {metrics[i + 1]:.4f}")  

# Example usage with defined 'features' and 'target'  

In [9]:
all_data = pd.read_csv('./datasets/augmented_dataset.csv')
target = all_data[['storage_1', 'storage_2', 'storage_3']].idxmax(axis=1).str[-1].astype(int)  
features_sc_pt = all_data.drop(columns=['Replica', 'storage_1', 'storage_2', 'storage_3'])  
features_sc = all_data.iloc[:, 5:] 

In [None]:

Grid_search(features_sc, target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

KNN: Best Parameters = {'classifier__n_neighbors': 1, 'classifier__weights': 'uniform'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9786
    recall: 0.9722
    f1: 0.9715
Random Forest: Best Parameters = {'classifier__n_estimators': 281, 'classifier__min_samples_split': 2, 'classifier__max_depth': 30}, Best CV Accuracy = 0.9667
    accuracy: 0.9611
    precision: 0.9702
    recall: 0.9611
    f1: 0.9657
SVM: Best Parameters = {'classifier__C': 70.80754970702675, 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9556
    accuracy: 0.9556
    precision: 0.9627
    recall: 0.9556
    f1: 0.9548
Logistic Regression: Best Parameters = {'classifier__C': 37.454637344617396, 'classifier__solver': 'lbfgs'}, Best CV Accuracy = 0.9556
    accuracy: 0.9556
    precision: 0.9616
    recall: 0.9556
    f1: 0.9554
ANN: Best Parameters = {'classifier__activation': 'relu', 'classifier__alpha': 0.0066051592985279526, 'classifier__hidden_layer_sizes': (50, 50), 'classifier__max_iter

In [10]:
Grid_search(features_sc_pt, target)

  _data = np.array(data, dtype=dtype, copy=copy,
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refe

KNN: Best Parameters = {'classifier__n_neighbors': 1, 'classifier__weights': 'uniform'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9786
    recall: 0.9722
    f1: 0.9715
Random Forest: Best Parameters = {'classifier__n_estimators': 97, 'classifier__min_samples_split': 2, 'classifier__max_depth': 10}, Best CV Accuracy = 0.9722
    accuracy: 0.9611
    precision: 0.9655
    recall: 0.9667
    f1: 0.9660
SVM: Best Parameters = {'classifier__C': 70.80754970702675, 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9611
    accuracy: 0.9611
    precision: 0.9690
    recall: 0.9611
    f1: 0.9604
Logistic Regression: Best Parameters = {'classifier__C': 61.165704395667596, 'classifier__solver': 'lbfgs'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9755
    recall: 0.9722
    f1: 0.9718
ANN: Best Parameters = {'classifier__activation': 'relu', 'classifier__alpha': 0.09621720243493492, 'classifier__hidden_layer_sizes': (100, 100), 'classifier__max_iter'

In [11]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import uniform

def Grid_search_with_LDA(features, target):
    # Split features and target variable
    X = features
    y = target

    # Initialize Stratified K-Fold
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    # Hyperparameter grids for LDA-based pipelines
    param_grids = {
        "KNN": {
            #'lda__n_components': [1, 2],  # Adjust based on your dataset
            'classifier__n_neighbors': list(range(1, 30)),
            'classifier__weights': ['uniform', 'distance']
        },
        "Random Forest": {
            #'lda__n_components': [1, 2],  # Adjust based on your dataset
            'classifier__n_estimators': list(range(50, 300)),
            'classifier__max_depth': [None] + list(range(5, 31, 5)),
            'classifier__min_samples_split': [2, 5, 10, 15]
        },
        "SVM": {
           # 'lda__n_components': [1, 2],  # Adjust based on your dataset
            'classifier__C': uniform(loc=0.001, scale=99.999),
            'classifier__kernel': ['linear', 'rbf']
        },
       
        "Logistic Regression": {
            'classifier__C': uniform(loc=0.001, scale=1000),
            'classifier__solver': ['lbfgs', 'liblinear', 'saga'],
            'classifier__penalty': ['l2', 'elasticnet'],
            'classifier__l1_ratio': uniform(0, 1),
            'classifier__max_iter': [500, 1000, 2000],  # Higher iterations
            'classifier__tol': [1e-3, 1e-4, 1e-5]  # Relaxed tolerance
        },
        "ANN": {
            #'lda__n_components': [1, 2],  # Adjust based on your dataset
            'classifier__hidden_layer_sizes': [(50,), (100,), (150,), (50, 50), (100, 100)],
            'classifier__activation': ['relu', 'tanh', 'logistic'],
            'classifier__alpha': uniform(loc=1e-4, scale=1e-1),
            'classifier__max_iter': [1000]
        }
    }

    # Models to test
    models = {
        "KNN": KNeighborsClassifier(),
        "Random Forest": RandomForestClassifier(),
        "SVM": SVC(probability=True),  # Enable probability for ROC AUC
        "Logistic Regression": LogisticRegression(),
        "ANN": MLPClassifier()
    }

    # Define multiple metrics for evaluation
    metrics = {
        'accuracy': make_scorer(accuracy_score),
        'precision': make_scorer(precision_score, average='weighted'),
        'recall': make_scorer(recall_score, average='weighted'),
        'f1': make_scorer(f1_score, average='weighted')
    }

    # Results dictionary
    results = {}
    for model_name, model in models.items():
        print(f"Performing tuning for {model_name}...")

        # Create pipeline with scaling, LDA, and classifier
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            #('lda', LDA()),  # Always use LDA
            ('classifier', model)
        ])

        # Choose search strategy based on model
        if model_name in ["Random Forest", "SVM", "Logistic Regression", "ANN"]:
            search = RandomizedSearchCV(pipeline, param_grids[model_name], n_iter=100,
                                        cv=skf, scoring='accuracy', n_jobs=-1, random_state=42)
        else:
            search = GridSearchCV(pipeline, param_grids[model_name], cv=skf,
                                  scoring='accuracy', n_jobs=-1)

        # Fit the model to the data
        search.fit(X, y)

        # Store the best parameters and best CV score
        best_params = search.best_params_
        best_score = search.best_score_  # The best cross-validated score
        results[model_name] = (best_params, best_score)

        # Evaluate the model with other metrics
        for metric_name, scorer in metrics.items():
            score = cross_val_score(search.best_estimator_, X, y, cv=skf, scoring=scorer)
            results[model_name] += (metric_name, score.mean())  # Append the mean score for each metric

    # Print out the results
    for model_name, metrics in results.items():
        best_params, best_score = metrics[:2]
        print(f"{model_name}: Best Parameters = {best_params}, Best CV Accuracy = {best_score:.4f}")
        for i in range(2, len(metrics), 2):
            print(f"    {metrics[i]}: {metrics[i + 1]:.4f}")

# Example usage with defined 'features' and 'target'
# features = ...
# target = ...
# Grid_search_with_LDA(features, target)


In [12]:
all_data = pd.read_csv('./datasets/augmented_dataset.csv')

In [13]:
target = all_data[['storage_1', 'storage_2', 'storage_3']].idxmax(axis=1).str[-1].astype(int)  
features_sc_pt = all_data.drop(columns=['Replica', 'storage_1', 'storage_2', 'storage_3'])  
features_sc = all_data.iloc[:, 5:] 
Grid_search_with_LDA(features_sc, target)

Performing tuning for KNN...
Performing tuning for Random Forest...
Performing tuning for SVM...
Performing tuning for Logistic Regression...


410 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/pipeline.py", line 473, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/root/miniconda3/envs/project39/lib/python3.9/site-package

Performing tuning for ANN...
KNN: Best Parameters = {'classifier__n_neighbors': 1, 'classifier__weights': 'uniform'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9786
    recall: 0.9722
    f1: 0.9715
Random Forest: Best Parameters = {'classifier__n_estimators': 74, 'classifier__min_samples_split': 2, 'classifier__max_depth': 10}, Best CV Accuracy = 0.9778
    accuracy: 0.9611
    precision: 0.9660
    recall: 0.9611
    f1: 0.9660
SVM: Best Parameters = {'classifier__C': 70.80754970702675, 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9556
    accuracy: 0.9556
    precision: 0.9627
    recall: 0.9556
    f1: 0.9548
Logistic Regression: Best Parameters = {'classifier__C': 623.2991268275579, 'classifier__l1_ratio': 0.3308980248526492, 'classifier__max_iter': 2000, 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear', 'classifier__tol': 0.001}, Best CV Accuracy = 0.9500
    accuracy: 0.9500
    precision: 0.9544
    recall: 0.9500
    f1: 0.9501
ANN: Be

In [14]:
Grid_search_with_LDA(features_sc_pt, target)

Performing tuning for KNN...
Performing tuning for Random Forest...
Performing tuning for SVM...
Performing tuning for Logistic Regression...


410 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/pipeline.py", line 473, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/root/miniconda3/envs/project39/lib/python3.9/site-package

Performing tuning for ANN...
KNN: Best Parameters = {'classifier__n_neighbors': 1, 'classifier__weights': 'uniform'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9786
    recall: 0.9722
    f1: 0.9715
Random Forest: Best Parameters = {'classifier__n_estimators': 87, 'classifier__min_samples_split': 2, 'classifier__max_depth': None}, Best CV Accuracy = 0.9778
    accuracy: 0.9611
    precision: 0.9655
    recall: 0.9611
    f1: 0.9660
SVM: Best Parameters = {'classifier__C': 70.80754970702675, 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9611
    accuracy: 0.9611
    precision: 0.9690
    recall: 0.9611
    f1: 0.9604
Logistic Regression: Best Parameters = {'classifier__C': 727.2729958564208, 'classifier__l1_ratio': 0.3265407688058354, 'classifier__max_iter': 1000, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs', 'classifier__tol': 0.0001}, Best CV Accuracy = 0.9667
    accuracy: 0.9667
    precision: 0.9707
    recall: 0.9667
    f1: 0.9662
ANN: Bes

In [27]:
Grid_search_with_LDA(features_sc_pt, target)

Performing tuning for KNN...
Performing tuning for Random Forest...
Performing tuning for SVM...
Performing tuning for Logistic Regression...


410 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/root/miniconda3/envs/project39/lib/python3.9/site-packages/sklearn/pipeline.py", line 473, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/root/miniconda3/envs/project39/lib/python3.9/site-package

Performing tuning for ANN...
KNN: Best Parameters = {'classifier__n_neighbors': 1, 'classifier__weights': 'uniform'}, Best CV Accuracy = 0.9722
    accuracy: 0.9722
    precision: 0.9786
    recall: 0.9722
    f1: 0.9715
Random Forest: Best Parameters = {'classifier__n_estimators': 87, 'classifier__min_samples_split': 2, 'classifier__max_depth': None}, Best CV Accuracy = 0.9722
    accuracy: 0.9611
    precision: 0.9750
    recall: 0.9611
    f1: 0.9604
SVM: Best Parameters = {'classifier__C': 70.80754970702675, 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9611
    accuracy: 0.9611
    precision: 0.9690
    recall: 0.9611
    f1: 0.9604
Logistic Regression: Best Parameters = {'classifier__C': 727.2729958564208, 'classifier__l1_ratio': 0.3265407688058354, 'classifier__max_iter': 1000, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs', 'classifier__tol': 0.0001}, Best CV Accuracy = 0.9667
    accuracy: 0.9667
    precision: 0.9707
    recall: 0.9667
    f1: 0.9662
ANN: Bes

In [13]:
def extensive_hyperparameter_tuning(features, target):
    from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
    from scipy.stats import uniform, randint
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.neural_network import MLPClassifier

    X = features
    y = target
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    # Expanded parameter grids
    param_grids = {
        "KNN": {
            'classifier__n_neighbors': list(range(1, 50)),  # Larger range for neighbors
            'classifier__weights': ['uniform', 'distance'],
            'classifier__p': [1, 2]  # Adding different distance metrics
        },
        "Random Forest": {
            'classifier__n_estimators': list(range(50, 500, 50)),  # Larger range
            'classifier__max_depth': [None] + list(range(5, 50, 5)),  # Deeper trees
            'classifier__min_samples_split': [2, 5, 10, 15],
            'classifier__min_samples_leaf': [1, 2, 5]  # Adding leaf size
        },
        "SVM": {
            'classifier__C': uniform(loc=0.001, scale=1000),  # Larger range for C
            'classifier__kernel': ['linear', 'rbf', 'poly'],  # More kernels
            'classifier__gamma': ['scale', 'auto']  # Gamma for non-linear kernels
        },
        "Logistic Regression": {
            'classifier__C': uniform(loc=0.001, scale=1000),  # Larger C range
            'classifier__solver': ['lbfgs', 'liblinear', 'saga'],  # Adding solvers
            'classifier__penalty': ['l2', 'elasticnet'],  # Penalty types
            'classifier__l1_ratio': uniform(0, 1)  # For elasticnet
        },
        "ANN": {
            'classifier__hidden_layer_sizes': [(50,), (100,), (150,), (50, 50), (100, 100), (50, 100, 50)],  # More architectures
            'classifier__activation': ['relu', 'tanh', 'logistic'],
            'classifier__alpha': uniform(loc=1e-5, scale=1e-1),  # Smaller alpha range
            'classifier__learning_rate_init': uniform(1e-5, 1e-2),  # Learning rate
            'classifier__max_iter': [2000]  # More iterations
        }
    }

    models = {
        "KNN": KNeighborsClassifier(),
        "Random Forest": RandomForestClassifier(),
        "SVM": SVC(probability=True),
        "Logistic Regression": LogisticRegression(),
        "ANN": MLPClassifier()
    }

    # RandomizedSearchCV for all models
    results = {}
    for model_name, model in models.items():
        pipeline = Pipeline([
            ('scaler', StandardScaler()),  # Only scaling, no LDA
            ('classifier', model)
        ])
        
        search = RandomizedSearchCV(
            pipeline,
            param_distributions=param_grids[model_name],
            n_iter=200,  # More iterations
            cv=skf,
            scoring='accuracy',
            n_jobs=-1,
            random_state=42
        )
        
        search.fit(X, y)
        best_params = search.best_params_
        best_score = search.best_score_
        results[model_name] = (best_params, best_score)
    
    # Print results
    for model_name, (best_params, best_score) in results.items():
        print(f"{model_name}: Best Parameters = {best_params}, Best CV Accuracy = {best_score:.4f}")


In [14]:
all_data = pd.read_csv('./datasets/augmented_dataset.csv')

In [15]:
target = all_data[['storage_1', 'storage_2', 'storage_3']].idxmax(axis=1).str[-1].astype(int)  
features_sc_pt = all_data.drop(columns=['Replica', 'storage_1', 'storage_2', 'storage_3'])  
extensive_hyperparameter_tuning(features_sc_pt, target)

  _data = np.array(data, dtype=dtype, copy=copy,
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refe

KNN: Best Parameters = {'classifier__weights': 'uniform', 'classifier__p': 1, 'classifier__n_neighbors': 1}, Best CV Accuracy = 0.9722
Random Forest: Best Parameters = {'classifier__n_estimators': 50, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_depth': 20}, Best CV Accuracy = 0.9722
SVM: Best Parameters = {'classifier__C': 617.4825096277165, 'classifier__gamma': 'auto', 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9611
Logistic Regression: Best Parameters = {'classifier__C': 212.34011067827615, 'classifier__l1_ratio': 0.18182496720710062, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}, Best CV Accuracy = 0.9722
ANN: Best Parameters = {'classifier__activation': 'relu', 'classifier__alpha': 0.06925360328902704, 'classifier__hidden_layer_sizes': (100,), 'classifier__learning_rate_init': 0.0033861517140362798, 'classifier__max_iter': 2000}, Best CV Accuracy = 0.9889


In [26]:
features_sc = all_data.iloc[:, 5:] 
extensive_hyperparameter_tuning(features_sc, target)

  _data = np.array(data, dtype=dtype, copy=copy,
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refe

KNN: Best Parameters = {'classifier__weights': 'uniform', 'classifier__p': 1, 'classifier__n_neighbors': 1}, Best CV Accuracy = 0.9722
Random Forest: Best Parameters = {'classifier__n_estimators': 150, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_depth': 35}, Best CV Accuracy = 0.9722
SVM: Best Parameters = {'classifier__C': 617.4825096277165, 'classifier__gamma': 'auto', 'classifier__kernel': 'rbf'}, Best CV Accuracy = 0.9611
Logistic Regression: Best Parameters = {'classifier__C': 609.9976577826209, 'classifier__l1_ratio': 0.8331949117361643, 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear'}, Best CV Accuracy = 0.9500
ANN: Best Parameters = {'classifier__activation': 'relu', 'classifier__alpha': 0.015611864044243652, 'classifier__hidden_layer_sizes': (150,), 'classifier__learning_rate_init': 0.0010097491581800289, 'classifier__max_iter': 2000}, Best CV Accuracy = 0.9833
