In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [10]:
# df
# df.columns

In [11]:
df = pd.read_csv("/Users/aryansood/aims/AIMS_DRONE2/newdata_final_with_updown_flip.csv")
df_shuffled = df.sample(frac=1, random_state=42)

In [12]:
X = df_shuffled.drop('label', axis=1)  # Features
y = df_shuffled['label']  # Labels

# Split the shuffled data into 80% training, 10% validation, and 10% test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [5]:
# Initialize and train the model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the validation set
y_val_pred = clf.predict(X_val)

# Evaluate the model on the validation set
accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Make predictions on the test set
y_test_pred = clf.predict(X_test)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 80.46%
Test Accuracy: 77.14%


In [8]:
import optuna

def objective(trial):
    # Define the hyperparameter search space
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4)
    }

    # Initialize and train the Random Forest Classifier with the suggested hyperparameters
    rf_clf = RandomForestClassifier(**params, random_state=42)
    rf_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = rf_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create an Optuna study
study = optuna.create_study(direction='maximize')

study.optimize(objective, n_trials=50)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")


[I 2024-01-24 13:14:22,362] A new study created in memory with name: no-name-8ae75d92-6e07-484b-8f8b-88114f47d9fe
[I 2024-01-24 13:14:23,069] Trial 0 finished with value: 0.7514792899408284 and parameters: {'n_estimators': 170, 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.7514792899408284.
[I 2024-01-24 13:14:23,716] Trial 1 finished with value: 0.757396449704142 and parameters: {'n_estimators': 161, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.757396449704142.
[I 2024-01-24 13:14:23,950] Trial 2 finished with value: 0.7751479289940828 and parameters: {'n_estimators': 58, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7751479289940828.
[I 2024-01-24 13:14:24,574] Trial 3 finished with value: 0.7633136094674556 and parameters: {'n_estimators': 146, 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7

[I 2024-01-24 13:14:41,686] Trial 36 finished with value: 0.7514792899408284 and parameters: {'n_estimators': 190, 'max_depth': 8, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 31 with value: 0.8106508875739645.
[I 2024-01-24 13:14:42,101] Trial 37 finished with value: 0.7869822485207101 and parameters: {'n_estimators': 151, 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 31 with value: 0.8106508875739645.
[I 2024-01-24 13:14:42,811] Trial 38 finished with value: 0.7633136094674556 and parameters: {'n_estimators': 165, 'max_depth': 11, 'min_samples_split': 5, 'min_samples_leaf': 2}. Best is trial 31 with value: 0.8106508875739645.
[I 2024-01-24 13:14:43,076] Trial 39 finished with value: 0.6804733727810651 and parameters: {'n_estimators': 112, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 31 with value: 0.8106508875739645.
[I 2024-01-24 13:14:43,841] Trial 40 finished with value: 0.7633136094674556 and parame

Best Hyperparameters: {'n_estimators': 132, 'max_depth': 4, 'min_samples_split': 3, 'min_samples_leaf': 1}
Best Validation Accuracy: 81.07%


In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Initialize and train the KNN model
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train, y_train)

# Make predictions on the validation set
y_val_pred = knn_clf.predict(X_val)

# Evaluate the model on the validation set
accuracy_val = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy_val * 100:.2f}%")

# Make predictions on the test set
y_test_pred = knn_clf.predict(X_test)

# Evaluate the model on the test set
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")


Validation Accuracy: 79.29%
Test Accuracy: 82.94%


In [10]:
import optuna
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Assuming 'X_train' and 'y_train' are your training data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def objective(trial):
    # Define the hyperparameter search space
    params = {
        'n_neighbors': trial.suggest_int('n_neighbors', 1, 10),
        'weights': trial.suggest_categorical('weights', ['uniform', 'distance']),
        'p': trial.suggest_int('p', 1, 2),
    }

    # Initialize and train the K-Nearest Neighbors model with the suggested hyperparameters
    knn_clf = KNeighborsClassifier(**params)
    knn_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = knn_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create an Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")


[I 2024-01-24 13:14:55,671] A new study created in memory with name: no-name-3de28c1a-d0f4-4f27-a8dc-87d58d8ed49d
[I 2024-01-24 13:14:55,710] Trial 0 finished with value: 0.7970479704797048 and parameters: {'n_neighbors': 1, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 0.7970479704797048.
[I 2024-01-24 13:14:55,772] Trial 1 finished with value: 0.7785977859778598 and parameters: {'n_neighbors': 10, 'weights': 'uniform', 'p': 1}. Best is trial 0 with value: 0.7970479704797048.
[I 2024-01-24 13:14:55,806] Trial 2 finished with value: 0.7896678966789668 and parameters: {'n_neighbors': 5, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 0.7970479704797048.
[I 2024-01-24 13:14:55,834] Trial 3 finished with value: 0.7896678966789668 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'p': 2}. Best is trial 0 with value: 0.7970479704797048.
[I 2024-01-24 13:14:55,864] Trial 4 finished with value: 0.7785977859778598 and parameters: {'n_neighbors': 10, 'weights': '

[I 2024-01-24 13:14:57,658] Trial 43 finished with value: 0.8228782287822878 and parameters: {'n_neighbors': 2, 'weights': 'uniform', 'p': 2}. Best is trial 25 with value: 0.8228782287822878.
[I 2024-01-24 13:14:57,707] Trial 44 finished with value: 0.8118081180811808 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'p': 2}. Best is trial 25 with value: 0.8228782287822878.
[I 2024-01-24 13:14:57,754] Trial 45 finished with value: 0.7970479704797048 and parameters: {'n_neighbors': 1, 'weights': 'uniform', 'p': 2}. Best is trial 25 with value: 0.8228782287822878.
[I 2024-01-24 13:14:57,796] Trial 46 finished with value: 0.8228782287822878 and parameters: {'n_neighbors': 2, 'weights': 'uniform', 'p': 2}. Best is trial 25 with value: 0.8228782287822878.
[I 2024-01-24 13:14:57,855] Trial 47 finished with value: 0.8118081180811808 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'p': 2}. Best is trial 25 with value: 0.8228782287822878.
[I 2024-01-24 13:14:57,898] Trial 48 fin

Best Hyperparameters: {'n_neighbors': 2, 'weights': 'uniform', 'p': 2}
Best Validation Accuracy: 82.29%


In [11]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression, RidgeClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
def compare_classifiers(X_train, X_val, X_test, y_train, y_val, y_test):
    # Define classifiers
#     classifiers = {
#         'Random Forest': RandomForestClassifier(random_state=42),
#         'Gradient Boosting': GradientBoostingClassifier(random_state=42),
#         'K-Nearest Neighbors': KNeighborsClassifier(),
#         'Support Vector Machine': SVC(random_state=42),
# #         'Logistic Regression': LogisticRegression(max_iter=10000, random_state=42),
#         'Naive Bayes': GaussianNB(),
#         'Decision Tree': DecisionTreeClassifier(random_state=42),
#         'Neural Network': MLPClassifier(random_state=42),
#         'AdaBoost': AdaBoostClassifier(random_state=42),
#         'Bagging Classifier': BaggingClassifier(random_state=42),
#         'Extra Trees Classifier': ExtraTreesClassifier(random_state=42),
#         'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
#         'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis(),
#         'Gaussian Process Classifier': GaussianProcessClassifier(random_state=42),
#         'XGBoost': XGBClassifier(random_state=42)
#     }
    
    classifiers = {
    'Support Vector Classifier': SVC(),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Random Forest Classifier': RandomForestClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Gaussian Naive Bayes': GaussianNB(),
    'Bernoulli Naive Bayes': BernoulliNB(),
    'Gradient Boosting Classifier': GradientBoostingClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'XGBoost Classifier': XGBClassifier(),
    'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
    'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Passive Aggressive Classifier': PassiveAggressiveClassifier(),
    'Perceptron': Perceptron(),
    'Ridge Classifier': RidgeClassifier(),
    }


    # Train and evaluate each classifier on the validation set
    val_results = {}
    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_val_pred = clf.predict(X_val)
        accuracy = accuracy_score(y_val, y_val_pred)
        val_results[name] = accuracy

    # Sort validation results by accuracy in descending order
    val_results = {k: v for k, v in sorted(val_results.items(), key=lambda item: item[1], reverse=True)}

    # Print the validation results
    print("Classifier Performance on Validation Set:")
    for name, accuracy in val_results.items():
        print(f"{name}: {accuracy * 100:.2f}%")

    # Train and evaluate each classifier on the test set
    test_results = {}
    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_test_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_test_pred)
        test_results[name] = accuracy

    # Sort test results by accuracy in descending order
    test_results = {k: v for k, v in sorted(test_results.items(), key=lambda item: item[1], reverse=True)}

    # Print the test results
    print("\nClassifier Performance on Test Set:")
    for name, accuracy in test_results.items():
        print(f"{name}: {accuracy * 100:.2f}%")

# Example usage:
# compare_classifiers(X_train, X_val, X_test, y_train, y_val, y_test)


In [12]:
compare_classifiers(X_train, X_val, X_test, y_train, y_val, y_test)



Classifier Performance on Validation Set:
Ridge Classifier: 87.57%
Linear Discriminant Analysis: 86.98%
Passive Aggressive Classifier: 85.80%
Perceptron: 85.21%
Support Vector Classifier: 84.62%
Quadratic Discriminant Analysis: 79.88%
K-Nearest Neighbors: 79.29%
Random Forest Classifier: 76.33%
Gradient Boosting Classifier: 75.74%
Extra Trees Classifier: 75.74%
XGBoost Classifier: 75.15%
Bagging Classifier: 74.56%
Decision Tree Classifier: 72.19%
Gaussian Naive Bayes: 63.31%
AdaBoost Classifier: 25.44%
Bernoulli Naive Bayes: 18.34%





Classifier Performance on Test Set:
Support Vector Classifier: 88.82%
Passive Aggressive Classifier: 88.82%
Perceptron: 87.65%
Ridge Classifier: 87.06%
Linear Discriminant Analysis: 85.29%
K-Nearest Neighbors: 82.94%
Quadratic Discriminant Analysis: 82.94%
Extra Trees Classifier: 80.00%
Gradient Boosting Classifier: 79.41%
XGBoost Classifier: 79.41%
Random Forest Classifier: 78.82%
Bagging Classifier: 75.88%
Decision Tree Classifier: 74.12%
Gaussian Naive Bayes: 65.29%
AdaBoost Classifier: 23.53%
Bernoulli Naive Bayes: 13.53%


In [13]:
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score

# Initialize and train the Ridge Classifier model
ridge_clf = RidgeClassifier()
ridge_clf.fit(X_train, y_train)

# Make predictions on the validation set
y_val_pred = ridge_clf.predict(X_val)

# Evaluate the model on the validation set
accuracy_val = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy_val * 100:.2f}%")

# Make predictions on the test set
y_test_pred = ridge_clf.predict(X_test)

# Evaluate the model on the test set
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")


Validation Accuracy: 87.36%
Test Accuracy: 85.71%


In [14]:
import optuna
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Assuming 'X_train' and 'y_train' are your training data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def objective(trial):
    # Define the hyperparameter search space
    params = {
        'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    }

    # Initialize and train the Ridge Classifier model with the suggested hyperparameters
    ridge_clf = RidgeClassifier(**params)
    ridge_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = ridge_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create an Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")

# Use the best hyperparameters to train the final Ridge Classifier model
final_ridge_clf = RidgeClassifier(**best_params)
final_ridge_clf.fit(X_train, y_train)

# Make predictions on the test set using the best Ridge Classifier model
y_test_pred = final_ridge_clf.predict(X_test)

# Evaluate the model on the test set
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")


# Best Hyperparameters: {'alpha': 0.00046668235475350847, 'solver': 'sparse_cg'}
# Best Validation Accuracy: 88.17%
# Test Accuracy: 86.29%

[I 2024-01-24 13:49:32,132] A new study created in memory with name: no-name-e73003c7-5c7d-4461-96ea-83f44e03fadc
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:34,933] Trial 0 finished with value: 0.8781362007168458 and parameters: {'alpha': 1.6394413171252897, 'solver': 'saga'}. Best is trial 0 with value: 0.8781362007168458.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:35,034] Trial 1 finished with value: 0.8745519713261649 and parameters: {'alpha': 0.031075763172253888, 'solver': 'lsqr'}. Best is trial 0 with value: 0.8781362007168458.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:35,130] Trial 2 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.00046668235475350847, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:35,144] Trial 3 finished with value: 0.8817204301075269 and 

[I 2024-01-24 13:49:38,664] Trial 12 finished with value: 0.8781362007168458 and parameters: {'alpha': 0.16977441604149188, 'solver': 'sag'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:38,686] Trial 13 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.00012941753401600549, 'solver': 'svd'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:38,701] Trial 14 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.0011328230473998022, 'solver': 'cholesky'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:38,728] Trial 15 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.01662612172557457, 'solver': 'svd'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I

  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:44,429] Trial 20 finished with value: 0.8745519713261649 and parameters: {'alpha': 0.049285745855624036, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:44,449] Trial 21 finished with value: 0.8745519713261649 and parameters: {'alpha': 84.55001944712677, 'solver': 'auto'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:44,470] Trial 22 finished with value: 0.8817204301075269 and parameters: {'alpha': 14.180299681927295, 'solver': 'auto'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:44,504] Trial 23 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.7902876574526239, 'solver': 'auto'}. Best is trial 2 with value: 0.8817204301075269.
  'alp

  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:50,240] Trial 30 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.007649679993235776, 'solver': 'cholesky'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:50,364] Trial 31 finished with value: 0.8817204301075269 and parameters: {'alpha': 1.5817203049829889, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:50,453] Trial 32 finished with value: 0.8781362007168458 and parameters: {'alpha': 14.499394754354334, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:50,539] Trial 33 finished with value: 0.8745519713261649 and parameters: {'alpha': 5.810228564542511, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301

  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:53,917] Trial 43 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.006616414401910067, 'solver': 'auto'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:53,941] Trial 44 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.0018889570494095478, 'solver': 'svd'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:53,960] Trial 45 finished with value: 0.8817204301075269 and parameters: {'alpha': 0.011007181666626509, 'solver': 'auto'}. Best is trial 2 with value: 0.8817204301075269.
  'alpha': trial.suggest_loguniform('alpha', 1e-4, 1e2),
[I 2024-01-24 13:49:54,086] Trial 46 finished with value: 0.8745519713261649 and parameters: {'alpha': 0.0294024806425419, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.8817204301075269.
 

Best Hyperparameters: {'alpha': 0.00046668235475350847, 'solver': 'sparse_cg'}
Best Validation Accuracy: 88.17%
Test Accuracy: 86.29%


In [15]:
import optuna
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Assuming 'X_train' and 'y_train' are your training data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def objective(trial):
    # Define the hyperparameter search space
    params = {
        'alpha': trial.suggest_float('alpha', 1e-4, 1e2, log=True),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    }

    # Initialize and train the Ridge Classifier model with the suggested hyperparameters
    ridge_clf = RidgeClassifier(**params)
    ridge_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = ridge_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create an Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")

# Use the best hyperparameters to train the final Ridge Classifier model
final_ridge_clf = RidgeClassifier(**best_params)
final_ridge_clf.fit(X_train, y_train)

# Make predictions on the test set using the best Ridge Classifier model
y_test_pred = final_ridge_clf.predict(X_test)

# Evaluate the model on the test set
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

# Best Hyperparameters: {'alpha': 0.00010687964225283955, 'solver': 'saga'}
# Best Validation Accuracy: 88.34%
# Test Accuracy: 86.86%

[I 2024-01-24 13:50:23,394] A new study created in memory with name: no-name-e64e894d-6624-4d37-b4c5-10fc275db6a2
[I 2024-01-24 13:50:25,656] Trial 0 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.00010687964225283955, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:25,771] Trial 1 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.1360765790919907, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:25,835] Trial 2 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.00026092176348772894, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:25,849] Trial 3 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.2874376071482597, 'solver': 'auto'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:25,924] Trial 4 finished with value: 0.8789237668161435 and parameters: {'alpha': 0.11419942

[I 2024-01-24 13:50:35,340] Trial 11 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.005248440394619826, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:37,579] Trial 12 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.009087580346565809, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:37,599] Trial 13 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.0001285035418473126, 'solver': 'cholesky'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:37,803] Trial 14 finished with value: 0.8654708520179372 and parameters: {'alpha': 2.00838560116668, 'solver': 'svd'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:40,212] Trial 15 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0011662822839799023, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:42,450] Trial 16 fi

[I 2024-01-24 13:50:44,695] Trial 19 finished with value: 0.8789237668161435 and parameters: {'alpha': 0.0036710502244700007, 'solver': 'sag'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:44,703] Trial 20 finished with value: 0.8654708520179372 and parameters: {'alpha': 1.0605183844002106, 'solver': 'cholesky'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:47,047] Trial 21 finished with value: 0.8834080717488789 and parameters: {'alpha': 6.492166445337479, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:49,345] Trial 22 finished with value: 0.8834080717488789 and parameters: {'alpha': 59.01916543929019, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:51,577] Trial 23 finished with value: 0.8834080717488789 and parameters: {'alpha': 15.137065140692608, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:53,807] Trial 24 finished 

[I 2024-01-24 13:50:56,033] Trial 25 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.00044073946391106895, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:50:58,259] Trial 26 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.9998377174374585, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:00,489] Trial 27 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0023770315643996945, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:00,506] Trial 28 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.04978109965905621, 'solver': 'cholesky'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:00,518] Trial 29 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.29624841643120836, 'solver': 'svd'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:00,533] Trial 30 f

[I 2024-01-24 13:51:05,077] Trial 32 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0050143989416275665, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:07,364] Trial 33 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.00010380925913724467, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:09,447] Trial 34 finished with value: 0.8789237668161435 and parameters: {'alpha': 0.23815461182967113, 'solver': 'sag'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:09,497] Trial 35 finished with value: 0.8789237668161435 and parameters: {'alpha': 0.06589984396223728, 'solver': 'lsqr'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:09,566] Trial 36 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.0018506978841515273, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:11,868] Trial 

[I 2024-01-24 13:51:14,317] Trial 41 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.005923049977032445, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:16,569] Trial 42 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.009163514964011634, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:18,799] Trial 43 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.002583738372753477, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:21,055] Trial 44 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0008125304352383701, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:23,292] Trial 45 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0228292041119179, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.


[I 2024-01-24 13:51:25,345] Trial 46 finished with value: 0.8789237668161435 and parameters: {'alpha': 0.04252828827974076, 'solver': 'sag'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:25,375] Trial 47 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.00020245923529872622, 'solver': 'svd'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:25,398] Trial 48 finished with value: 0.8654708520179372 and parameters: {'alpha': 0.01277153464140976, 'solver': 'cholesky'}. Best is trial 0 with value: 0.8834080717488789.
[I 2024-01-24 13:51:27,796] Trial 49 finished with value: 0.8834080717488789 and parameters: {'alpha': 0.0012933124159103615, 'solver': 'saga'}. Best is trial 0 with value: 0.8834080717488789.


Best Hyperparameters: {'alpha': 0.00010687964225283955, 'solver': 'saga'}
Best Validation Accuracy: 88.34%




Test Accuracy: 86.86%




In [22]:
import optuna
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Assuming 'X_train' and 'y_train' are your training data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def objective(trial):
    # Define the hyperparameter search space
    params = {
        'alpha': trial.suggest_float('alpha', 1e-4, 1e2, log=True),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    }

    # Initialize and train the Ridge Classifier model with the suggested hyperparameters
    ridge_clf = RidgeClassifier(**params)
    ridge_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = ridge_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create an Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")

# Use the best hyperparameters to train the final Ridge Classifier model
final_ridge_clf = RidgeClassifier(**best_params)
final_ridge_clf.fit(X_train, y_train)

# Make predictions on the test set using the best Ridge Classifier model
y_test_pred = final_ridge_clf.predict(X_test)

# Evaluate the model on the test set
accuracy_test = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")


[I 2024-01-24 13:54:51,257] A new study created in memory with name: no-name-f53f3ed6-3c8a-46cd-9089-2f620d30997a
[I 2024-01-24 13:54:51,266] Trial 0 finished with value: 0.9574468085106383 and parameters: {'alpha': 2.619412789107737, 'solver': 'cholesky'}. Best is trial 0 with value: 0.9574468085106383.
[I 2024-01-24 13:54:51,288] Trial 1 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.0004242677786956592, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.9574468085106383.
[I 2024-01-24 13:54:51,753] Trial 2 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0015938822149380884, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:51,771] Trial 3 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.15376279022291767, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:51,785] Trial 4 finished with value: 0.9787234042553191 and parameters: {'alpha': 3.947575

[I 2024-01-24 13:54:53,306] Trial 11 finished with value: 0.9787234042553191 and parameters: {'alpha': 71.50206777872619, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:53,322] Trial 12 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0021976733529730473, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:53,338] Trial 13 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.005893759793287844, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:53,804] Trial 14 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.00010583981757750326, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:54,231] Trial 15 finished with value: 0.9787234042553191 and parameters: {'alpha': 56.876089204640714, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:54,238] Trial 16 finish

[I 2024-01-24 13:54:55,776] Trial 22 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.7657462537784002, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:56,241] Trial 23 finished with value: 0.9787234042553191 and parameters: {'alpha': 8.172171152978917, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:56,708] Trial 24 finished with value: 0.9787234042553191 and parameters: {'alpha': 3.62748064819823, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:57,175] Trial 25 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.03281602657635239, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:57,182] Trial 26 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.3951710900504891, 'solver': 'svd'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:57,201] Trial 27 finished with va

[I 2024-01-24 13:54:57,723] Trial 29 finished with value: 0.9787234042553191 and parameters: {'alpha': 1.3140133660249194, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:57,743] Trial 30 finished with value: 0.9574468085106383 and parameters: {'alpha': 2.8624655473839153, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:58,172] Trial 31 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.2945609479699013, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:58,603] Trial 32 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.049426051457651554, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:59,047] Trial 33 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.2907880633296738, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:59,067] Trial 34 finished w

[I 2024-01-24 13:54:59,522] Trial 37 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.1007455421880084, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:54:59,992] Trial 38 finished with value: 0.9787234042553191 and parameters: {'alpha': 5.537816740472411, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:00,010] Trial 39 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.00013175906773884045, 'solver': 'svd'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:00,072] Trial 40 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.00975659858541732, 'solver': 'sparse_cg'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:00,597] Trial 41 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0003885536873710314, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:01,065] Trial 42 fi

[I 2024-01-24 13:55:02,031] Trial 46 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.005164246000562374, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:02,047] Trial 47 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.05167977320891017, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:02,054] Trial 48 finished with value: 0.9787234042553191 and parameters: {'alpha': 16.70534351959021, 'solver': 'cholesky'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:02,524] Trial 49 finished with value: 0.9787234042553191 and parameters: {'alpha': 1.100966505730103, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:02,959] Trial 50 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0007459758816599955, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:02,975] Trial 51 finish

[I 2024-01-24 13:55:04,065] Trial 57 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.42833992213287325, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:04,073] Trial 58 finished with value: 0.9574468085106383 and parameters: {'alpha': 1.7966276146405804, 'solver': 'auto'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:04,092] Trial 59 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.013688255081410451, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:04,564] Trial 60 finished with value: 0.9787234042553191 and parameters: {'alpha': 9.700458513586472, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:04,580] Trial 61 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0022492954264119198, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:04,597] Trial 62 finished

[I 2024-01-24 13:55:05,998] Trial 69 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0010850312567144613, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:06,049] Trial 70 finished with value: 0.9787234042553191 and parameters: {'alpha': 17.39168593132797, 'solver': 'svd'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:06,088] Trial 71 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.005035820901815471, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:06,112] Trial 72 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0005675129233723339, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:06,154] Trial 73 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.006469248465235369, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:06,727] Trial 74 finish

[I 2024-01-24 13:55:08,187] Trial 81 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.00011293437543228017, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:08,668] Trial 82 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0011926938888234112, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:09,145] Trial 83 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.0006114522708762769, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:09,623] Trial 84 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.00014036726547599253, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:09,630] Trial 85 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.008651151954759543, 'solver': 'cholesky'}. Best is trial 2 with value: 0.9787234042553191.


[I 2024-01-24 13:55:10,101] Trial 86 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.22739636352487216, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:10,118] Trial 87 finished with value: 0.9787234042553191 and parameters: {'alpha': 4.311600471691125, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:10,551] Trial 88 finished with value: 0.9787234042553191 and parameters: {'alpha': 7.417649609502458, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:11,021] Trial 89 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.00017235862113192606, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:11,037] Trial 90 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.00031426485248109014, 'solver': 'svd'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:11,557] Trial 91 finished

[I 2024-01-24 13:55:12,431] Trial 93 finished with value: 0.9787234042553191 and parameters: {'alpha': 29.358587684958728, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:12,449] Trial 94 finished with value: 0.9787234042553191 and parameters: {'alpha': 48.24427339563006, 'solver': 'lsqr'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:12,456] Trial 95 finished with value: 0.9574468085106383 and parameters: {'alpha': 0.6017181702941812, 'solver': 'auto'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:12,889] Trial 96 finished with value: 0.9787234042553191 and parameters: {'alpha': 0.000434814529834729, 'solver': 'sag'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:13,360] Trial 97 finished with value: 0.9787234042553191 and parameters: {'alpha': 16.153867106743277, 'solver': 'saga'}. Best is trial 2 with value: 0.9787234042553191.
[I 2024-01-24 13:55:13,376] Trial 98 finished with 

Best Hyperparameters: {'alpha': 0.0015938822149380884, 'solver': 'saga'}
Best Validation Accuracy: 97.87%




Test Accuracy: 85.14%




In [31]:
best_params={'alpha': 0.00016956386509426897, 'solver': 'lsqr'}
# best_params={'alpha': 0.0015938822149380884, 'solver': 'saga'}
# best_params={'alpha': 0.003361829053719938, 'solver': 'saga'}
final_ridge_clf = RidgeClassifier(**best_params)
final_ridge_clf.fit(X, y)
# y_test_pred = final_ridge_clf.predict(X_test)
# accuracy_test = accuracy_score(y_test, y_test_pred)
# print(f"Test Accuracy: {accuracy_test * 100:.2f}%")


In [None]:
Best Hyperparameters: {'alpha': 0.00016956386509426897, 'solver': 'lsqr'}
Best Validation Accuracy: 92.31%
Test Accuracy: 86.86%
    
    
# Best Hyperparameters: {'alpha': 0.0015938822149380884, 'solver': 'saga'}
# Best Validation Accuracy: 97.87%
# Test Accuracy: 85.14%

# Best Hyperparameters: {'alpha': 0.003361829053719938, 'solver': 'saga'}
# Best Validation Accuracy: 90.99%
# Test Accuracy: 91.18%
      
      
# Classifier Performance on Test Set:
# Neural Network: 86.88%
# Support Vector Machine: 83.75%
# Random Forest: 81.25%
# Gradient Boosting: 80.00%
# K-Nearest Neighbors: 80.00%
# Logistic Regression: 80.00%
# Decision Tree: 75.62%
# Naive Bayes: 51.88%

# Classifier Performance on Validation Set:
# Neural Network: 86.27%
# Support Vector Machine: 82.75%
# Logistic Regression: 82.75%
# K-Nearest Neighbors: 76.86%
# Random Forest: 74.90%
# Gradient Boosting: 74.51%
# Decision Tree: 72.16%
# Naive Bayes: 57.25%


In [6]:
import optuna
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# Assuming 'X_train', 'X_val', 'y_train', 'y_val' are your training and validation sets
# Replace them with your actual data

# Define the objective function for Optuna
def objective(trial):
    # Define the hyperparameter search space
    params = {
        'hidden_layer_sizes': (trial.suggest_int('n_layer_1', 1, 100),),
        'activation': trial.suggest_categorical('activation', ['identity', 'logistic', 'tanh', 'relu']),
        'solver': trial.suggest_categorical('solver', ['lbfgs', 'sgd', 'adam']),
        'alpha': trial.suggest_float('alpha', 1e-5, 1e-1, log=True),
        'learning_rate': trial.suggest_categorical('learning_rate', ['constant', 'invscaling', 'adaptive']),
    }

    # Initialize and train the MLPClassifier with the suggested hyperparameters
    mlp_clf = MLPClassifier(random_state=42, max_iter=1000, **params)
    mlp_clf.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = mlp_clf.predict(X_val)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=80)  # You can adjust the number of trials

# Print the best hyperparameters and their corresponding accuracy
best_params = study.best_params
best_accuracy = study.best_value
print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy * 100:.2f}%")


[I 2024-01-24 01:34:33,311] A new study created in memory with name: no-name-a903a01b-20e8-496a-a52f-9f59443d49b9
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
[I 2024-01-24 01:34:34,923] Trial 0 finished with value: 0.4043887147335423 and parameters: {'n_layer_1': 7, 'activation': 'tanh', 'solver': 'lbfgs', 'alpha': 7.034593705835111e-05, 'learning_rate': 'invscaling'}. Best is trial 0 with value: 0.4043887147335423.
[I 2024-01-24 01:34:35,445] Trial 1 finished with value: 0.7711598746081505 and parameters: {'n_layer_1': 11, 'activation': 'relu', 'solver': 'adam', 'alpha': 4.624001988990149e-05, 'learning_rate': 'adaptive'}. Best is trial 1 with value: 0.7711598746081505.
[I 2024-01-24 01:34:35,523] Trial 2 finished with value: 0.7931034482758621 and parameters: {'n_layer_1': 

[I 2024-01-24 01:35:19,310] Trial 24 finished with value: 0.8808777429467085 and parameters: {'n_layer_1': 21, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.0003697255745000313, 'learning_rate': 'constant'}. Best is trial 11 with value: 0.8996865203761756.
[I 2024-01-24 01:35:19,974] Trial 25 finished with value: 0.9028213166144201 and parameters: {'n_layer_1': 80, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.0010703303099399992, 'learning_rate': 'constant'}. Best is trial 25 with value: 0.9028213166144201.
[I 2024-01-24 01:35:20,082] Trial 26 finished with value: 0.12852664576802508 and parameters: {'n_layer_1': 60, 'activation': 'relu', 'solver': 'sgd', 'alpha': 0.01742889622562204, 'learning_rate': 'constant'}. Best is trial 25 with value: 0.9028213166144201.
[I 2024-01-24 01:35:20,342] Trial 27 finished with value: 0.11285266457680251 and parameters: {'n_layer_1': 1, 'activation': 'tanh', 'solver': 'sgd', 'alpha': 0.00014940312964721933, 'learning_rate': 'constan

[I 2024-01-24 01:36:31,532] Trial 50 finished with value: 0.8338557993730408 and parameters: {'n_layer_1': 69, 'activation': 'tanh', 'solver': 'lbfgs', 'alpha': 7.585577005154996e-05, 'learning_rate': 'adaptive'}. Best is trial 25 with value: 0.9028213166144201.
[I 2024-01-24 01:36:33,223] Trial 51 finished with value: 0.896551724137931 and parameters: {'n_layer_1': 80, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.005987675092852193, 'learning_rate': 'constant'}. Best is trial 25 with value: 0.9028213166144201.
[I 2024-01-24 01:36:35,360] Trial 52 finished with value: 0.8463949843260188 and parameters: {'n_layer_1': 83, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.004694813994691457, 'learning_rate': 'constant'}. Best is trial 25 with value: 0.9028213166144201.
[I 2024-01-24 01:36:37,564] Trial 53 finished with value: 0.890282131661442 and parameters: {'n_layer_1': 74, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.011384504536914544, 'learning_rate': 'const

Best Hyperparameters: {'n_layer_1': 65, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.0007583028641934986, 'learning_rate': 'constant'}
Best Validation Accuracy: 90.60%


In [13]:
# Use the best hyperparameters to train the final SVM model
best_params={'hidden_layer_sizes': 65, 'activation': 'logistic', 'solver': 'adam', 'alpha': 0.0007583028641934986, 'learning_rate': 'constant'}
final_clf = MLPClassifier(
    random_state=42,
    max_iter=1000,  # You can adjust max_iter based on your needs
    **best_params
)
final_clf.fit(X_train, y_train)

# Make predictions on the test set using the best SVM model
y_test_pred_svm = final_clf.predict(X_test)

# Evaluate the SVM model on the test set
test_accuracy_svm = accuracy_score(y_test, y_test_pred_svm)
print(f"Test Accuracy with Best SVM Model: {test_accuracy_svm * 100:.2f}%")


Test Accuracy with Best SVM Model: 90.00%


In [30]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the neural network model
model = models.Sequential()

# Add input layer

# Add hidden layers
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='relu'))

# Add output layer
model.add(layers.Dense(8, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [31]:
# Assuming you have X_train, y_train, X_val, y_val
model.fit(X_train, y_train, epochs=200, validation_data=(X_val, y_val))


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200


Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch

Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.src.callbacks.History at 0x16d873d50>

In [29]:

# Display the model summary
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 256)               11008     
                                                                 
 dropout_6 (Dropout)         (None, 256)               0         
                                                                 
 dense_13 (Dense)            (None, 128)               32896     
                                                                 
 dropout_7 (Dropout)         (None, 128)               0         
                                                                 
 dense_14 (Dense)            (None, 64)                8256      
                                                                 
 dense_15 (Dense)            (None, 8)                 520       
                                                                 
Total params: 52680 (205.78 KB)
Trainable params: 5268

In [33]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers.legacy import Adam
import optuna

# Assuming num_features is the number of features in your input data
num_features = X_train.shape[1]

def build_model(trial):
    model = models.Sequential()

    # Add input layer
    model.add(layers.InputLayer(input_shape=(num_features,)))

    # Determine the number of hidden layers
    num_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 3)

    # Add hidden layers with batch normalization
    for i in range(num_hidden_layers):
        units = trial.suggest_int(f'units_layer_{i}', 64, 512)
        model.add(layers.Dense(units))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu'))
        dropout_rate = trial.suggest_float(f'dropout_layer_{i}', 0.2, 0.5)
        model.add(layers.Dropout(dropout_rate))

    # Add output layer
    model.add(layers.Dense(8, activation='softmax'))

    # Compile the model
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Define the Optuna objective function
def objective(trial):
    model = build_model(trial)

    # Train the model
    model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=0)

    # Evaluate the model on the validation set
    _, val_accuracy = model.evaluate(X_val, y_val, verbose=0)

    return val_accuracy

# Create an Optuna study and optimize the model
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters from the study
best_params = study.best_params
best_model = build_model(optuna.trial.FixedTrial(best_params))

# Train the best model on the full training set
best_model.fit(X_train, y_train, epochs=10, verbose=1)


[I 2024-01-24 01:13:52,316] A new study created in memory with name: no-name-f3325196-6325-4feb-bc92-3d6ecfa3bd6e
[I 2024-01-24 01:13:53,413] Trial 0 finished with value: 0.8777429461479187 and parameters: {'num_hidden_layers': 1, 'units_layer_0': 323, 'dropout_layer_0': 0.30400574242267187, 'lr': 0.003927064113359008}. Best is trial 0 with value: 0.8777429461479187.
[I 2024-01-24 01:13:55,121] Trial 1 finished with value: 0.852664589881897 and parameters: {'num_hidden_layers': 3, 'units_layer_0': 82, 'dropout_layer_0': 0.43621616299654425, 'units_layer_1': 173, 'dropout_layer_1': 0.2948673069049142, 'units_layer_2': 166, 'dropout_layer_2': 0.4081403055916702, 'lr': 0.009520909828257627}. Best is trial 0 with value: 0.8777429461479187.
[I 2024-01-24 01:13:56,796] Trial 2 finished with value: 0.846394956111908 and parameters: {'num_hidden_layers': 2, 'units_layer_0': 242, 'dropout_layer_0': 0.31293173180566725, 'units_layer_1': 94, 'dropout_layer_1': 0.3387752416510282, 'lr': 0.00064554

[I 2024-01-24 01:14:33,889] Trial 27 finished with value: 0.7680251002311707 and parameters: {'num_hidden_layers': 2, 'units_layer_0': 358, 'dropout_layer_0': 0.22914682814591522, 'units_layer_1': 330, 'dropout_layer_1': 0.3644220487851273, 'lr': 0.005503547615995007}. Best is trial 15 with value: 0.8934169411659241.
[I 2024-01-24 01:14:35,017] Trial 28 finished with value: 0.8495298027992249 and parameters: {'num_hidden_layers': 1, 'units_layer_0': 474, 'dropout_layer_0': 0.33393856767818364, 'lr': 0.0008979536455565512}. Best is trial 15 with value: 0.8934169411659241.
[I 2024-01-24 01:14:36,096] Trial 29 finished with value: 0.8808777332305908 and parameters: {'num_hidden_layers': 1, 'units_layer_0': 333, 'dropout_layer_0': 0.29362655636481894, 'lr': 0.0004086036928365907}. Best is trial 15 with value: 0.8934169411659241.
[I 2024-01-24 01:14:37,568] Trial 30 finished with value: 0.8401253819465637 and parameters: {'num_hidden_layers': 2, 'units_layer_0': 290, 'dropout_layer_0': 0.26

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2d5287ad0>

In [35]:
# Train the best model on the full training set
best_model.fit(X_train, y_train, epochs=200, verbose=1)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 15

<keras.src.callbacks.History at 0x2d33dfbd0>

In [40]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Assuming 'X_train', 'X_val', 'X_test', 'y_train', 'y_val', 'y_test' are your datasets
num_classes=8

# Encode labels using LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# Define the neural network model
model = models.Sequential([
    layers.Flatten(input_shape=(X_train.shape[1:])),
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# preferred_model.compile(
#     loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  #<-- Note
#     optimizer=tf.keras.optimizers.Adam(0.001),
# )

# Train the model
history = model.fit(X_train, y_train_encoded, epochs=200, validation_data=(X_val, y_val_encoded))

# # Evaluate the model on the test set
# test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
# print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200


Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch

Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [41]:
preds=model.predict(X_test)



In [42]:
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)



In [32]:
import joblib
joblib.dump(final_ridge_clf, 'final_ridge_clf2.pkl')

['final_ridge_clf2.pkl']

In [15]:
# FOR NEURAL
import cv2
import mediapipe as mp
import pandas as pd
import joblib
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the pretrained SVM model
# best_svm_model = joblib.load('gesture_recognition_model.pkl')
# best_svm_model = joblib.load('gesture_recognition_model_better.pkl')
# best_svm_model = joblib.load('rfc_model.pkl')
best_svm_model = joblib.load('mlpc1.pkl')


# Initialize MediaPipe Hand module
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Function to extract landmarks
def get_landmarks(image):
    landmark_list = []
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            for landmark in hand_landmarks.landmark:
                # Multiply x and y values by image width and height to get pixel values
                x = int(landmark.x * image.shape[1])
                y = int(landmark.y * image.shape[0])
                landmark_list.append((x, y))
    return landmark_list

# Create a DataFrame to store hand landmarks
columns = [f'l{i}_x' for i in range(21)] + [f'l{i}_y' for i in range(21)] + [f'l{i}_z' for i in range(21)]
hand_landmarks_df = pd.DataFrame(columns=columns)

# Initialize MediaPipe Hands
with mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5) as hands:
    # Open the camera
    cap = cv2.VideoCapture(1)

    # Set the frame width and height
#     cap.set(3, 640)
#     cap.set(4, 480)

    while cap.isOpened():
        # Read a frame from the camera
        ret, frame = cap.read()

        # Convert the BGR image to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame to get hand landmarks
        results = hands.process(rgb_frame)

        # Check if hand landmarks are detected
        if results.multi_hand_landmarks:
            # Use the landmarks of the first detected hand (assuming one hand in the frame)
            hand_landmarks = results.multi_hand_landmarks[0]
            
            # Extract landmark positions
            if get_landmarks(frame):
                lst=[]
                for i in get_landmarks(frame):
                    for j in i:
                        lst.append(j)
                df=pd.DataFrame(lst)

                # Append landmark positions to the DataFrame
                hand_landmarks_df = pd.concat([hand_landmarks_df, df ], ignore_index=True)

                # Display the frame with hand landmarks (optional)
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Make predictions using the pretrained SVM model
                X_frame = hand_landmarks_df.iloc[-1].values.reshape(1, -1)
                prediction_i = best_svm_model.predict(X_frame)
                print(prediction_i)
                predictions = label_encoder.inverse_transform(np.reshape(np.argmax(prediction_i), (1,)))

                # Decode the predicted label
                predicted_gesture = predictions
                print(predictions)
                # Display the predicted gesture on the frame
                cv2.putText(frame, f'Gesture: {predicted_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        cv2.imshow('Hand Landmarks', frame)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the camera and close all windows
    cap.release()
    cv2.destroyAllWindows()


objc[10128]: Class CaptureDelegate is implemented in both /Users/aryansood/anaconda3/lib/python3.11/site-packages/cv2/cv2.abi3.so (0x154112620) and /Users/aryansood/anaconda3/lib/python3.11/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x1503f8860). One of the two will be used. Which one is undefined.
objc[10128]: Class CVWindow is implemented in both /Users/aryansood/anaconda3/lib/python3.11/site-packages/cv2/cv2.abi3.so (0x154112670) and /Users/aryansood/anaconda3/lib/python3.11/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x146f10a68). One of the two will be used. Which one is undefined.
objc[10128]: Class CVView is implemented in both /Users/aryansood/anaconda3/lib/python3.11/site-packages/cv2/cv2.abi3.so (0x154112698) and /Users/aryansood/anaconda3/lib/python3.11/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x146f10a90). One of the two will be used. Which one is undefined.
objc[10128]: Class CVSlider is implemented in both /U

ValueError: Input X contains NaN.
MLPClassifier does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [17]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming you have your feature data in X and labels in y
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the HistGradientBoostingClassifier
clf = HistGradientBoostingClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 79.94%


In [6]:
# Initialize and train the model
from sklearn.svm import SVC
clf = SVC(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the validation set
y_val_pred = clf.predict(X_val)

# Evaluate the model on the validation set
accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Make predictions on the test set
y_test_pred = clf.predict(X_test)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 88.68%
Test Accuracy: 83.12%


In [None]:
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming you have your feature data in X and labels in y
# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

def objective(trial):
    # Define hyperparameters to optimize
    C = trial.suggest_loguniform('C', 1e-3, 1e3)
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
    degree = trial.suggest_int('degree', 2, 5) if kernel == 'poly' else None
    gamma = trial.suggest_loguniform('gamma', 1e-3, 1e3) if kernel in ['rbf', 'poly', 'sigmoid'] else 'scale'

    # Create and train the SVC with suggested hyperparameters
    svc = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, random_state=42)
    svc.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = svc.predict(X_val)

    # Evaluate the model on the validation set
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create the Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params

# Train the final SVC model with the best hyperparameters on the entire training set
final_svc = SVC(random_state=42, **best_params)
final_svc.fit(X_train, y_train)

# Make predictions on the test set
y_test_pred = final_svc.predict(X_test)

# Evaluate the model on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Best Validation Accuracy: {study.best_value * 100:.2f}%")
print(f"Test Accuracy with Best Hyperparameters: {test_accuracy * 100:.2f}%")


[I 2024-01-24 01:50:39,189] A new study created in memory with name: no-name-522c8ea8-ade8-49da-99be-f0d8a334d4d4
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
  gamma = trial.suggest_loguniform('gamma', 1e-3, 1e3) if kernel in ['rbf', 'poly', 'sigmoid'] else 'scale'


In [None]:
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming you have your feature data in X and labels in y
# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

def objective(trial):
    # Define hyperparameters to optimize
    C = trial.suggest_float('C', 1e-3, 1e3, log=True)
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
    
    if kernel == 'poly':
        degree = trial.suggest_int('degree', 2, 5)
    else:
        degree = 3  # Default degree for non-poly kernels
       
    gamma = trial.suggest_float('gamma', 1e-3, 1e3, log=True) if kernel in ['rbf', 'poly', 'sigmoid'] else 'scale'

    # Create and train the SVC with suggested hyperparameters
    svc = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, random_state=42)
    svc.fit(X_train, y_train)

    # Make predictions on the validation set
    y_val_pred = svc.predict(X_val)

    # Evaluate the model on the validation set
    accuracy = accuracy_score(y_val, y_val_pred)

    return accuracy

# Create the Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params

# Train the final SVC model with the best hyperparameters on the entire training set
final_svc = SVC(random_state=42, **best_params)
final_svc.fit(X_train, y_train)

# Make predictions on the test set
y_test_pred = final_svc.predict(X_test)

# Evaluate the model on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Best Validation Accuracy: {study.best_value * 100:.2f}%")
print(f"Test Accuracy with Best Hyperparameters: {test_accuracy * 100:.2f}%")
