In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.naive_bayes import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, NuSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from hyperopt import fmin, tpe, hp, Trials
from sklearn.model_selection import cross_val_score
from functools import partial
import pandas as pd
import numpy as np
import scipy
import random
from sklearn.cluster import DBSCAN, KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import roc_auc_score
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK
import scipy.spatial.distance as dist
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides scikit-learn convergence/deprecation warnings — confirm that is intended.
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
# Single module-level scaler instance; the *_svm data loaders re-fit it with
# fit_transform() on every call, so its fitted state reflects the latest loader run.
scaler = StandardScaler()

In [None]:
# Define the hyperopt search space for each classifier.
# Each dict maps an estimator keyword argument to a hyperopt sampling expression.
# Labels (the first argument of hp.*) repeat across spaces; every space is passed
# to its own fmin() call, so the labels only need to be unique within one dict.

# RandomForestClassifier
rf_space = {
    'n_estimators': hp.randint('n_estimators', 1, 50),
    'max_depth': hp.randint('max_depth',1,10),
    # Sampled as a float, which scikit-learn interprets as a fraction of the samples.
    'min_samples_split':hp.uniform('min_samples_split', 0, 1),
    'min_samples_leaf':hp.randint('min_samples_leaf',1,10),
    'criterion':hp.choice('criterion',['gini','entropy']),
    'max_features':hp.choice('max_features',['sqrt','log2'])
}

# AdaBoostClassifier
# NOTE(review): 'SAMME.R' is not accepted by every scikit-learn release — confirm against the installed version.
ada_space = {
    'n_estimators': hp.randint('n_estimators', 1, 50),
    'learning_rate': hp.uniform('learning_rate', 0.01, 1.0),
    'algorithm': hp.choice('algorithm', ['SAMME', 'SAMME.R'])
}

# GaussianNB: a single candidate value, so every trial evaluates the same configuration.
nb_space = {
    'var_smoothing': hp.choice('var_smoothing', [1e-09])
}

# DecisionTreeClassifier (CART)
cart_space = {
    'criterion': hp.choice('criterion', ['gini', 'entropy', 'log_loss']),
    'max_features': hp.choice('max_features', [None, 'sqrt', 'log2']),
    'splitter': hp.choice('splitter', ['best', 'random']),
    'max_depth': hp.randint('max_depth',1,10),
    'min_samples_split': hp.randint('min_samples_split', 2, 10)
}

# KNeighborsClassifier
knn_space = {
    'n_neighbors': hp.randint('n_neighbors', 5, 20),
    'algorithm': hp.choice('algorithm', ['ball_tree', 'kd_tree', 'brute'])
}

# RidgeClassifier
ridge_space = {
    'alpha': hp.uniform('alpha', 0.1, 1.0),
    'solver': hp.choice('solver', ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']),
    'max_iter': hp.randint('max_iter', 1000,15000)
}

# SVC: the kernel is pinned to 'linear' (the wider kernel/gamma choices are disabled below).
# 'degree' is still sampled, although scikit-learn's SVC only uses it for the 'poly' kernel.
svm_space = {
    'C': hp.lognormal('C', 0.1, 1.0),
    # 'kernel': hp.choice('kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
    'kernel': hp.choice('kernel', ['linear']),
    'degree': hp.randint('degree', 1,5),
    #'gamma': hp.choice('gamma', ['scale', 'auto'])
}

# MLPClassifier
mlp_space = {
    'activation': hp.choice('activation', ['identity', 'logistic', 'tanh', 'relu']),
    'hidden_layer_sizes': hp.choice('hidden_layer_sizes', [(50,), (100,), (50, 50)]),
    'alpha': hp.uniform('alpha', 0.0001, 0.01),
    'learning_rate': hp.choice('learning_rate', ['constant', 'invscaling', 'adaptive']),
    'max_iter': hp.randint('max_iter', 10, 100)
}

# Create a dictionary mapping classifiers to their search spaces
# (display name -> (estimator class, search space)).
# NOTE(review): no other cell visible in this notebook reads this dictionary.
classifiers = {
    'RandomForest': (RandomForestClassifier, rf_space),
    'AdaBoost': (AdaBoostClassifier, ada_space),
    'NaiveBayes': (GaussianNB, nb_space),
    'DecisionTree': (DecisionTreeClassifier, cart_space),
    'KNN': (KNeighborsClassifier, knn_space),
    'Ridge': (RidgeClassifier, ridge_space),
    'SVM': (SVC, svm_space),
    'MLP': (MLPClassifier, mlp_space),
}

# Suggestion algorithm handed to every fmin() call: hyperopt's Tree-structured Parzen Estimator.
algorithm = tpe.suggest

In [None]:
# Load dataset files for each ML project by specifying correct path
def _load_release_split(train_files, test_file, data_dir='../Dataset'):
    """Build a source/target split from per-release CSV files of one project.

    The training releases are concatenated in the given order; the row index is
    NOT reset, so index labels repeat across releases (as in the raw files).
    The bookkeeping columns 'Unnamed: 0', 'Project' and 'Files' are dropped and
    the 'Buggy' column is split off as the label.

    Args:
        train_files: CSV file names (relative to data_dir) forming the source set.
        test_file: CSV file name (relative to data_dir) forming the target set.
        data_dir: directory that contains the CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target): feature DataFrames and
        'Buggy' label Series for the source and target sets.

    Raises:
        FileNotFoundError: if a CSV file is missing (raised by pandas.read_csv).
        KeyError: if a bookkeeping column or 'Buggy' is absent.
    """
    non_feature_columns = ['Unnamed: 0', 'Project', 'Files']

    def read_release(file_name):
        return pd.read_csv(f'{data_dir}/{file_name}')

    # Concatenate first, then drop, and return new frames instead of mutating in place.
    train_data = pd.concat([read_release(name) for name in train_files]).drop(columns=non_feature_columns)
    test_data = read_release(test_file).drop(columns=non_feature_columns)

    return (
        train_data.drop(columns='Buggy'),
        train_data['Buggy'],
        test_data.drop(columns='Buggy'),
        test_data['Buggy'],
    )


def data_loading_transformers(data_dir='../Dataset'):
    """Load transformers: releases 2.0.0, 3.5.0, 4.13.0 as source, 4.23.0 as target.

    Args:
        data_dir: directory that contains the release CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target).
    """
    return _load_release_split(
        ['transformers_2.0.0.csv', 'transformers_3.5.0.csv', 'transformers_4.13.0.csv'],
        'transformers_4.23.0.csv',
        data_dir,
    )


def data_loading_yolov5(data_dir='../Dataset'):
    """Load yolov5: releases 4.0 and 6.0 as source, 7.0 as target.

    Args:
        data_dir: directory that contains the release CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target).
    """
    return _load_release_split(
        ['yolov5_4.0.csv', 'yolov5_6.0.csv'],
        'yolov5_7.0.csv',
        data_dir,
    )


def data_loading_jax(data_dir='../Dataset'):
    """Load jax: releases 0.1.73, 0.2.21, 0.2.28 as source, 0.3.15 as target.

    Args:
        data_dir: directory that contains the release CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target).
    """
    return _load_release_split(
        ['jax_0.1.73.csv', 'jax_0.2.21.csv', 'jax_0.2.28.csv'],
        'jax_0.3.15.csv',
        data_dir,
    )


def data_loading_lightning(data_dir='../Dataset'):
    """Load lightning: releases 0.5.1, 1.0.0, 1.5.0 as source, 1.8.0 as target.

    Args:
        data_dir: directory that contains the release CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target).
    """
    return _load_release_split(
        ['lightning_0.5.1.csv', 'lightning_1.0.0.csv', 'lightning_1.5.0.csv'],
        'lightning_1.8.0.csv',
        data_dir,
    )


def data_loading_ray(data_dir='../Dataset'):
    """Load ray: releases 0.3.0, 0.6.1, 0.8.0, 1.1.0, 1.9.0 as source, 2.0.0 as target.

    Args:
        data_dir: directory that contains the release CSV files.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target).
    """
    return _load_release_split(
        ['ray_0.3.0.csv', 'ray_0.6.1.csv', 'ray_0.8.0.csv', 'ray_1.1.0.csv', 'ray_1.9.0.csv'],
        'ray_2.0.0.csv',
        data_dir,
    )

In [None]:
# Define the objective function for hyperparameter optimization
def objective_rf(search_space):
    """Hyperopt loss for RandomForestClassifier: negated ROC AUC on the target set.

    Reads the module-level X_source/Y_source (training) and X_target/Y_target
    (evaluation) bindings, which must be assigned before fmin() calls this.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score, so minimising the loss maximises the AUC.
    """
    forest = RandomForestClassifier(random_state=42, **search_space)
    forest.fit(X_source, Y_source)
    # The AUC is computed from predict() output (hard labels), not from scores.
    predicted_labels = forest.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_ada(search_space):
    """Hyperopt loss for AdaBoostClassifier: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    booster = AdaBoostClassifier(random_state=42, **search_space)
    booster.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = booster.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_nb(search_space):
    """Hyperopt loss for GaussianNB: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    bayes = GaussianNB(**search_space)
    bayes.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = bayes.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_cart(search_space):
    """Hyperopt loss for DecisionTreeClassifier: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    tree = DecisionTreeClassifier(random_state=42, **search_space)
    tree.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = tree.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_knn(search_space):
    """Hyperopt loss for KNeighborsClassifier: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    neighbours = KNeighborsClassifier(**search_space)
    neighbours.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = neighbours.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_ridge(search_space):
    """Hyperopt loss for RidgeClassifier: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    ridge = RidgeClassifier(random_state=42, **search_space)
    ridge.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = ridge.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_mlp(search_space):
    """Hyperopt loss for MLPClassifier: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    network = MLPClassifier(random_state=42, **search_space)
    network.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = network.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

def objective_svm(search_space):
    """Hyperopt loss for SVC: negated ROC AUC on the target set.

    Trains on the module-level X_source/Y_source and scores on X_target/Y_target.

    Args:
        search_space: keyword arguments sampled by hyperopt for the estimator.

    Returns:
        The negated roc_auc_score of the predicted labels.
    """
    machine = SVC(random_state=42, **search_space)
    machine.fit(X_source, Y_source)
    # Hard labels from predict() are what gets scored.
    predicted_labels = machine.predict(X_target)
    return -roc_auc_score(Y_target, predicted_labels)

In [None]:

def hyperopt_classifier(max_evals=10):
    """Run a hyperopt search for each of the eight classifiers and collect trial AUCs.

    Each search minimises the matching module-level objective_* function over its
    *_space with the module-level `algorithm`. The objectives read the
    module-level X_source/Y_source/X_target/Y_target, so those must be assigned
    before calling this function. The best parameters of every search are printed.

    Args:
        max_evals: number of trials per classifier (defaults to 10).

    Returns:
        Tuple of eight lists, in the order Random Forest, Naive Bayes, AdaBoost,
        CART, KNN, Ridge, MLP, SVM. Each list holds one string per trial: that
        trial's ROC AUC formatted with two decimals.
    """
    # (printed label, objective, search space) in the order results are returned.
    searches = [
        ("Random Forest", objective_rf, rf_space),
        ("Naive Bayes", objective_nb, nb_space),
        ("AdaBoost", objective_ada, ada_space),
        ("CART", objective_cart, cart_space),
        ("KNN", objective_knn, knn_space),
        ("Ridge", objective_ridge, ridge_space),
        ("MLP", objective_mlp, mlp_space),
        ("SVM", objective_svm, svm_space),
    ]

    auc_per_classifier = []
    for label, objective, space in searches:
        # A fresh Trials object per classifier keeps the histories separate.
        trials = Trials()
        best_params = fmin(
            fn=objective,
            space=space,
            algo=algorithm,
            max_evals=max_evals,
            trials=trials)
        # Decode the best point against the space it was sampled from; fmin
        # returns hp.choice parameters as indices, which space_eval resolves.
        print(f"{label}: ", space_eval(space, best_params))

        # The objectives return -AUC, so negate the loss to recover the AUC.
        auc_per_classifier.append(
            [format(-result['loss'], '.2f') for result in trials.results])

    return tuple(auc_per_classifier)

In [None]:
def data_loading_transformers_svm():
    """Load the transformers split with standardised features for the SVM search.

    Reuses data_loading_transformers() for the file handling, then scales the
    features with the module-level `scaler`.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target) where both feature sets
        are the scaler's output and the labels are the unchanged 'Buggy' Series.
    """
    X_source, Y_source, X_target, Y_target = data_loading_transformers()
    # Fit on the source releases only, then apply the same statistics to the
    # target release so both sets live in the same feature space.
    X_source = scaler.fit_transform(X_source)
    X_target = scaler.transform(X_target)
    return X_source, Y_source, X_target, Y_target

def data_loading_yolov5_svm():
    """Load the yolov5 split with standardised features for the SVM search.

    Reuses data_loading_yolov5() for the file handling, then scales the
    features with the module-level `scaler`.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target) where both feature sets
        are the scaler's output and the labels are the unchanged 'Buggy' Series.
    """
    X_source, Y_source, X_target, Y_target = data_loading_yolov5()
    # Fit on the source releases only, then apply the same statistics to the
    # target release so both sets live in the same feature space.
    X_source = scaler.fit_transform(X_source)
    X_target = scaler.transform(X_target)
    return X_source, Y_source, X_target, Y_target



def data_loading_jax_svm():
    """Load the jax split with standardised features for the SVM search.

    Reuses data_loading_jax() for the file handling, then scales the features
    with the module-level `scaler`.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target) where both feature sets
        are the scaler's output and the labels are the unchanged 'Buggy' Series.
    """
    X_source, Y_source, X_target, Y_target = data_loading_jax()
    # Fit on the source releases only, then apply the same statistics to the
    # target release so both sets live in the same feature space.
    X_source = scaler.fit_transform(X_source)
    X_target = scaler.transform(X_target)
    return X_source, Y_source, X_target, Y_target

def data_loading_lightning_svm():
    """Load the lightning split with standardised features for the SVM search.

    Reuses data_loading_lightning() for the file handling, then scales the
    features with the module-level `scaler`.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target) where both feature sets
        are the scaler's output and the labels are the unchanged 'Buggy' Series.
    """
    X_source, Y_source, X_target, Y_target = data_loading_lightning()
    # Fit on the source releases only, then apply the same statistics to the
    # target release so both sets live in the same feature space.
    X_source = scaler.fit_transform(X_source)
    X_target = scaler.transform(X_target)
    return X_source, Y_source, X_target, Y_target

def data_loading_ray_svm():
    """Load the ray split with standardised features for the SVM search.

    Reuses data_loading_ray() for the file handling, then scales the features
    with the module-level `scaler`.

    Returns:
        Tuple (X_source, Y_source, X_target, Y_target) where both feature sets
        are the scaler's output and the labels are the unchanged 'Buggy' Series.
    """
    X_source, Y_source, X_target, Y_target = data_loading_ray()
    # Fit on the source releases only, then apply the same statistics to the
    # target release so both sets live in the same feature space.
    X_source = scaler.fit_transform(X_source)
    X_target = scaler.transform(X_target)
    return X_source, Y_source, X_target, Y_target

In [None]:
# Hyperopt search for the SVM on the scaled (*_svm) data of each project.
def _svm_hyperopt(load_data, max_evals=50):
    """Tune SVC on the split returned by `load_data` and collect the trial AUCs.

    The objective is a closure over the freshly loaded split, so the search uses
    exactly the data returned by `load_data` rather than whatever happens to be
    bound to the module-level X_source/Y_source/X_target/Y_target.

    Args:
        load_data: zero-argument callable returning
            (X_source, Y_source, X_target, Y_target).
        max_evals: number of hyperopt trials.

    Returns:
        List with one string per trial: that trial's ROC AUC, two decimals.
    """
    X_source, Y_source, X_target, Y_target = load_data()

    def objective(search_space):
        # Same loss as objective_svm, bound to the local split.
        model = SVC(**search_space, random_state=42)
        model.fit(X_source, Y_source)
        return -roc_auc_score(Y_target, model.predict(X_target))

    svm_trials = Trials()
    best_params_svm = fmin(
        fn=objective,
        space=svm_space,
        algo=algorithm,
        max_evals=max_evals,
        trials=svm_trials)
    # Decode against the SVM space the parameters were sampled from.
    print("SVM: ", space_eval(svm_space, best_params_svm))

    # The objective returns -AUC, so negate the loss to recover the AUC.
    return [format(-result['loss'], '.2f') for result in svm_trials.results]


def SVM_hyperopt_jax():
    """Run the SVM search on the scaled jax split; returns the per-trial AUC strings."""
    return _svm_hyperopt(data_loading_jax_svm)


def SVM_hyperopt_lightning():
    """Run the SVM search on the scaled lightning split; returns the per-trial AUC strings."""
    return _svm_hyperopt(data_loading_lightning_svm)


def SVM_hyperopt_ray():
    """Run the SVM search on the scaled ray split; returns the per-trial AUC strings."""
    return _svm_hyperopt(data_loading_ray_svm)


def SVM_hyperopt_transformers():
    """Run the SVM search on the scaled transformers split; returns the per-trial AUC strings."""
    return _svm_hyperopt(data_loading_transformers_svm)


def SVM_hyperopt_yolov5():
    """Run the SVM search on the scaled yolov5 split; returns the per-trial AUC strings."""
    return _svm_hyperopt(data_loading_yolov5_svm)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

def violin_plots(data_jax, data_lightning, data_ray, data_transformers, data_yolov5):
  """Draw one ROC AUC violin plot per project in a single row, then save and show it.

  Each argument is a DataFrame with a 'Classifier' column (x axis) and a
  'ROC AUC' column (y axis). The figure is written to 'wpdp_results.png' in
  the working directory before being displayed.
  """
  panels = [
      ('Jax', data_jax),
      ('Lightning', data_lightning),
      ('Ray', data_ray),
      ('Transformers', data_transformers),
      ('Yolov5', data_yolov5),
  ]

  # One subplot per project, left to right in the order listed above.
  fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(30, 4))

  for axis, (title, frame) in zip(axes, panels):
    sns.violinplot(x='Classifier', y='ROC AUC', data=frame, ax=axis)
    axis.set_title(title)

  # Rotate the classifier names so they do not overlap.
  for axis in axes:
    axis.tick_params(axis='x', rotation=45)

  plt.savefig('wpdp_results.png', bbox_inches='tight')
  plt.show()





In [None]:
def _auc_frame(*auc_lists):
    """Stack the eight per-classifier AUC lists into one long DataFrame.

    The lists must be in the order returned by hyperopt_classifier(). The result
    has a 'Classifier' column (one label per trial) and a numeric 'ROC AUC' column.
    """
    labels = ['Random Forest', 'Naive Bayes', 'AdaBoost', 'CART', 'KNN', 'Ridge', 'MLP', 'SVM']
    classifier_column = []
    auc_column = []
    for label, values in zip(labels, auc_lists):
        classifier_column.extend([label] * len(values))
        auc_column.extend(values)
    frame = pd.DataFrame({'Classifier': classifier_column, 'ROC AUC': auc_column})
    # The AUC values arrive as formatted strings; convert them for plotting.
    frame['ROC AUC'] = pd.to_numeric(frame['ROC AUC'])
    return frame


# For every project: bind the module-level split that the objective_* functions
# read, run all searches, and keep the trial AUCs as a DataFrame. The dedicated
# SVM_hyperopt_* runs are not used here; the SVM results come from hyperopt_classifier().

# --- Jax ---
print("Jax parameters hyper optimization: ")
X_source, Y_source, X_target, Y_target = data_loading_jax()
(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
 all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm) = hyperopt_classifier()
data_jax = _auc_frame(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
                      all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm)

print("*****************************************************")

# --- Lightning ---
print("Lightning parameters hyper optimization: ")
X_source, Y_source, X_target, Y_target = data_loading_lightning()
(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
 all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm) = hyperopt_classifier()
data_lightning = _auc_frame(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
                            all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm)

print("****************************************************")

# --- Ray ---
print("Ray parameters hyper optimization: ")
X_source, Y_source, X_target, Y_target = data_loading_ray()
(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
 all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm) = hyperopt_classifier()
data_ray = _auc_frame(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
                      all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm)

print("***********************************************************")

# --- Transformers ---
print("Transformers parameters hyper optimization: ")
X_source, Y_source, X_target, Y_target = data_loading_transformers()
(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
 all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm) = hyperopt_classifier()
data_transformers = _auc_frame(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
                               all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm)

print("*************************************************************")

# --- Yolov5 ---
print("Yolov5 parameters hyper optimization: ")
X_source, Y_source, X_target, Y_target = data_loading_yolov5()
(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
 all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm) = hyperopt_classifier()
data_yolov5 = _auc_frame(all_auc_values_rf, all_auc_values_nb, all_auc_values_ada, all_auc_values_cart,
                         all_auc_values_knn, all_auc_values_ridge, all_auc_values_mlp, all_auc_values_svm)

violin_plots(data_jax, data_lightning, data_ray, data_transformers, data_yolov5)