In [None]:
!pip install smac

Collecting smac
  Downloading smac-2.1.0.tar.gz (148 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.1/148.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting pynisher>=1.0.0 (from smac)
  Downloading pynisher-1.0.10.tar.gz (30 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting ConfigSpace>=0.6.1 (from smac)
  Downloading ConfigSpace-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pyrfr>=0.9.0 (from smac)
  Downloading pyrfr-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (601 bytes)
Collecting dask-jobqueue (from smac)
  Downloading dask_jobqueue-0.8.5-py2.py3-none-any.whl.metadata (1

In [None]:
from ConfigSpace import Configuration, ConfigurationSpace

import numpy as np
from smac import HyperparameterOptimizationFacade, Scenario
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

iris = datasets.load_iris()


def train(config: Configuration, seed: int = 0) -> float:
    """SMAC target function: 5-fold cross-validation error of an SVC on iris.

    Args:
        config: Configuration supplying the SVC regularisation value under key "C".
        seed: Forwarded to the SVC as ``random_state``.

    Returns:
        One minus the mean cross-validation score, so lower is better.
    """
    svc = SVC(C=config["C"], random_state=seed)
    fold_scores = cross_val_score(svc, iris.data, iris.target, cv=5)
    mean_score = np.mean(fold_scores)
    return 1 - mean_score


# Search space: a single hyperparameter "C" bounded by (0.1, 1000).
c_bounds = (0.100, 1000.0)
configspace = ConfigurationSpace({"C": c_bounds})

# Optimization environment: deterministic target function, budget of 200 trials.
scenario = Scenario(configspace, deterministic=True, n_trials=200)

# Run SMAC on `train` and keep the best configuration it returns.
smac = HyperparameterOptimizationFacade(scenario, train)
incumbent = smac.optimize()

In [None]:
# A Configuration behaves like a mapping, so dict() displays the hyperparameter
# names and values directly (the bare `.values` attribute was never called).
dict(incumbent)


In [None]:
# Re-score the configuration SMAC returned, using the same 5-fold CV as the search.
# C is read from `incumbent` rather than pasted from an earlier run's output, so
# the cell stays correct when the optimization is re-run.
classifier = SVC(C=incumbent["C"])
scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
print(np.mean(scores))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV, ShuffleSplit
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
import seaborn as sn
import warnings

warnings.filterwarnings('ignore')

In [None]:
# Source CSV: OpenML data id 31.
# NOTE(review): the target column is `class_good`, which suggests this is the
# German Credit (credit-g) dataset rather than a fraud-detection set — confirm.
url = "https://www.openml.org/data/get_csv/31/dataset_31.csv"

# Read the CSV, discard rows with missing values, then one-hot encode the
# categorical columns (drop_first=True drops one indicator per category).
df = pd.get_dummies(pd.read_csv(url).dropna(), drop_first=True)

In [None]:
# Preview the first rows of the encoded frame.
df.head()

In [None]:
# Count how many rows fall into each value of the target column.
df['class_good'].value_counts()

In [None]:
target = "class_good"
y = df[target]
X = df.drop(columns=target)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train_raw, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

def Which_features(X_train,y_train,number_of_columns):
    """Return the names of the top-k features ranked by the ANOVA F-statistic.

    Args:
        X_train: DataFrame of training features; its ``columns`` supply the names.
        y_train: Target values aligned with ``X_train``.
        number_of_columns: Number of features to keep (passed to SelectKBest as ``k``).

    Returns:
        list: Names of the selected columns, in their original column order.
    """
    # Only the support mask is needed, so fit the selector without building
    # the transformed matrix that the previous version computed and discarded.
    selector = SelectKBest(score_func=f_classif, k=number_of_columns)
    selector.fit(X_train, y_train)

    return list(X_train.columns[selector.get_support()])

# Keep the 15 highest-scoring features (scored on the training split only) and show their names.
selected_features = Which_features(X_train_raw,y_train,number_of_columns=15)
selected_features

In [None]:
# Restrict the full feature matrix and both splits to the selected columns.
X =X[selected_features]
X_train_raw = X_train_raw[selected_features]
X_test = X_test[selected_features]

In [None]:
# Sanity check: (rows, columns) of the training features after selection.
X_train_raw.shape

In [None]:
# Baseline: a 7-nearest-neighbours classifier on the selected features.
classifer=KNeighborsClassifier(n_neighbors=7)
classifer.fit(X_train_raw,y_train)
y_pred = classifer.predict(X_test)
# Loss is the test-set error rate (1 - accuracy), the same quantity Models.classification reports.
loss=1-accuracy_score(y_test,y_pred)
print(loss)


In [None]:
def plot_from_smac(smac) -> None:
    """Scatter every evaluated configuration in (f1, f2) space and mark the incumbents.

    Non-incumbent configurations are drawn as faint blue circles, incumbents as
    red crosses. The two objective values are obtained by calling ``mymodell``
    on the configuration's ``"x"`` value.

    NOTE(review): ``mymodell`` is not defined in the visible notebook, and none of
    the configuration spaces built here has an ``"x"`` hyperparameter; this looks
    like a leftover from another example — confirm before calling.

    Args:
        smac: Optimizer facade exposing ``runhistory.get_configs()`` and
            ``intensifier.get_incumbents()``.
    """
    plt.figure()
    all_configs = smac.runhistory.get_configs()
    best_configs = smac.intensifier.get_incumbents()

    def _scatter(cfg, legend_label, **style):
        # Evaluate both objectives for this configuration and draw a single point.
        f1, f2 = mymodell(cfg["x"])
        plt.scatter(f1, f2, zorder=3000, label=legend_label, **style)

    # Only the configuration at index 0 carries a legend label (and only if it is drawn).
    for idx, cfg in enumerate(all_configs):
        if cfg not in best_configs:
            _scatter(cfg, "Configuration" if idx == 0 else None, c="blue", alpha=0.1, marker="o")

    for idx, cfg in enumerate(best_configs):
        _scatter(cfg, "Incumbent" if idx == 0 else None, c="red", alpha=1, marker="x")

    plt.xlabel("f1")
    plt.ylabel("f2")
    plt.title("Schaffer 2D")
    plt.legend()

    plt.show()

In [None]:
import time

from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer
from ConfigSpace.conditions import InCondition,EqualsCondition
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from smac import Callback
from smac import HyperparameterOptimizationFacade, Scenario
from xgboost import XGBRegressor
# TODO: potential improvements
# - add time-series support
# - search for the best hyperparameters for each model
# - add random-state parameters
# - investigate the problem with the linear SVC
class CustomCallback(Callback):
    """SMAC callback that logs progress and requests a stop after 100 trials."""

    def __init__(self):
        super().__init__()
        # Number of trials reported back to the optimizer so far.
        self.trials_counter=0

    def on_start(self, smbo):
        """Announce the beginning of the optimization."""
        print("let's start the optimization")

    def on_tell_end (self, smbo,info, value):
        """Count the finished trial and, on every 10th trial, print the incumbents.

        Args:
            smbo: Optimizer object; its ``intensifier`` and ``runhistory`` are queried.
            info: Trial description passed by SMAC (unused here).
            value: Trial result passed by SMAC (unused here).

        Returns:
            ``False`` once 100 trials have been counted, which asks SMAC to stop
            the optimization; ``None`` otherwise.
        """
        self.trials_counter+=1
        print(f"the number of trials is: {self.trials_counter}")
        if self.trials_counter%10!=0:
            return None

        for incumbent in smbo.intensifier.get_incumbents():
            # dict(...) replaces the deprecated Configuration.get_dictionary().
            print(f"the incumbent is: {dict(incumbent)}")
            print(f"the incumbent loss is: {smbo.runhistory.get_cost(incumbent)}")

        if self.trials_counter==100:
            print("let's stop the optimization at trial 100")
            return False
        return None
class Models:
    """Configuration space and SMAC target function for a set of candidate models.

    The class builds a conditional search space in which the categorical
    hyperparameter ``'Models'`` picks the estimator and the remaining
    hyperparameters are only active for the estimators that use them. ``train``
    is the function handed to SMAC; it returns two objectives, ``'loss'`` and
    ``'time'``.

    NOTE(review): the loss is computed on ``X_test``/``Y_test``, so the search is
    tuned directly on the held-out split — confirm this is intended.
    """

    def __init__(self,similarModels,problemType,X_train,Y_train,X_test,Y_test):
        """Store the candidate model names, the problem type and the data splits.

        Args:
            similarModels: Model identifiers offered to the optimizer. The
                conditions in ``configspace`` reference 'KNN', 'LR', 'RF', 'SVC'
                for classification and 'LR', 'Lasso', 'Ridge', 'RF', 'XGboost'
                for regression.
            problemType: ``'Classification'`` or ``'Regression'``.
            X_train: Training features.
            Y_train: Training targets.
            X_test: Features used to score each configuration.
            Y_test: Targets used to score each configuration.
        """
        self.Models=similarModels
        self.Problemtype=problemType
        self.X_train=X_train
        self.Y_train=Y_train
        self.X_test=X_test
        self.Y_test=Y_test

    def configspace(self):
        """Build the conditional search space for the configured problem type.

        Returns:
            ConfigurationSpace: Populated for 'Classification' or 'Regression';
            left empty for any other problem type.
        """
        confs = ConfigurationSpace(seed=0)
        if self.Problemtype=='Classification':
            # Which estimator to train, e.g. ['KNN', 'LR', 'RF', 'SVC'].
            models=Categorical('Models',self.Models)
            # KNN: number of neighbours.
            Kneighbors=Integer('Ks',(1,10),default=1)
            # Logistic regression and SVC: value passed as their C parameter.
            rc=Float('regularizationStre',(0.01,1))
            # Random forest: number of trees.
            nestimators=Integer('n_estimators',(1,20),default=10)
            # SVC: kernel choice.
            kernel=Categorical('kernel',['linear','rbf'])

            # Each hyperparameter is only active for the estimators that read it.
            useks=InCondition(child=Kneighbors,parent=models,values=['KNN'])
            userc=InCondition(child=rc,parent=models,values=['LR','SVC'])
            usekernel=InCondition(child=kernel,parent=models,values=['SVC'])
            useEst=EqualsCondition(child=nestimators,parent=models,value='RF')

            confs.add_hyperparameters([models,Kneighbors,rc,nestimators,kernel])
            confs.add_conditions([useks,userc,usekernel,useEst])
        elif self.Problemtype=='Regression':
            models=Categorical('Models',self.Models)
            # Lasso and Ridge: regularisation weight (plain 'LR' has no hyperparameters).
            alpha=Float('alpha',(0.01,100))
            # Random forest and XGBoost: number of trees.
            nestimators=Integer('n_estimators',(1,20),default=10)

            usealpha=InCondition(child=alpha,parent=models,values=['Lasso','Ridge'])
            useEst=InCondition(child=nestimators,parent=models,values=['RF','XGboost'])

            confs.add_hyperparameters([models,alpha,nestimators])
            confs.add_conditions([usealpha,useEst])
        return confs

    def train(self,config:"Configuration",seed: int=0):
        """Target function for SMAC: fit the configured model and score it.

        Args:
            config: Configuration (any mapping works) holding 'Models' plus the
                hyperparameters active for that model.
            seed: Accepted as part of the target-function signature; not used,
                the seeded estimators are built with ``random_state=42``.

        Returns:
            dict: ``{'loss': ..., 'time': ...}`` where 'time' is the wall-clock
            duration measured from the start of this call.

        Raises:
            ValueError: If the problem type is neither 'Classification' nor 'Regression'.
        """
        start_time=time.time()
        # dict(...) replaces the deprecated Configuration.get_dictionary().
        config_dict=dict(config)
        print(f"config_dict:{config_dict}")
        if self.Problemtype=='Classification':
            return self.classification(config_dict,start_time)
        if self.Problemtype=='Regression':
            # start_time was previously omitted here, which raised a TypeError
            # on every regression trial.
            return self.regression(config_dict,start_time)
        raise ValueError(f"unsupported problem type: {self.Problemtype!r}")

    def classification(self,configDict,start_time):
        """Fit the selected classifier and return its test error and elapsed time.

        Args:
            configDict: Active hyperparameters, including the 'Models' key.
            start_time: ``time.time()`` value taken when the trial started.

        Returns:
            dict: 'loss' is ``1 - accuracy`` on the test split, 'time' is the
            number of seconds since ``start_time``.

        Raises:
            ValueError: If 'Models' names an unknown classifier.
        """
        model=configDict['Models']
        if model=='KNN':
            Classifier=KNeighborsClassifier(n_neighbors=configDict['Ks'])
        elif model=='LR':
            Classifier=LogisticRegression(C=configDict['regularizationStre'])
        elif model=='RF':
            Classifier=RandomForestClassifier(n_estimators=configDict['n_estimators'],random_state=42)
        elif model=='SVC':
            Classifier=SVC(C=configDict['regularizationStre'],kernel=configDict['kernel'])
        else:
            raise ValueError(f"unknown classification model: {model!r}")
        print(f"the type of the classifier is: {type(Classifier)}")
        Classifier.fit(self.X_train,self.Y_train)
        y_pred = Classifier.predict(self.X_test)
        loss=1-accuracy_score(self.Y_test,y_pred)
        print("the loss is: ",loss)
        return {'loss':loss,'time':time.time()-start_time}

    def regression(self, configDict,start_time):
        """Fit the selected regressor and return its test MSE and elapsed time.

        Args:
            configDict: Active hyperparameters, including the 'Models' key.
            start_time: ``time.time()`` value taken when the trial started.

        Returns:
            dict: 'loss' is the mean squared error on the test split, 'time' is
            the number of seconds since ``start_time``.

        Raises:
            ValueError: If 'Models' names an unknown regressor.
        """
        model=configDict['Models']
        if model=='LR':
            regressor=LinearRegression()
        elif model=='Lasso':
            regressor=Lasso(alpha=configDict['alpha'])
        elif model=='Ridge':
            regressor=Ridge(alpha=configDict['alpha'])
        elif model=='RF':
            regressor=RandomForestRegressor(n_estimators=configDict['n_estimators'],random_state=42)
        elif model=='XGboost':
            regressor=XGBRegressor(n_estimators=configDict['n_estimators'],random_state=42)
        else:
            raise ValueError(f"unknown regression model: {model!r}")
        regressor.fit(self.X_train,self.Y_train)
        y_pred = regressor.predict(self.X_test)
        mse = mean_squared_error(self.Y_test, y_pred)
        print("Mean Squared Error:", mse)
        return {'loss':mse,'time':time.time()-start_time}


In [None]:
from enum import Enum

class ProblemType(Enum):
    """Kinds of learning problem a Facade can be constructed for."""
    CLASSIFICATION = 'classification'
    REGRESSION = 'regression'
    # No optimization is implemented for the next two problem types yet.
    TIME_SERIES = 'time series'
    UNBALANCED = 'unbalanced'

class Facade:
    """Entry point that runs a SMAC multi-objective search for a given problem type."""

    def __init__(self, problem_type,Models,X_train,X_test,Y_train,Y_test):
        """Validate the problem type and store the model list and data splits.

        Args:
            problem_type: Member of ``ProblemType``.
            Models: Model identifiers offered to the optimizer.
            X_train: Training features.
            X_test: Features used to score each configuration.
            Y_train: Training targets.
            Y_test: Targets used to score each configuration.

        Raises:
            ValueError: If ``problem_type`` is not a ``ProblemType`` member.
        """
        if not isinstance(problem_type, ProblemType):
            raise ValueError("problem_type must be a ProblemType Enum")
        self.problem_type = problem_type
        self.models=Models
        self.X_train=X_train
        self.X_test=X_test
        self.Y_train=Y_train
        self.Y_test=Y_test

    def chooseFacade(self):
        """Run the optimization that matches ``self.problem_type``.

        Returns:
            The incumbents returned by the selected optimization.

        Raises:
            NotImplementedError: For problem types that have no optimization yet.
            ValueError: If ``self.problem_type`` matches no known handler.
        """
        handlers = {
            ProblemType.CLASSIFICATION: self.ClassificationFacade,
            ProblemType.REGRESSION: self.RegressionFacade,
            ProblemType.TIME_SERIES: self.TimeSeriesFacade,
            ProblemType.UNBALANCED: self.UnbalancedFacade,
        }
        handler = handlers.get(self.problem_type)
        if handler is None:
            raise ValueError(f"unsupported problem type: {self.problem_type!r}")
        return handler()

    def _run_optimization(self, problem_label):
        """Shared SMAC run for the problem type named by ``problem_label``.

        Builds the ``Models`` wrapper, optimizes the two objectives 'loss' and
        'time' (weighted 2:1) for 100 trials, prints each incumbent and returns them.
        """
        wrapper=Models(self.models,problem_label,self.X_train,self.Y_train,self.X_test,self.Y_test)
        scenario = Scenario(wrapper.configspace(), deterministic=True,objectives=['loss','time'], n_trials=100)
        multi_objective_algorithm = HyperparameterOptimizationFacade.get_multi_objective_algorithm(
            scenario, objective_weights=[2, 1]
        )
        # overwrite=True is forwarded to the SMAC facade unchanged from the original calls.
        smac = HyperparameterOptimizationFacade(
            scenario,
            wrapper.train,
            overwrite=True,
            callbacks=[CustomCallback()],
            multi_objective_algorithm=multi_objective_algorithm,
        )
        incumbents = smac.optimize()
        for incumbent in incumbents:
            print(incumbent)
        return incumbents

    def ClassificationFacade(self):
        """Optimize over the classification search space and return the incumbents."""
        return self._run_optimization('Classification')

    def RegressionFacade(self):
        """Optimize over the regression search space and return the incumbents."""
        return self._run_optimization('Regression')

    def TimeSeriesFacade(self):
        """Placeholder: time-series optimization is not implemented."""
        # Fail loudly instead of silently returning None from chooseFacade.
        raise NotImplementedError("time-series optimization is not implemented yet")

    def UnbalancedFacade(self):
        """Placeholder: optimization for unbalanced data is not implemented."""
        raise NotImplementedError("unbalanced-data optimization is not implemented yet")

In [None]:

if __name__ =="__main__":
    # Run the classification search on the credit data with all four candidate models.
    classincummbet=Facade(ProblemType.CLASSIFICATION,['KNN','LR','RF','SVC'],X_train_raw,X_test,y_train,y_test)
    classincummbet.chooseFacade()
    # Note: `classincummbet` is the Facade object; the incumbents are the return value of chooseFacade().

    


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn import datasets
# Regression toy problem: predict iris column 0 from the remaining columns.
iris = datasets.load_iris()

X = iris.data[:, 1:]
y = iris.data[:, 0] 

# NOTE: this rebinds X, y, X_test, y_train and y_test, which previously held the
# credit-data classification split; cells below see the iris values.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Plain linear regression as a reference MSE for the SMAC search.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

In [None]:
if __name__=="__main__":
    # Facade's signature is (problem_type, Models, X_train, X_test, Y_train, Y_test).
    # The splits were previously passed positionally as (X_train, y_train, X_test, y_test),
    # which swapped X_test and y_train; keywords make the mapping explicit.
    regreincumbent=Facade(
        ProblemType.REGRESSION,
        ['LR','Lasso','Ridge','RF','XGboost'],
        X_train=X_train,
        X_test=X_test,
        Y_train=y_train,
        Y_test=y_test,
    )
    regreincumbent.chooseFacade()

In [None]:
# Manual check: XGBoost regressor with 10 estimators on the current X_train/y_train split.
regressor=XGBRegressor(n_estimators=10,random_state=42)
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

In [None]:
# Manual check: random-forest regressor with 19 estimators on the current split.
# NOTE(review): no random_state is passed here (Models.regression uses 42), so the
# printed MSE may differ between runs — confirm whether that is intended.
regressor=RandomForestRegressor(n_estimators=19)
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

In [None]:
# The regression cells rebind X_test / y_train / y_test to the iris split, so
# fitting X_train_raw against y_train would pair credit features with iris targets.
# Rebuild the credit split instead; the split is determined by row count and seed,
# so the same test_size and random_state reproduce the original partition.
X_cls_train, X_cls_test, y_cls_train, y_cls_test = train_test_split(
    df[selected_features], df[target], test_size=0.2, random_state=42
)

# Manual check: random-forest classifier with 19 estimators; random_state matches
# the value Models.classification uses so the printed loss is reproducible.
Classifier=RandomForestClassifier(n_estimators=19,random_state=42)
Classifier.fit(X_cls_train,y_cls_train)
y_pred = Classifier.predict(X_cls_test)
loss=1-accuracy_score(y_cls_test,y_pred)
print("the loss is: ",loss)