In [1]:
!pip install smac

Collecting smac
  Downloading smac-2.1.0.tar.gz (148 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.1/148.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting pynisher>=1.0.0 (from smac)
  Downloading pynisher-1.0.10.tar.gz (30 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting ConfigSpace>=0.6.1 (from smac)
  Downloading ConfigSpace-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pyrfr>=0.9.0 (from smac)
  Downloading pyrfr-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (601 bytes)
Collecting dask-jobqueue (from smac)
  Downloading dask_jobqueue-0.8.5-py2.py3-none-a

In [2]:
from ConfigSpace import Configuration, ConfigurationSpace

import numpy as np
from smac import HyperparameterOptimizationFacade, Scenario
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

iris = datasets.load_iris()


def train(config: Configuration, seed: int = 0) -> float:
    classifier = SVC(C=config["C"], random_state=seed)
    scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
#     print (np.mean(scores))
    return 1 - np.mean(scores)


configspace = ConfigurationSpace({"C": (0.100, 1000.0)})

# Scenario object specifying the optimization environment
scenario = Scenario(configspace, deterministic=True, n_trials=200)

# Use SMAC to find the best configuration/hyperparameters
smac = HyperparameterOptimizationFacade(scenario, train)
incumbent = smac.optimize()

[INFO][abstract_initial_design.py:147] Using 10 initial design configurations and 0 additional configurations.
[INFO][abstract_intensifier.py:305] Using only one seed for deterministic scenario.
[INFO][abstract_intensifier.py:515] Added config 8a6596 as new incumbent because there are no incumbents yet.
[INFO][abstract_intensifier.py:594] Added config a3ff11 and rejected config 8a6596 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][abstract_intensifier.py:594] Added config 0ce248 and rejected config a3ff11 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][abstract_intensifier.py:594] Added config 90dd51 and rejected config 0ce248 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][abstract_intensifier.py:594] Added config 744874 and rejected config 90dd51 as incumbent because it is not better than the incumbents on 1 instances:
[INFO][smbo.py:319] Finished 50 trials.
[INFO][smbo.py:319] Finishe

In [3]:
incumbent.values


<bound method Mapping.values of Configuration(values={
  'C': 11.346459377614869,
})>

In [4]:
classifier = SVC(C=6.724148702862232)
scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
print(np.mean(scores))

0.9866666666666667


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV, ShuffleSplit
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
import seaborn as sn
import warnings

warnings.filterwarnings('ignore')

[INFO][utils.py:161] NumExpr defaulting to 4 threads.


In [6]:
# Load the Credit Card Fraud Detection dataset
url = "https://www.openml.org/data/get_csv/31/dataset_31.csv"
df = pd.read_csv(url)

# Drop missing values
df = df.dropna()

# Convert categorical features to numerical
df = pd.get_dummies(df, drop_first=True)

In [7]:
df.head()

Unnamed: 0,duration,credit_amount,installment_commitment,residence_since,age,existing_credits,num_dependents,checking_status_'<0',checking_status_'>=200',checking_status_'no checking',...,other_payment_plans_none,other_payment_plans_stores,housing_own,housing_rent,job_'unemp/unskilled non res',job_'unskilled resident',job_skilled,own_telephone_yes,foreign_worker_yes,class_good
0,6,1169,4,4,67,2,1,True,False,False,...,True,False,True,False,False,False,True,True,True,True
1,48,5951,2,2,22,1,1,False,False,False,...,True,False,True,False,False,False,True,False,True,False
2,12,2096,2,3,49,1,2,False,False,True,...,True,False,True,False,False,True,False,False,True,True
3,42,7882,2,4,45,1,2,True,False,False,...,True,False,False,False,False,False,True,False,True,True
4,24,4870,3,4,53,2,2,True,False,False,...,True,False,False,False,False,False,True,False,True,False


In [8]:
df['class_good'].value_counts()

class_good
True     700
False    300
Name: count, dtype: int64

In [9]:
target = "class_good"
X = df.drop(columns=target)
y = df[target]

# Split the dataset into training and testing sets
X_train_raw, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
from sklearn.feature_selection import SelectKBest, f_classif

def Which_features(X_train,y_train,number_of_columns):

    # Select the top k features based on ANOVA F-statistic
    selector = SelectKBest(score_func=f_classif, k=number_of_columns)
    X_train_selected = selector.fit_transform(X_train, y_train)

    # Get the column names of the selected features
    selected_feature_names = X_train.columns[selector.get_support()]

    
    return list(selected_feature_names)

selected_features = Which_features(X_train_raw,y_train,number_of_columns=15)
selected_features

['duration',
 'credit_amount',
 'age',
 "checking_status_'<0'",
 "checking_status_'no checking'",
 "credit_history_'critical/other existing credit'",
 "credit_history_'no credits/all paid'",
 "purpose_'new car'",
 'purpose_radio/tv',
 "savings_status_'<100'",
 "property_magnitude_'no known property'",
 "property_magnitude_'real estate'",
 'other_payment_plans_none',
 'housing_own',
 'housing_rent']

In [11]:
X =X[selected_features]
X_train_raw = X_train_raw[selected_features]
X_test = X_test[selected_features]

In [12]:
X_train_raw.shape

(800, 15)

In [13]:
classifer=KNeighborsClassifier(n_neighbors=7)
classifer.fit(X_train_raw,y_train)
y_pred = classifer.predict(X_test)
loss=1-accuracy_score(y_test,y_pred)
print(loss)


0.32999999999999996


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X = iris.data[:, 1:]
y = iris.data[:, 0] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

In [21]:
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer
from ConfigSpace.conditions import InCondition,EqualsCondition
from smac import HyperparameterOptimizationFacade, Scenario
#potential optimizations
#ass timeseries and regression models
#search for the best hypers parameters for the each model
#add random state parameters
#what is the problem with linear svc
class Models:
    def __init__(self,similarModels,problemType):
        self.Models=similarModels
        self.Problemtype=problemType
    def configspace(self):
        confs = ConfigurationSpace(seed=0)
        #HPOs
        if self.Problemtype=='Classification':
            # models are ['KNN','LR',"RF",'SVC']
            models=Categorical('Models',self.Models)
            #KNN parameters
            Kneighbors=Integer('Ks',(1,10),default=1)
            #LR  and svc Parameters
            rc=Float('regularizationStre',(0.01,1))
            #RF parameters
            nestimators=Integer('n_estimators',(1,20),default=10)
            #SVC parameters
            kernel=Categorical('kernel',['linear','rbf'])
            #dependencies

            useks=InCondition(child=Kneighbors,parent=models,values=['KNN'])
            userc=InCondition(child=rc,parent=models,values=['LR','SVC'])
            usekernel=InCondition(child=kernel,parent=models,values=['SVC'])
            useEst=EqualsCondition(child=nestimators,parent=models,value='RF')



            #adding conditions and HPs
            confs.add_hyperparameters([models,Kneighbors,rc,nestimators,kernel])
            confs.add_conditions([useks,userc,usekernel,useEst])
        elif self.Problemtype=='Regression':
            models=Categorical('Models',self.Models)
            #linear regression parameters
            rc=Float('regularizationStre',(0.01,1))
            #lasso and ridge regression parameters
            alpha=Float('alpha',(0.01,1))
            
            #random forest and XGboost parameters
            nestimators=Integer('n_estimators',(1,20),default=10)
            #dependencies
            userc=InCondition(child=rc,parent=models,values=['LR'])   
            usealpha=InCondition(child=alpha,parent=models,values=['Lasso','Ridge'])
            useEst=InCondition(child=nestimators,parent=models,values=['RF','XGboost'])
            #adding conditions and HPs
            confs.add_hyperparameters([models,rc,alpha,nestimators])
            confs.add_conditions([userc,usealpha,useEst])



            
        return confs
    def train(self,config:Configuration,seed: int=0):
        
        config_dict=config.get_dictionary()
        model=config_dict['Models']
        print(f"config_dict:{config_dict}")
        if self.Problemtype=='Classification':
            if model =='KNN':
                Classifier=KNeighborsClassifier(n_neighbors=config_dict['Ks'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
            elif model=='LR':
                Classifier=LogisticRegression(C=config_dict['regularizationStre'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the losss is: ",loss)
                return loss
            elif model=='RF':
                Classifier=RandomForestClassifier(n_estimators=config_dict['n_estimators'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
            elif model=='SVC':
                Classifier=SVC(C=config_dict['regularizationStre'],kernel=config_dict['kernel'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
        elif self.Problemtype=='Regression':
            return self.regression(config_dict)
    def regression(self, configDict):
            model=configDict['Models']
            if model=='LR':
                Classifier=RidgeClassifier(alpha=configDict['regularizationStre'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
            elif model=='Lasso':
                Classifier=RidgeClassifier(alpha=configDict['alpha'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
            elif model=='RF':
                Classifier=RandomForestClassifier(n_estimators=configDict['n_estimators'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss
            elif model=='XGboost':
                Classifier=XGBClassifier(n_estimators=configDict['n_estimators'])
                Classifier.fit(X_train_raw,y_train)
                y_pred = Classifier.predict(X_test)
                loss=1-accuracy_score(y_test,y_pred)
                print("the loss is: ",loss)
                return loss

In [None]:
if __name__ =="__main__":
    classifier=Models(['KNN','LR','RF','SVC'],'Classification')
    scenario = Scenario(classifier.configspace(), deterministic=True, n_trials=30)
    smac = HyperparameterOptimizationFacade(scenario, classifier.train)
    incumbent = smac.optimize()
    incumbent.values
    print(incumbent)

    


[INFO][abstract_initial_design.py:95] Reducing the number of initial configurations from 50 to 7 (max_ratio == 0.25).
[INFO][abstract_initial_design.py:147] Using 7 initial design configurations and 0 additional configurations.
[INFO][abstract_intensifier.py:305] Using only one seed for deterministic scenario.
config_dict:{'Models': 'LR', 'regularizationStre': 0.8627264361735434}
the losss is:  0.26
[INFO][abstract_intensifier.py:515] Added config 90e02d as new incumbent because there are no incumbents yet.
config_dict:{'Models': 'SVC', 'kernel': 'rbf', 'regularizationStre': 0.013563145697116853}
the loss is:  0.29500000000000004
config_dict:{'Models': 'RF', 'n_estimators': 9}
the loss is:  0.22499999999999998
[INFO][abstract_intensifier.py:594] Added config 2171d7 and rejected config 90e02d as incumbent because it is not better than the incumbents on 1 instances:
config_dict:{'Models': 'KNN', 'Ks': 1}
the loss is:  0.33999999999999997
config_dict:{'Models': 'KNN', 'Ks': 10}
the loss i

In [20]:
Classifier=RandomForestClassifier(n_estimators=19)
Classifier.fit(X_train_raw,y_train)
y_pred = Classifier.predict(X_test)
loss=1-accuracy_score(y_test,y_pred)
print("the loss is: ",loss)

the loss is:  0.245
