In [34]:
import openml
import pandas as pd
import numpy as np
import plotly.express as px 
import time
from sklearn.utils import shuffle
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold 
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

In [3]:
# List all datasets and their properties
openml.datasets.list_datasets(output_format="dataframe")

# Get dataset by ID
dataset = openml.datasets.get_dataset(1476)

# Get the data itself as a dataframe (or otherwise)
X, _, _, _ = dataset.get_data(dataset_format="dataframe")

# **<span style="color:#3c1518">Questions</span>**

### <span style="color:#69140e">Answer to question 1: </span>

<span style="color:#a44200">This archive contains 13910 measurements from 16 chemical sensors utilized in simulations for drift compensation in a discrimination task of 6 gases at various levels of concentrations.
The goal is to achieve good performance (or as low degradation as possible) over time, as reported in the paper mentioned below in Section 2: Data collection.
The primary purpose of providing this dataset is to make it freely accessible online to the chemo-sensor research community and artificial intelligence to develop strategies to cope with sensor/concept drift. The dataset can be used exclusively for research purposes. Commercial purposes are fully excluded.
The dataset was gathered within January 2007 to February 2011 (36 months) in a gas delivery platform facility situated at the ChemoSignals Laboratory in the BioCircuits Institute, University of California San Diego.
Being completely operated by a fully computerized environment controlled by a LabVIEW's National Instruments software on a PC fitted with the appropriate serial data acquisition boards. The measurement system platform provides versatility for obtaining the desired concentrations of the chemical substances of interest with high accuracy and in a highly reproducible manner, minimizing thereby the common mistakes caused by human intervention and making it possible to exclusively concentrate on the chemical sensors for compensating real drift.
The resulting dataset comprises recordings from six distinct pure gaseous substances, namely Ammonia, Acetaldehyde, Acetone, Ethylene, Ethanol, and Toluene, each dosed at a wide variety of concentration values ranging from 5 to 1000 ppmv</span>

### <span style="color:#69140e">Answer to question 2: </span>

<span style="color:#a44200">There was no need to make any changes in order to import the dataset.</span>

### <span style="color:#69140e">Answer to question 3: </span>

<span style="color:#a44200"></span>

# **<span style="color:#3c1518">Preparation of the Dataset</span>**

In [4]:
X = X.astype('int64')
y = X['Class']
X = X.drop(columns=['Class'])
X,y = shuffle(X,y,random_state=0)
X = X.reset_index(drop= True)
y = y.reset_index(drop= True)

## <span style="color:#69140e">Train - Test Split </span>

In [5]:
Xtrain = X[0:int(0.7*len(X))]
Xtest = X[int(0.7*len(X))+1:]
ytrain = y[0:int(0.7*len(X))]
ytest = y[int(0.7*len(X))+1:]

In [6]:
metrics = pd.DataFrame(columns=['Classifier','Mean Accuracy', 'F1 Score'])

# **<span style="color:#3c1518">Classifiers</span>**

## <span style="color:#69140e">Dummy Classifier</span>

In [7]:
model = DummyClassifier(strategy="uniform")
modelStr = 'Dummy'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
     
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k

if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()

## <span style="color:#69140e">Gaussian Naive Bayes</span>

In [61]:
model = GaussianNB()
modelStr = 'Gaussian Naive Bayes'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
    
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k

if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()

## <span style="color:#69140e">kNN</span>

In [9]:
model = KNeighborsClassifier()
modelStr = 'kNN'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
    
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k

if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()

## <span style="color:#69140e">Logistic Regression</span>

In [10]:
model = LogisticRegression(random_state=0)
modelStr = 'Logistic Regression'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
    
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k

if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to th

## <span style="color:#69140e">Multi Layer Perceptron</span>

In [11]:
model = MLPClassifier()
modelStr = 'Multi Layer Perceptron'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
    
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k

if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()

## <span style="color:#69140e">SVM</span>

In [12]:
model = SVC(gamma='auto')
modelStr = 'SVM'
acc_score = []
f1_scores = []
k = 10
kf = KFold(n_splits=k, random_state=None)

for train_index , test_index in kf.split(Xtrain):
    X_train , X_test = X.iloc[train_index],X.iloc[test_index]
    y_train , y_test = y[train_index] , y[test_index]
     
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)
    
    accuracy = model.score(X_test, y_test)
    acc_score.append(accuracy)

    f1Score = f1_score(y_test, model.predict(X_test), average='macro')
    f1_scores.append(f1Score)
     
avg_acc_score = sum(acc_score)/k
avg_f1_score = sum(f1_scores)/k


if not (modelStr in metrics['Classifier'].unique()):
    metrics.loc[len(metrics)] = [modelStr, avg_acc_score, avg_f1_score]
else:
    index = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
    metrics.loc[index] = [modelStr, avg_acc_score, avg_f1_score]

figAccScores = px.bar(acc_score, title='Accuracy',width=1000)
figAccScores.update_layout(
    title="Accuracy diagram",
    xaxis_title="Iterations",
    yaxis_title="Accuracy",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figAccScores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figF1Scores = px.bar(f1_scores, title='F1 Scores',width=1000)
figF1Scores.update_layout(
    title="F1 score diagram",
    xaxis_title="Iterations",
    yaxis_title="F1 Score",
    font=dict(
    family="Verdana, monospace",
    size=18,
    color="#3c1518")   
)
figF1Scores.update_traces(marker_color='#a44200', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.0)
figAccScores.show()
figF1Scores.show()

# **<span style="color:#3c1518">Optimization</span>**

In [13]:
metricsOptimalModel = pd.DataFrame(columns=['Classifier','Mean Accuracy', 'F1 Score', 'Training Time', 'Average Test Time', 'Percentage Change in Mean Accuracy', 'Percentage Change in F1 Score'])

## <span style="color:#69140e">Dummy Classifier</span>

In [56]:
modelStr = 'Dummy'

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('dummy', DummyClassifier())])

param_grid = {"dummy__strategy": ['most_frequent', 'prior', 'stratified', 'uniform']}
search = HalvingGridSearchCV(pipe, param_grid, n_jobs=-1)
search.fit(Xtrain, ytrain)
pd.DataFrame(search.cv_results_)

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('dummy', search.best_estimator_['dummy'])])
startTime = time.time()
pipe.fit(Xtrain,ytrain)
stopTime = time.time()
trainingTime = stopTime - startTime

startTime = time.time()
accuracy = pipe.score(X_test, y_test)
stopTime = time.time()

accuracyTestTime = stopTime - startTime

startTime = time.time()
f1Score = f1_score(y_test, model.predict(X_test), average='macro')
stopTime = time.time()

f1ScoreTestTime = stopTime - startTime
testTime = (accuracyTestTime + f1ScoreTestTime)/2

indexMetrics = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
percentageChangeMeanAccuracy = 100*(accuracy - metrics.iloc[indexMetrics]['Mean Accuracy'])/metrics.iloc[indexMetrics]['Mean Accuracy']
percentageChangeF1Score = 100*(accuracy - metrics.iloc[indexMetrics]['F1 Score'])/metrics.iloc[indexMetrics]['F1 Score']

if not (modelStr in metricsOptimalModel['Classifier'].unique()):
    metricsOptimalModel.loc[len(metricsOptimalModel)] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]
else:
    index = metricsOptimalModel[metricsOptimalModel['Classifier']==modelStr].index.to_list()[0] 
    metricsOptimalModel.loc[index] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]

## <span style="color:#69140e">Gaussian Naive Bayes</span>

In [68]:
modelStr = 'Gaussian Naive Bayes'

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('gaussiannb', GaussianNB())])

param_grid = {"gaussiannb__var_smoothing": list(np.arange(1e-11,1e-7,1e-9 - 1e-13))}
search = HalvingGridSearchCV(pipe, param_grid, n_jobs=-1)
search.fit(Xtrain, ytrain)
pd.DataFrame(search.cv_results_)

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('gaussiannb', search.best_estimator_['gaussiannb'])])
startTime = time.time()
pipe.fit(Xtrain,ytrain)
stopTime = time.time()
trainingTime = stopTime - startTime

startTime = time.time()
accuracy = pipe.score(X_test, y_test)
stopTime = time.time()

accuracyTestTime = stopTime - startTime

startTime = time.time()
f1Score = f1_score(y_test, model.predict(X_test), average='macro')
stopTime = time.time()

f1ScoreTestTime = stopTime - startTime
testTime = (accuracyTestTime + f1ScoreTestTime)/2

indexMetrics = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
percentageChangeMeanAccuracy = 100*(accuracy - metrics.iloc[indexMetrics]['Mean Accuracy'])/metrics.iloc[indexMetrics]['Mean Accuracy']
percentageChangeF1Score = 100*(accuracy - metrics.iloc[indexMetrics]['F1 Score'])/metrics.iloc[indexMetrics]['F1 Score']

if not (modelStr in metricsOptimalModel['Classifier'].unique()):
    metricsOptimalModel.loc[len(metricsOptimalModel)] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]
else:
    indexMetricsOptimalModel = metricsOptimalModel[metricsOptimalModel['Classifier']==modelStr].index.to_list()[0] 
    metricsOptimalModel.loc[indexMetricsOptimalModel] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]

## <span style="color:#69140e">kNN</span>

In [81]:
modelStr = 'kNN'

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('kNN', KNeighborsClassifier())])

param_grid = {"kNN__n_neighbors": list(np.arange(8,15,1)), "kNN__weights": ['uniform', 'distance'], "kNN__algorithm": ['auto', 'ball_tree', 'kd_tree', 'brute'],\
     "kNN__leaf_size": list(np.arange(26,32,1)), "kNN__p": [1,2] }
search = HalvingGridSearchCV(pipe, param_grid, n_jobs=-1)
search.fit(Xtrain, ytrain)
pd.DataFrame(search.cv_results_)

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('kNN', search.best_estimator_['kNN'])])
startTime = time.time()
pipe.fit(Xtrain,ytrain)
stopTime = time.time()
trainingTime = stopTime - startTime

startTime = time.time()
accuracy = pipe.score(X_test, y_test)
stopTime = time.time()

accuracyTestTime = stopTime - startTime

startTime = time.time()
f1Score = f1_score(y_test, model.predict(X_test), average='macro')
stopTime = time.time()

f1ScoreTestTime = stopTime - startTime
testTime = (accuracyTestTime + f1ScoreTestTime)/2

indexMetrics = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
percentageChangeMeanAccuracy = 100*(accuracy - metrics.iloc[indexMetrics]['Mean Accuracy'])/metrics.iloc[indexMetrics]['Mean Accuracy']
percentageChangeF1Score = 100*(accuracy - metrics.iloc[indexMetrics]['F1 Score'])/metrics.iloc[indexMetrics]['F1 Score']

if not (modelStr in metricsOptimalModel['Classifier'].unique()):
    metricsOptimalModel.loc[len(metricsOptimalModel)] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]
else:
    indexMetricsOptimalModel = metricsOptimalModel[metricsOptimalModel['Classifier']==modelStr].index.to_list()[0] 
    metricsOptimalModel.loc[indexMetricsOptimalModel] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]

In [82]:
metricsOptimalModel

Unnamed: 0,Classifier,Mean Accuracy,F1 Score,Training Time,Average Test Time,Percentage Change in Mean Accuracy,Percentage Change in F1 Score
0,Dummy,0.203494,0.056362,0.041907,1.298299,21.636733,19.056556
1,Gaussian Naive Bayes,0.559096,0.541778,0.084835,0.007449,-0.748596,-0.438686
2,kNN,1.0,0.541778,0.44542,0.711671,2.065044,2.145526


## <span style="color:#69140e">Logistic Regression</span>

In [83]:
modelStr = 'Logistic Regression'

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('logisticReg', LogisticRegression(random_state=0))])

param_grid = {"logisticReg__C": list(np.arange(0.5,1.5,0.1)), "logisticReg__solver": ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga'],\
     "logisticReg__penalty": ['l1', 'l2', 'elasticnet', None]}
search = HalvingGridSearchCV(pipe, param_grid, n_jobs=-1)
search.fit(Xtrain, ytrain)
pd.DataFrame(search.cv_results_)

pipe = Pipeline(steps=[("scaler", StandardScaler()), ('logisticReg', search.best_estimator_['logisticReg'])])
startTime = time.time()
pipe.fit(Xtrain,ytrain)
stopTime = time.time()
trainingTime = stopTime - startTime

startTime = time.time()
accuracy = pipe.score(X_test, y_test)
stopTime = time.time()

accuracyTestTime = stopTime - startTime

startTime = time.time()
f1Score = f1_score(y_test, model.predict(X_test), average='macro')
stopTime = time.time()

f1ScoreTestTime = stopTime - startTime
testTime = (accuracyTestTime + f1ScoreTestTime)/2

indexMetrics = metrics[metrics['Classifier']==modelStr].index.to_list()[0] 
percentageChangeMeanAccuracy = 100*(accuracy - metrics.iloc[indexMetrics]['Mean Accuracy'])/metrics.iloc[indexMetrics]['Mean Accuracy']
percentageChangeF1Score = 100*(accuracy - metrics.iloc[indexMetrics]['F1 Score'])/metrics.iloc[indexMetrics]['F1 Score']

if not (modelStr in metricsOptimalModel['Classifier'].unique()):
    metricsOptimalModel.loc[len(metricsOptimalModel)] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]
else:
    indexMetricsOptimalModel = metricsOptimalModel[metricsOptimalModel['Classifier']==modelStr].index.to_list()[0] 
    metricsOptimalModel.loc[indexMetricsOptimalModel] = [modelStr, accuracy, f1Score, trainingTime, testTime, percentageChangeMeanAccuracy, percentageChangeF1Score]



850 fits failed out of a total of 1200.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\potoupni\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\potoupni\Anaconda3\lib\site-packages\sklearn\pipeline.py", line 394, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\Users\potoupni\Anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\potoupni\Anaconda3\lib\site-packages\sklearn\linear_model\_logis

ValueError: Logistic Regression supports only solvers in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'], got newton-cholesky.

In [80]:
LogisticRegression()._get_param_names()

['C',
 'class_weight',
 'dual',
 'fit_intercept',
 'intercept_scaling',
 'l1_ratio',
 'max_iter',
 'multi_class',
 'n_jobs',
 'penalty',
 'random_state',
 'solver',
 'tol',
 'verbose',
 'warm_start']