# Tune Hyperparameters

In [1]:
import numpy as np
import pandas as pd
from biosppy.signals import ecg
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import make_scorer
from sklearn.impute import SimpleImputer

In [2]:
def fill_missing_values(X, n_neighbors = 75, method="KNN"): 
    
    # normalization
    X_std = np.nanstd(X,axis=0,keepdims=True)
    X_ave = np.nanmean(X,axis=0,keepdims=True)
    X_norma = (X-X_ave)/X_std
    
    # use KNNImputer
    imputer = KNNImputer(missing_values=np.nan, n_neighbors=n_neighbors, weights = 'distance') if method=="KNN"\
        else SimpleImputer(missing_values=np.nan, strategy='median')
    
    X_norma_fixed = imputer.fit_transform(X_norma)
    
    return X_norma_fixed

In [3]:
X_train_data = pd.read_csv('X_train_feature.csv')
y_train_data = pd.read_csv('y_train.csv')
X_test_data = pd.read_csv('X_test_feature.csv')

indices_test = np.array(X_test_data)[:,0]
indices_train = np.array(X_train_data)[:,0]
X_test = np.array(X_test_data)[:,1:]
y_train = np.array(y_train_data)[:,1]
X_train = np.array(X_train_data)[:,1:]
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)

(5117, 936)
(5117,)
(3411, 936)


In [4]:
X_train = fill_missing_values(X_train, method="median")
X_test = fill_missing_values(X_test, method="median")

In [5]:
'''
use random subset of initial dataframe X for model selection  
'''

X_train = pd.DataFrame(X_train) 
X_train['y'] = y_train
X_sub = pd.DataFrame(X_train).sample(frac = 0.40, replace = False, axis = 0)
y_sub = X_sub['y']
X_sub = X_sub.drop('y', axis = 1).values
X_train = X_train.drop('y', axis = 1)
print(y_sub.shape, X_sub.shape)

'''define score function'''
scorer_f1 = make_scorer(f1_score, greater_is_better = True, average = 'micro')

(2047,) (2047, 936)


In [6]:
steps = [("classifier", AdaBoostClassifier())]
pipeline = Pipeline(steps = steps)

parameters = {
              "classifier__n_estimators": [200,250,300],
              "classifier__learning_rate": [0.1,0.08,0.05,0.03],
              "classifier__base_estimator": [SVC(), None]
             }

grid = GridSearchCV(pipeline, parameters, cv = 2, scoring = scorer_f1, verbose = 1)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_params_)

Fitting 2 folds for each of 24 candidates, totalling 48 fits


Traceback (most recent call last):
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 443, in fit
    return super().fit(X, y, sample_weight)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 117, in fit
    self._validate_estimator()
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 453, in _validate_estimator
    raise TypeError(
TypeError: AdaBoostClassifier with algorithm='SAMME.R' requires that the weak learner supports t

Traceback (most recent call last):
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 443, in fit
    return super().fit(X, y, sample_weight)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 117, in fit
    self._validate_estimator()
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 453, in _validate_estimator
    raise TypeError(
TypeError: AdaBoostClassifier with algorithm='SAMME.R' requires that the weak learner supports t

Traceback (most recent call last):
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 443, in fit
    return super().fit(X, y, sample_weight)
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 117, in fit
    self._validate_estimator()
  File "/Users/zdh/anaconda3/envs/aml_project/lib/python3.8/site-packages/sklearn/ensemble/_weight_boosting.py", line 453, in _validate_estimator
    raise TypeError(
TypeError: AdaBoostClassifier with algorithm='SAMME.R' requires that the weak learner supports t

0.7332425439838728
{'classifier__base_estimator': None, 'classifier__learning_rate': 0.1, 'classifier__n_estimators': 300}
