In [None]:
!pip install scikit-learn==0.26

In [None]:
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest,chi2
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split, StratifiedKFold, cross_val_score, RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from xgboost import XGBClassifier
from pycaret.classification import *
from autosklearn.classification import AutoSklearnClassifier

### Feature Selection

In [None]:
# Load the experiment results and keep only the rows whose 'rank' equals 5.
base_df = pd.read_csv('../workdata/shura/experiment2/results.csv')
base_df = base_df[base_df['rank'] == 5]
# base_df = base_df[base_df['db'] == 'WFDB_ChapmanShaoxing']

# Feature matrix: the seven numeric columns used as predictors.
X_df = base_df[['iter', 'fval', 'relfval', 'relstep', 'delta', 'rho', 'relerr']]
X = X_df.to_numpy()

# Pairwise correlation of the features, shown as an annotated heatmap.
# The matrix is computed once and reused for the plot.
corrmat = X_df.corr()
top_corr_features = corrmat.index
plt.figure(figsize=(10,10))
sns.heatmap(corrmat, annot=True, cmap="RdYlGn")
plt.title('Feature correlation matrix')

# Target: 'diagnostic', label-encoded to integers.
# .copy() makes y_df an independent frame, so writing the encoded labels back
# does not assign into a slice of base_df (chained assignment).
y_df = base_df[['diagnostic']].copy()
le = LabelEncoder()
y = le.fit_transform(y_df['diagnostic'].to_numpy())
y_df['diagnostic'] = y
sns.pairplot(pd.concat([X_df, y_df], axis=1), hue="diagnostic")

# Standardise the features.
# NOTE(review): the scaler is fitted on the full data set before any
# cross-validation split, so fold statistics leak into training — consider
# moving the scaler into a Pipeline.
sc = StandardScaler()
X = sc.fit_transform(X)

### GridSearch

In [None]:
########################
##        MLP         ##
########################
# Disabled: the MLP grid search below is wrapped in a bare triple-quoted string,
# so it is evaluated as a no-op string expression and never executed.
# If re-enabled, the grid spans 3*3*2*3*3*4*2*2 = 2592 parameter combinations,
# each fitted with cv=5.
'''
parameter_space_mlp = {'hidden_layer_sizes': [(300, 40, 10), (300, 30), (300,)],
                       'activation': ['logistic', 'tanh', 'relu'],
                       'solver': ['sgd', 'adam'],
                       'alpha': [0.0001, 0.001, 0.005],
                       'learning_rate_init': [0.0001, 0.001, 0.01],
                       'max_iter': [200, 300, 400, 1000],
                       'early_stopping': [True, False],
                       'learning_rate': ['constant','adaptive']}
grid_mlp = GridSearchCV(MLPClassifier(), parameter_space_mlp, n_jobs=-1, cv=5)
grid_mlp.fit(X, y)
print('Best parameters found:\n', grid_mlp.best_params_, grid_mlp.best_score_)
'''


########################
##    Random Forest   ##
########################

# Exhaustive 5-fold grid search over random-forest hyper-parameters
# (1 * 4 * 2 * 3 * 3 * 4 = 288 combinations).
parameter_space_rf = {'bootstrap': [True],
                      'max_depth': [80, 90, 100, 110],
                      'max_features': [2, 3],
                      'min_samples_leaf': [3, 4, 5],
                      'min_samples_split': [8, 10, 12],
                      'n_estimators': [100, 200, 300, 1000]}
# A fixed random_state makes the forests, and therefore the selected
# best_params_ / best_score_, reproducible across runs. It is set on the
# estimator, not in the grid, so best_params_ keeps the same keys as before.
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), parameter_space_rf, n_jobs=-1, cv=5)
grid_rf.fit(X, y)
print('Best parameters found:\n', grid_rf.best_params_, grid_rf.best_score_)



########################
##         LDA        ##
########################

# 5-fold grid search over LDA settings. The grid is split per solver because
# the options are not freely combinable (see the scikit-learn LDA reference):
#   * 'svd' does not support shrinkage, so crossing it with shrinkage values
#     only produces failed fits; `tol` and `store_covariance` are only
#     meaningful for 'svd'.
#   * 'lsqr' and 'eigen' are the solvers that accept `shrinkage`.
# best_params_ therefore only contains the keys of the winning sub-grid, which
# is still valid input for LinearDiscriminantAnalysis(**best_params_).
parameter_space_lda = [
    {'solver': ['svd'],
     'store_covariance': [True, False],
     'tol': [1e-05, 1e-04, 1e-03, 1e-02, 1e-01]},
    {'solver': ['lsqr', 'eigen'],
     'shrinkage': ['auto', 0.01, 0.1, 1, None]},
]
grid_lda = GridSearchCV(LinearDiscriminantAnalysis(), parameter_space_lda, n_jobs=-1, cv=5)
grid_lda.fit(X, y)
print('Best parameters found:\n', grid_lda.best_params_, grid_lda.best_score_)



########################
##         SVM        ##
########################
# Disabled: the SVM grid search below is wrapped in a bare triple-quoted string,
# so it is evaluated as a no-op string expression and never executed.
# If re-enabled, the grid spans 4*4*4*3*4 = 768 parameter combinations (cv=5).
# NOTE(review): the grid includes gamma=0, which SVC may reject as invalid —
# confirm against the installed scikit-learn version before re-enabling.
'''
parameter_space_svm = {'C': [1, 10, 100, 1000],
                       'gamma': [0.01, 0.1, 0, 1], 
                       'kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 
                       'degree': [1, 2, 3],
                       'coef0': [0.001, 0, 1, 2]}

grid_svm = GridSearchCV(SVC(), parameter_space_svm, n_jobs=-1, cv=5)
grid_svm.fit(X, y)
print('Best parameters found:\n', grid_svm.best_params_, grid_svm.best_score_)
'''


########################
##       XGBOOST      ##
########################

# 5-fold grid search over XGBoost settings, split by booster type.
# The tree-growth parameters (max_depth, max_leaves, grow_policy, gamma,
# min_child_weight, max_delta_step) belong to the tree boosters; the linear
# booster does not use them, so crossing them with 'gblinear' only repeats
# identical fits (17,496 candidates collapse to 27).
# best_params_ only contains the keys of the winning sub-grid, which is still
# valid input for XGBClassifier(**best_params_).
parameter_space_xgb = [
    # Tree boosters: full search over tree shape and regularisation.
    {'booster': ['gbtree', 'dart'],
     'n_estimators': [5, 10, 15],
     'max_depth': [1, 2, 3],
     'max_leaves': [0, 1, 2],
     'grow_policy': ['depthwise', 'lossguide'],
     'gamma': [0.0001, 0.001, 0.01, 1],
     'min_child_weight': [0.001, 0.01, 0.2],
     'max_delta_step': [0.001, 0.01, 0.2],
     'reg_alpha': [0.001, 0.01, 0.2],
     'reg_lambda': [0.001, 0.02, 0.2]},
    # Linear booster: only the number of rounds and the L1/L2 penalties apply.
    {'booster': ['gblinear'],
     'n_estimators': [5, 10, 15],
     'reg_alpha': [0.001, 0.01, 0.2],
     'reg_lambda': [0.001, 0.02, 0.2]},
]
grid_xgb = GridSearchCV(XGBClassifier(), parameter_space_xgb, n_jobs=-1, cv=5, verbose=0)
# verbose=0 is forwarded to XGBClassifier.fit for every candidate fit.
grid_xgb.fit(X, y, verbose=0)
print('Best parameters found:\n', grid_xgb.best_params_, grid_xgb.best_score_)


In [None]:
# Build an unfitted random forest from the best grid-search parameters; as the
# cell's last expression, its repr is shown as the cell output.
RandomForestClassifier(**grid_rf.best_params_)

# Classifier

In [None]:
## RANK 6

In [None]:
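# Notes (kept as comments so that this cell no longer raises a SyntaxError):
# - RR interval ("intervalo RR")
# - SNR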

In [None]:
# Candidate classifiers keyed by a short name. Each one is scored further down
# with the same stratified 10-fold cross-validation on (X, y).
classifiers = {}

# Disabled (bare string literal, never executed): hand-tuned MLP configuration.
'''
classifiers['nn'] = MLPClassifier(activation='tanh',
                                   alpha=0.005,
                                   early_stopping=False,
                                   hidden_layer_sizes=(300,30),
                                   learning_rate='adaptive',
                                   learning_rate_init=0.001,
                                   max_iter=1000,
                                   solver='adam')
'''

# Models configured with the winners of the grid searches.
# The forest gets a fixed random_state so its cross-validation scores are
# reproducible; the grid does not contain 'random_state', so there is no clash
# with best_params_.
classifiers['rf'] = RandomForestClassifier(random_state=42, **grid_rf.best_params_)

classifiers['lda'] = LinearDiscriminantAnalysis(**grid_lda.best_params_)

# Disabled (bare string literal, never executed): hand-tuned SVM configuration.
'''
classifiers['svm'] = SVC(C=1000, 
                         gamma=0.01, 
                         degree=2, 
                         coef0=2, 
                         kernel='poly')
'''

classifiers['xgb'] = XGBClassifier(**grid_xgb.best_params_)

# Sequential floating forward selection (3 features, scored with a 3-NN
# classifier), followed by the XGBoost model on the selected features.
sffs_selector = SFS(KNeighborsClassifier(n_neighbors=3),
                    k_features=3,
                    forward=True,
                    floating=True,
                    scoring='accuracy',
                    cv=10,
                    n_jobs=-1)
classifiers['sffs'] = Pipeline([('feat', sffs_selector), ('clf', classifiers['xgb'])])

# Sequential floating backward selection (5 features, scored with the random
# forest), followed by the XGBoost model on the selected features.
sbfs_selector = SFS(classifiers['rf'],
                    k_features=5,
                    forward=False,
                    floating=True,
                    scoring='accuracy',
                    cv=10,
                    n_jobs=-1)
classifiers['sbfs'] = Pipeline([('feat', sbfs_selector), ('clf', classifiers['xgb'])])

# Score every candidate on identical folds and show the per-fold accuracies.
kfold = StratifiedKFold(n_splits=10)
for alg, clf in classifiers.items():
    cv_results = cross_val_score(clf, X, y, cv=kfold, scoring='accuracy')
    print(f"{alg} {cv_results.mean()}")
    # Title and axis label identify which classifier each box plot belongs to.
    fig, ax = plt.subplots()
    ax.boxplot(cv_results)
    ax.set_title(f"{alg}: 10-fold cross-validation accuracy")
    ax.set_ylabel('accuracy')
    plt.show()

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on the
# labels (the PyCaret setup cell also stratifies its split) and seeded so the
# reported accuracies are comparable between runs and between time budgets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=42)

# Time budgets handed to auto-sklearn as time_left_for_this_task (seconds).
timings = [120, 480, 960, 3600]

for t in timings:
    # One working folder per budget. A relative path replaces the previous
    # absolute home-directory path so the notebook runs on other machines.
    # NOTE(review): '../tmp' assumes the notebook sits one level below the
    # project root, like the '../workdata' data path — confirm.
    tmp_folder = f'../tmp/autosklearn_classification_example_{t}'
    automl = AutoSklearnClassifier(
        time_left_for_this_task=t,
        tmp_folder=tmp_folder,
        n_jobs=6,
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
    )
    automl.fit(X_train, y_train)
    print(automl.leaderboard())
    # Evaluate the fitted ensemble on the held-out 20%.
    predictions = automl.predict(X_test)
    print("Accuracy score:", accuracy_score(y_test, predictions))

In [None]:
# Place the feature columns and the label-encoded 'diagnostic' column side by
# side in one frame, then show its (rows, columns) shape as the cell output.
data = pd.concat((X_df, y_df), axis="columns")
data.shape

In [None]:
# Initialise the PyCaret classification experiment on the combined frame, with
# 'diagnostic' as the target and a stratified train/test split.
# session_id fixes PyCaret's random seed so the split and the models trained in
# the following cells are reproducible across kernel restarts.
experiment = setup(data,
                   target='diagnostic',
                   data_split_stratify=True,
                   session_id=42)

In [None]:
# Run PyCaret's model comparison with default settings and keep what it returns
# as best_model; the next cell passes it to predict_model.
best_model = compare_models()

In [None]:
# Score best_model with PyCaret's predict_model. No data argument is given, so
# it presumably evaluates on the hold-out split created by setup() — confirm.
predict_model(best_model)

In [None]:
# Create a PyCaret model from the estimator id 'rf' (presumably a random
# forest, given the id); it is reused by the plotting, tuning and ensembling
# cells below.
rf = create_model('rf')

In [None]:
# Show PyCaret's 'calibration' plot for the rf model.
plot_model(rf, plot='calibration')

In [None]:
# Show PyCaret's 'feature' plot for the rf model (presumably feature
# importance — confirm against the PyCaret plot_model docs).
plot_model(rf, plot='feature')

In [None]:
# Tune rf with PyCaret's tune_model (default search settings); the result is
# kept under a separate name so the untuned rf stays available.
tuned_rf = tune_model(rf)

In [None]:
# Show PyCaret's 'parameter' plot for the tuned model.
plot_model(tuned_rf, plot='parameter')

In [None]:
# Wrap the untuned rf in a PyCaret ensemble of 10 estimators, using
# ensemble_model's default method (presumably bagging, per the variable name —
# confirm).
bagged_rf = ensemble_model(rf, n_estimators=10)

In [None]:
# Print the ensemble built in the previous cell. The earlier name `bagged_gbc`
# is never defined in this notebook and raised a NameError.
print(bagged_rf)

In [None]:
# Re-run the model comparison asking for three models (n_select=3), then
# combine them with PyCaret's blend_models.
# NOTE(review): this repeats the comparison already run for best_model above.
top3 = compare_models(n_select=3)
blender_top3 = blend_models(top3)

In [None]:
# Print the models selected by compare_models(n_select=3).
print(top3)