# Import Packages and libraries

In [None]:
import pandas as pd
import numpy as np
import sklearn
import pycaret
import imblearn as im
from pandas.api.types import CategoricalDtype
from pycaret.classification import *
#from imblearn.over_sampling import ADASYN, BorderlineSMOTE, KMeansSMOTE, RandomOverSampler
#from imblearn.over_sampling import SMOTE,SMOTENC, SVMSMOTE
from imblearn.over_sampling import *
from imblearn.under_sampling import *
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.under_sampling import RandomUnderSampler, CondensedNearestNeighbour, EditedNearestNeighbours, RepeatedEditedNearestNeighbours,AllKNN, InstanceHardnessThreshold, NearMiss, NeighbourhoodCleaningRule, OneSidedSelection, TomekLinks, ClusterCentroids
import matplotlib.pyplot as plt
import seaborn as sns
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases no longer ship the old names, so use the new name when
# the installed Matplotlib offers it and fall back to the legacy one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
import os
os.environ["GIT_PYTHON_REFRESH"] = "quiet"
import git

# Reading the TAVR Data

In [None]:
# Load the complete dataset from the CSV stored at the path 'Data'.
data = pd.read_csv('Data')
# Normalise every column header to lower case so that later cells can refer
# to columns by their lower-case names.
data.columns = [header.lower() for header in data.columns]

# Preview the first five rows.
data.head(5)

In [None]:
# Discard every row that is missing a value in at least one of these three
# columns (dropping on "any" missing value is dropna's default behaviour).
data = data.dropna(subset=['race', 'zipinc_qrtl', 'elective'])

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts) after the
# rows with missing values have been removed.
data.info()

In [None]:
#data.shape

In [None]:
# Remove the columns at these positions from the frame, in place.
# NOTE(review): the columns are selected by position, so this depends on the
# column order of the input file; the original trailing note read
# "All 45 All" and its meaning is not evident from this file - confirm.
data.drop(
    columns=data.columns[[0, 2, 16, 17, 19, 22, 24, 49, 50, 56, 57, 58, 59]],
    inplace=True,
)
# Show the remaining columns and their dtypes.
data.info()

In [None]:
#data.shape

In [None]:
# Convert every object-dtype column to the pandas categorical dtype.
#
# The frame is rebuilt with ``astype`` and an explicit column -> dtype mapping
# instead of assigning through ``data.loc[:, mask] = ...``: on pandas 2.x a
# ``.loc`` assignment over existing columns writes the new values into the
# existing object-dtype columns, so the columns would silently stay ``object``.
data = data.astype(
    {
        column: 'category'
        for column in data.select_dtypes(include=['object']).columns
    }
)

In [None]:
#data.info()

# Research question 2

In [None]:
# Temporal split on the 'year' column: rows before 2019 form the training
# set, rows from 2019 onwards form the test set.
train = data.loc[data['year'].lt(2019)]
test = data.loc[data['year'].ge(2019)]

In [None]:
#train.shape, test.shape

In [None]:
#train.to_csv('train2019.csv')

In [None]:
#test.to_csv('test2019.csv')

In [None]:
#train.info()

In [None]:
#test.info()

In [None]:
# Remove the first remaining column from the training frame.
# The result is re-bound instead of using ``inplace=True`` because ``train``
# was produced by boolean-slicing ``data``; mutating such a slice in place
# makes pandas emit SettingWithCopyWarning.
# NOTE(review): the column is chosen by position - confirm which column sits
# at index 0 once the earlier positional drop has been applied.
train = train.drop(columns=train.columns[[0]])
#train.info()

In [None]:
# Remove the first remaining column from the test frame.
# The result is re-bound instead of using ``inplace=True`` because ``test``
# was produced by boolean-slicing ``data``; mutating such a slice in place
# makes pandas emit SettingWithCopyWarning.
# NOTE(review): the column is chosen by position - confirm which column sits
# at index 0 once the earlier positional drop has been applied.
test = test.drop(columns=test.columns[[0]])
#test.info()

## RandomOverSampler

In [None]:
# Over-sample the minority class by random duplication.  The sampler is given
# an explicit seed (the same value as ``session_id`` below) so the resampled
# training folds are reproducible even when folds run in parallel workers.
ros = RandomOverSampler(sampling_strategy='minority', random_state=2022)

# Initialise the PyCaret classification experiment with the pre-2019 rows as
# training data, the 2019+ rows as hold-out data and 'died' as the target.
ros_setup = setup(
    data=train,
    test_data=test,
    target='died',
    # preprocessing
    preprocess=True,
    imputation_type='iterative',
    categorical_imputation='constant',
    handle_unknown_categorical=True,
    normalize=True,
    normalize_method='minmax',
    feature_selection=True,
    remove_multicollinearity=True,
    # class imbalance handling
    fix_imbalance=True,
    fix_imbalance_method=ros,
    # cross-validation
    fold_strategy='stratifiedkfold',
    fold=5,
    # execution / bookkeeping
    n_jobs=-1,
    session_id=2022,
    experiment_name='tavr_ros',
    log_experiment=True,
)

In [None]:
# Register two extra metrics with the experiment so they are reported next
# to the built-in ones: specificity (from imbalanced-learn) and balanced
# accuracy (from scikit-learn).
add_metric(
    id='Spec',
    name='Spec.',
    score_func=im.metrics.specificity_score,
)
add_metric(
    id='Balanced Accuracy',
    name='Balanced Accuracy',
    score_func=sklearn.metrics.balanced_accuracy_score,
)

### Logistic Regression (lr)

In [None]:
# Fit the baseline logistic-regression model.
lr = create_model(estimator='lr')

In [None]:
# Hyper-parameter search for the logistic-regression model, selecting the
# candidate with the best 'Balanced Accuracy' (the custom metric registered
# with add_metric).
tuned_lr = tune_model(
    estimator=lr,
    optimize='Balanced Accuracy',
)

In [None]:
#AUC Plot
#plot_model(tuned_lr, plot = 'auc')
#Precision-Recall Curve
#plot_model(tuned_lr, plot = 'pr')
# Feature Importance Plot
#plot_model(tuned_lr, plot='feature')
# Confusion Matrix
#plot_model(tuned_lr, plot = 'confusion_matrix')

#evaluate_model(tuned_lr)

In [None]:
# Score the tuned logistic-regression model on the test / hold-out sample.
# The trailing semicolon keeps the notebook from echoing the returned frame.
predict_model(estimator=tuned_lr);

In [None]:
# Score the untuned logistic-regression model as well, for comparison with
# the tuned one.
predict_model(estimator=lr);

In [None]:
# Confusion Matrix
#plot_model(tuned_lr, plot = 'confusion_matrix')

In [None]:
#plot_model(tuned_lr, plot='feature_all')

In [None]:
#interpret_model(tuned_lr, plot = 'reason', observation = 12)

### Linear Discriminant Analysis (lda)

In [None]:
# Fit the baseline linear-discriminant-analysis model.
lda = create_model(estimator='lda')

In [None]:
# Hyper-parameter search for the LDA model, selecting the candidate with the
# best 'Balanced Accuracy'.
tuned_lda = tune_model(
    estimator=lda,
    optimize='Balanced Accuracy',
)

In [None]:
#AUC Plot
#plot_model(tuned_lda, plot = 'auc')
#Precision-Recall Curve
#plot_model(tuned_lda, plot = 'pr')
# Feature Importance Plot
#plot_model(tuned_lda, plot='feature')
# Confusion Matrix
#plot_model(tuned_lda, plot = 'confusion_matrix')

#evaluate_model(tuned_lda)

In [None]:
# Score the tuned LDA model on the test / hold-out sample.
# The trailing semicolon keeps the notebook from echoing the returned frame.
predict_model(estimator=tuned_lda);

In [None]:
# Score the untuned LDA model as well, for comparison with the tuned one.
predict_model(estimator=lda);

In [None]:
# Confusion Matrix
#plot_model(lda, plot = 'confusion_matrix')

In [None]:
#plot_model(lda, plot='feature_all')

### Gradient Boosting Classifier (gbc)

In [None]:
# Fit a gradient-boosting classifier with 200 boosting stages.
#
# The call previously spelled out a full estimator repr.  Every value in it
# other than ``n_estimators`` was the scikit-learn default, and three of the
# keywords (``loss='deviance'``, ``min_impurity_split`` and ``presort``) are
# no longer accepted by current scikit-learn releases, so passing them makes
# the estimator constructor fail.  Only the non-default setting is kept.
gbc = create_model('gbc', n_estimators=200)

In [None]:
# Hyper-parameter search for the gradient-boosting model over 100 candidate
# configurations, selecting the one with the best 'Balanced Accuracy'.
tuned_gbc = tune_model(
    estimator=gbc,
    optimize='Balanced Accuracy',
    n_iter=100,
)

In [None]:
#AUC Plot
#plot_model(tuned_gbc, plot = 'auc')
#Precision-Recall Curve
#plot_model(tuned_gbc, plot = 'pr')
# Feature Importance Plot
#plot_model(tuned_gbc, plot='feature')
# Confusion Matrix
#plot_model(tuned_gbc, plot = 'confusion_matrix')

#evaluate_model(tuned_gbc)

In [None]:
# Score the tuned gradient-boosting model on the test / hold-out sample.
# The trailing semicolon keeps the notebook from echoing the returned frame.
predict_model(estimator=tuned_gbc);

In [None]:
# Score the untuned gradient-boosting model as well, for comparison with the
# tuned one.
predict_model(estimator=gbc);

In [None]:
# Confusion Matrix
#plot_model(tuned_gbc, plot = 'confusion_matrix')

In [None]:
#plot_model(tuned_gbc, plot='feature_all')

### CatBoost Classifier (catboost)

In [None]:
# Fit the baseline CatBoost model with library defaults.  A class-weighted,
# depth-limited variant is kept below for reference but is not used.
#catboost = create_model('catboost',class_weights={0:6, 1:9},max_depth=3)
catboost = create_model(estimator='catboost')

In [None]:
# Hyper-parameter search for the CatBoost model over 100 candidate
# configurations, selecting the one with the best 'Balanced Accuracy'.
tuned_catboost = tune_model(
    estimator=catboost,
    optimize='Balanced Accuracy',
    n_iter=100,
)

In [None]:
#AUC Plot
#plot_model(tuned_catboost, plot = 'auc')
#Precision-Recall Curve
#plot_model(tuned_catboost, plot = 'pr')
# Feature Importance Plot
#plot_model(tuned_catboost, plot='feature')
# Confusion Matrix
#plot_model(tuned_catboost, plot = 'confusion_matrix')

#evaluate_model(tuned_catboost)

In [None]:
# Score the tuned CatBoost model on the test / hold-out sample.
# The trailing semicolon keeps the notebook from echoing the returned frame.
predict_model(estimator=tuned_catboost);

In [None]:
# Score the untuned CatBoost model as well, for comparison with the tuned one.
predict_model(estimator=catboost);

In [None]:
# Confusion Matrix
#plot_model(catboost, plot = 'confusion_matrix')

In [None]:
#plot_model(catboost, plot='feature_all')

### Light Gradient Boosting Machine (lightgbm)

In [None]:
# Fit a LightGBM model with explicit class weights (6 for class 0, 9 for
# class 1), trees limited to depth 3 and a leaf budget of 80.
# NOTE(review): a depth-3 tree can hold at most 2**3 = 8 leaves, so
# num_leaves=80 presumably never takes effect - confirm the intended value.
# NOTE(review): class weights are applied on top of the experiment's
# fix_imbalance over-sampling - confirm that both corrections are intended.
lightgbm = create_model(
    estimator='lightgbm',
    class_weight={0: 6, 1: 9},
    max_depth=3,
    num_leaves=80,
)

In [None]:
# Hyper-parameter search for the LightGBM model over 100 candidate
# configurations, selecting the one with the best 'Balanced Accuracy'.
tuned_lightgbm = tune_model(
    estimator=lightgbm,
    optimize='Balanced Accuracy',
    n_iter=100,
)

In [None]:
#AUC Plot
#plot_model(tuned_lightgbm, plot = 'auc')
#Precision-Recall Curve
#plot_model(tuned_lightgbm, plot = 'pr')
# Feature Importance Plot
#plot_model(tuned_lightgbm, plot='feature')
# Confusion Matrix
#plot_model(tuned_lightgbm, plot = 'confusion_matrix')

#evaluate_model(tuned_lightgbm)

In [None]:
# Score the tuned LightGBM model on the test / hold-out sample.
# The trailing semicolon keeps the notebook from echoing the returned frame.
predict_model(estimator=tuned_lightgbm);

In [None]:
# Score the untuned LightGBM model as well, for comparison with the tuned one.
predict_model(estimator=lightgbm);

In [None]:
# Confusion Matrix
#plot_model(tuned_lightgbm, plot = 'confusion_matrix')

In [None]:
#plot_model(tuned_lightgbm, plot='feature_all')