# Recap

In [1]:
import pandas as pd

# Read the electrocardiogram CSV into a DataFrame.
csv_path = "data/electrocardiograms.csv"
data = pd.read_csv(csv_path)

# Preview the first rows as a quick sanity check of the columns.
data.head()

Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,...,x_179,x_180,x_181,x_182,x_183,x_184,x_185,x_186,x_187,target
0,0.0,0.041199,0.11236,0.146067,0.202247,0.322097,0.363296,0.413858,0.426966,0.485019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,1.0,0.901786,0.760714,0.610714,0.466071,0.385714,0.364286,0.346429,0.314286,0.305357,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.9942,1.0,0.951276,0.903712,0.917633,0.900232,0.803944,0.656613,0.421114,0.288863,...,0.294664,0.295824,0.301624,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.984472,0.962733,0.663043,0.21118,0.0,0.032609,0.100932,0.177019,0.270186,0.313665,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.619217,0.489324,0.327402,0.11032,0.0,0.060498,0.108541,0.108541,0.145907,0.192171,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


## Base Logistic Regression

👇 Cross-validate the recall score of a Logistic Regression model

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

# Target vector: the 'target' column.
# Feature matrix: every column from 'x_1' through 'x_187' (label-based slice).
y = data['target']
X = data.loc[:, 'x_1':'x_187']

# 10-fold cross-validation of an unweighted logistic regression, scored on recall only.
base_model = LogisticRegression(max_iter=1000)
log_cv_results = cross_validate(base_model, X, y, cv=10, scoring=['recall'])

# Mean recall across the folds
log_cv_results['test_recall'].mean()

0.32938697318007665

👇 Generate a classification report for the model.

In [3]:
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict

# Cross-validated predictions for every sample.
# No `cv` is passed here, so the splitter is scikit-learn's default rather than
# the cv=10 used when cross-validating the recall score.
base_estimator = LogisticRegression(max_iter=1000)
y_pred = cross_val_predict(base_estimator, X, y)

# Per-class precision / recall / f1 against the true labels
report = classification_report(y, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.99      0.97     18117
           1       0.68      0.33      0.45      1448

    accuracy                           0.94     19565
   macro avg       0.82      0.66      0.71     19565
weighted avg       0.93      0.94      0.93     19565



## Weighted classes Logistic Regression

👇 Cross-validate the recall score of a Logistic Regression model with weighted classes

In [4]:
# Same 10-fold recall cross-validation, but with class_weight="balanced".
# Note: this overwrites the `log_cv_results` produced for the unweighted model.
weighted_model = LogisticRegression(max_iter=1000, class_weight="balanced")
log_cv_results = cross_validate(weighted_model, X, y, cv=10, scoring=['recall'])

# Mean recall across the folds
log_cv_results['test_recall'].mean()

0.8638793103448276

👇 Generate a classification report for the model

In [5]:
# Cross-validated predictions from the class-weighted model
# (overwrites the `y_pred` of the unweighted model).
weighted_estimator = LogisticRegression(max_iter=1000, class_weight="balanced")
y_pred = cross_val_predict(weighted_estimator, X, y)

# Per-class precision / recall / f1 against the true labels
report = classification_report(y, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.84      0.91     18117
           1       0.31      0.87      0.45      1448

    accuracy                           0.85     19565
   macro avg       0.65      0.86      0.68     19565
weighted avg       0.94      0.85      0.88     19565



## Threshold Adjustment

👇 Find the highest decision threshold that still yields a recall of at least 95%

In [6]:
from sklearn.metrics import precision_recall_curve

# Cross-validated class probabilities from the class-weighted logistic regression.
# Transposing gives one row per class, unpacked here as class 0 and class 1.
weighted_estimator = LogisticRegression(max_iter=1000, class_weight="balanced")
cv_probas = cross_val_predict(weighted_estimator, X, y, method="predict_proba")
y_pred_probas_0, y_pred_probas_1 = cv_probas.T

# Precision and recall at each candidate threshold on the class-1 probability
precision, recall, thresholds = precision_recall_curve(y, y_pred_probas_1)

# `recall` has one more entry than `thresholds`; drop its last value so the two
# columns line up row by row.
df_recall = pd.DataFrame({"recall": recall[:-1], "threshold": thresholds})

# Largest threshold whose recall is still at least 0.95
meets_target = df_recall['recall'] >= 0.95
new_threshold = df_recall.loc[meets_target, 'threshold'].max()

new_threshold

0.2702275008250131

## Custom Predict

👇 Make custom predictions using the new threshold

In [7]:
# Class-weighted logistic regression fitted on the full data set.
model = LogisticRegression(max_iter=1000, class_weight="balanced")
model.fit(X, y)

# Cross-validated probabilities; column 1 is stored on `data` as 'proba_1'.
cv_probas = cross_val_predict(model, X, y, method='predict_proba')
data['proba_1'] = cv_probas[:, 1]

def custom_predict(proba, threshold=None):
    """Turn a class-1 probability into a hard 0/1 prediction.

    Parameters
    ----------
    proba : float
        Predicted probability of class 1 for a single sample.
    threshold : float, optional
        Decision cut-off. When omitted, the notebook-level ``new_threshold``
        is used, so ``Series.apply(custom_predict)`` keeps working unchanged.

    Returns
    -------
    int
        1 if ``proba >= threshold``, otherwise 0.
    """
    if threshold is None:
        # Fall back to the threshold computed earlier in the notebook.
        threshold = new_threshold
    return 1 if proba >= threshold else 0

# Apply the thresholded decision rule to every stored class-1 probability.
thresholded = data['proba_1'].apply(custom_predict)
data['custom_predict'] = thresholded

# Preview the frame with the two new columns
data.head()

Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,...,x_181,x_182,x_183,x_184,x_185,x_186,x_187,target,proba_1,custom_predict
0,0.0,0.041199,0.11236,0.146067,0.202247,0.322097,0.363296,0.413858,0.426966,0.485019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.364581,1
1,1.0,0.901786,0.760714,0.610714,0.466071,0.385714,0.364286,0.346429,0.314286,0.305357,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.781967,1
2,0.9942,1.0,0.951276,0.903712,0.917633,0.900232,0.803944,0.656613,0.421114,0.288863,...,0.301624,0.0,0.0,0.0,0.0,0.0,0.0,1,0.975079,1
3,0.984472,0.962733,0.663043,0.21118,0.0,0.032609,0.100932,0.177019,0.270186,0.313665,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.272998,1
4,0.619217,0.489324,0.327402,0.11032,0.0,0.060498,0.108541,0.108541,0.145907,0.192171,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.504505,1


👇 Generate a classification report for the model

In [8]:
# Per-class metrics for the thresholded predictions against the true labels
custom_report = classification_report(data['target'], data['custom_predict'])
print(custom_report)

              precision    recall  f1-score   support

           0       0.99      0.72      0.84     18117
           1       0.22      0.95      0.35      1448

    accuracy                           0.74     19565
   macro avg       0.61      0.84      0.59     19565
weighted avg       0.94      0.74      0.80     19565

