In [140]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, plot_confusion_matrix, plot_roc_curve, accuracy_score, recall_score, precision_score, balanced_accuracy_score, roc_auc_score
import pandas as pd
import matplotlib.pyplot as plt


In [141]:
#load data from CSV file
data = pd.read_csv("/Users/catherinebetancourt-lee/Desktop/fetal_health.csv.csv")

In [142]:
#Merging classification classes into binary 
data['fetal_health'] = data['fetal_health'].replace(1.0,0)
data['fetal_health'] = data['fetal_health'].replace(2.0,0)
data['fetal_health'] = data['fetal_health'].replace(3.0,1)

In [143]:
#Separate data and target variables
X = data.drop('fetal_health', axis = 1)
X
y = data['fetal_health']
y

0       0
1       0
2       0
3       0
4       0
       ..
2121    0
2122    0
2123    0
2124    0
2125    0
Name: fetal_health, Length: 2126, dtype: int64

In [144]:
#split model into training and testing sets 
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=142)

In [145]:
#We need to create the LASSO model
#penalty = L1 to apply the regularization, and then set saga for larger datasets.
#C = inverse of regularization strength to 0.1
lasso_model = LogisticRegression(penalty = 'l1', solver='saga', C=0.1)

In [146]:
#fit the model into the training data
lasso_model.fit(X_train, y_train)



LogisticRegression(C=0.1, penalty='l1', solver='saga')

In [147]:
#predict on testing set
y_pred = lasso_model.predict(X_test)


In [148]:
#metrics
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
f1 = f1_score(y_test, y_pred)
print("F1 Score", f1)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
print("Balanced Accuracy:", balanced_acc)
recall = recall_score(y_test, y_pred)
print("Recall Score:", recall)
precision = precision_score(y_test, y_pred)
print("Precision Score:", precision)
auc = roc_auc_score(y_test, y_pred)
print("AUC Score:", auc)
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:")
print(cm)

Accuracy: 0.9624413145539906
F1 Score 0.7419354838709677
Balanced Accuracy: 0.8234563390573622
Recall Score: 0.6571428571428571
Precision Score: 0.8518518518518519
AUC Score: 0.823456339057362
Confusion matrix:
[[387   4]
 [ 12  23]]
