In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, roc_curve, auc

In [2]:
# Seed NumPy's global random generator so the simulated data is reproducible
SEED = 420
np.random.seed(SEED)

In [3]:
# Generate weight and obesity datasets
num_samples = 100
# Simulated weights drawn from N(172, 29), stored in ascending order
weight = np.sort(np.random.normal(loc=172, scale=29, size=num_samples))
# Zero-based rank of every weight. A single argsort returns *sorting indices*, which
# only coincide with ranks when the input is already sorted; argsort-of-argsort is a
# true rank, so the labels stay right even if the sort above is ever removed.
weight_rank = np.argsort(np.argsort(weight))
# Heavier samples are more likely to be labelled obese: P(obese) = rank / num_samples
obese = (np.random.rand(num_samples) < (weight_rank / num_samples)).astype(int)

In [8]:
# Sanity check: the 1-D weight vector viewed as a single-column 2-D array
weight[:, np.newaxis].shape

(100, 1)

In [4]:
# Fit a logistic regression of the obese label on weight.
# The single predictor is passed as a one-column 2-D array.
weight_column = weight.reshape(-1, 1)
logistic_model = LogisticRegression()
logistic_model.fit(weight_column, obese)

In [9]:
# Predicted probability of the positive class (column 1 of predict_proba),
# then hard class labels using a 0.5 cut-off
class_probabilities = logistic_model.predict_proba(weight.reshape(-1, 1))
predicted_probabilities = class_probabilities[:, 1]
predicted_classes = np.greater(predicted_probabilities, 0.5).astype(int)

In [10]:
# Boolean mask: True where the predicted class matches the observed label
correct_predictions = np.equal(predicted_classes, obese)

In [12]:
# Weight vs Obese classification plot
# Observed labels: black markers were classified correctly, red ones were not
marker_colors = ['black' if is_correct else 'red' for is_correct in correct_predictions]
observed_points = go.Scatter(x=weight, y=obese, mode='markers',
                             marker=dict(color=marker_colors),
                             name='Predictions')
# Fitted probability of the positive class as a function of weight
logistic_curve = go.Scatter(x=weight, y=predicted_probabilities, mode='lines',
                            line=dict(color='blue', width=2),
                            name='Logistic Fit')

fig = go.Figure(data=[observed_points, logistic_curve])
fig.update_layout(title='Weight vs Obese Classification',
                  xaxis_title='Weight',
                  yaxis_title='Obese')

In [27]:
# Plot the confusion matrix
# confusion_matrix puts true labels on rows and predicted labels on columns, with the
# classes in sorted order: index 0 is class 0 (negative), index 1 is class 1 (positive).
conf_matrix = confusion_matrix(obese, predicted_classes)

# Tick labels must follow that same order (negative first); listing "Positive" first
# would mislabel every cell of the heatmap.
fig = px.imshow(conf_matrix, text_auto=True, title='Confusion Matrix for Logistic Regression',
          x=['Predicted Negative', 'Predicted Positive'],
          y=['Actual Negative', 'Actual Positive'])

fig.update_xaxes(side='top')
fig.update_coloraxes(showscale=False)
fig.show()

In [29]:
# ROC curve (false/true positive rates per threshold) and its AUC
# for the logistic regression scores
roc_points_log = roc_curve(obese, predicted_probabilities)
fpr_log, tpr_log, thresholds_log = roc_points_log
roc_auc_log = auc(fpr_log, tpr_log)

In [32]:
# Fit Random Forest model and Compute ROC for that model
# oob_score=True makes the forest keep, for every training sample, the averaged vote
# of only those trees whose bootstrap sample did not contain it.
rf_model = RandomForestClassifier(random_state=420, oob_score=True)
rf_model.fit(weight.reshape(-1, 1), obese)

# Score with out-of-bag probabilities rather than predict_proba on the training rows:
# fully grown trees on one continuous feature memorise the training labels, so an
# in-sample ROC would be inflated and say nothing about generalisation.
rf_probabilities = rf_model.oob_decision_function_[:, 1]
# A sample that ended up in every tree's bootstrap has no OOB estimate (NaN)
assert not np.isnan(rf_probabilities).any(), "some samples have no out-of-bag prediction"

fpr_rf, tpr_rf, thresholds_rf = roc_curve(obese, rf_probabilities)
roc_auc_rf = auc(fpr_rf, tpr_rf)

In [35]:
# Plot the ROC curves of both models against the diagonal reference line
roc_series = [
    (fpr_log, tpr_log, '#377eb8', f'Logistic Regression (AUC = {roc_auc_log:.2f})'),
    (fpr_rf, tpr_rf, '#4daf4a', f'Random Forest (AUC = {roc_auc_rf:.2f})'),
]

fig_roc = go.Figure()
for false_pos_rate, true_pos_rate, curve_color, curve_label in roc_series:
    fig_roc.add_trace(go.Scatter(x=false_pos_rate, y=true_pos_rate, mode='lines',
                                 line=dict(color=curve_color, width=2),
                                 name=curve_label))

# Dashed gray diagonal from (0, 0) to (1, 1), kept out of the legend
diagonal = go.Scatter(x=[0, 1], y=[0, 1], mode='lines',
                      line=dict(color='gray', dash='dash'),
                      showlegend=False)
fig_roc.add_trace(diagonal)

fig_roc.update_layout(title='ROC Curve',
                      xaxis_title='False Positive Rate',
                      yaxis_title='True Positive Rate',
                      legend_title='Model')

fig_roc.show()