Lee-Or Bentovim
8/4/23

To load the connected file (helpers.py), run the two cells below in order. If any updates are made to helpers.py, you will need to re-run BOTH cells, in order, to pick up the updates on this page.

In [None]:
# Execute helpers.py so that the names it defines become available in this notebook.
# The functions called in the cells below are not defined in this notebook and
# presumably come from that file (confirm).
%run helpers.py

In [None]:
# Second of the two "load" cells described at the top of the notebook.
# main() is not defined in this notebook; presumably provided by helpers.py (confirm).
main()

In [None]:
# solicit_inputs() is not defined in this notebook; presumably provided by
# helpers.py. NOTE(review): the name suggests it prompts the user for inputs — confirm.
solicit_inputs()

## Below is the analysis used to determine that this was the best model

In [None]:
# Target: the 'Contributed' column of model_data.
y = model_data['Contributed']

# Features: every remaining column except 'Amount Given'
# (presumably excluded because it would leak the outcome — confirm).
X = model_data.drop(columns=['Amount Given', 'Contributed'])

In [None]:
# First cut: hold out 20% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Second cut: 25% of the remaining 80% becomes the validation set,
# i.e. an overall 60/20/20 train/validation/test split.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)

In [None]:
# Logistic regression with built-in cross-validation (LogisticRegressionCV);
# the solver iteration cap is set to 1000.
lr = LogisticRegressionCV(max_iter=1000)

In [None]:
from imblearn.over_sampling import SMOTE
# SMOTE oversampler with a fixed seed so the resampled data is reproducible.
smote = SMOTE(random_state=42)

In [None]:
# Resample the training split only; the validation and test splits are not passed
# through SMOTE, so they keep their original rows.
X_train_oversampled, y_train_oversampled = smote.fit_resample(X_train, y_train)

In [None]:
# Train the logistic regression on the SMOTE-resampled training data.
lr.fit(X_train_oversampled, y_train_oversampled)

In [None]:
# Hard class predictions for the test set.
y_pred = lr.predict(X_test)

In [None]:
# Uncalibrated test-set probabilities; column 1 of predict_proba is the second class
# in lr.classes_ (presumably Contributed == 1 — confirm).
pred_probs = lr.predict_proba(X_test)[:,1]

In [None]:
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt

# Reliability diagram for the uncalibrated logistic-regression probabilities.
# calibration_curve bins the predictions and returns, per bin, the observed
# fraction of positive samples and the mean predicted probability.
fraction_of_pos, mean_pred = calibration_curve(y_test, pred_probs)

# Label the model curve so the legend identifies both lines, not only the diagonal.
plt.plot(mean_pred, fraction_of_pos, marker='o', linestyle='-', label='Model')
plt.plot([0, 1], [0, 1], label='Perfect Calibration')
plt.xlabel('Mean Predicted Probability')
# The y-axis is the observed share of positive samples per bin, not a share of predictions.
plt.ylabel('Fraction of Positives')
plt.title('Calibration Curve')
plt.legend()
plt.show()

In [None]:
from sklearn.calibration import CalibratedClassifierCV

# Wrap the already-fitted logistic regression for Platt (sigmoid) scaling.
# cv='prefit' means the wrapped estimator is used as-is, so `lr` must already be
# fitted by the earlier lr.fit(...) cell. The second, identical lr.fit call that
# used to sit here only repeated that work and has been dropped.
platt_calibrated_model = CalibratedClassifierCV(lr, method='sigmoid', cv='prefit')

In [None]:
# Fit the Platt calibrator on the held-out validation split.
# With cv='prefit', the calibration data must be disjoint from the data the base
# model was trained on; reusing the SMOTE-oversampled training set would calibrate
# against over-confident in-sample scores and an artificial class balance.
platt_calibrated_model.fit(X_valid, y_valid)

In [None]:
# Platt-calibrated test-set probabilities (column 1 of predict_proba).
platt_calibrated_probs = platt_calibrated_model.predict_proba(X_test)[:,1]

In [None]:
from sklearn.isotonic import IsotonicRegression

# Second calibration stage: isotonic regression on top of the Platt-scaled scores.
# It is fitted on the validation split, never on y_test. Fitting on the test labels
# and then scoring the same test set leaks the labels into the predictions and makes
# every downstream metric optimistically biased.
valid_platt_probs = platt_calibrated_model.predict_proba(X_valid)[:, 1]

# out_of_bounds='clip' maps test scores outside the range seen on the validation
# set to the nearest fitted value; the default ('nan') would return NaN for them.
isotonic_model = IsotonicRegression(out_of_bounds='clip')
isotonic_model.fit(valid_platt_probs, y_valid)

# Apply the learned mapping to the test-set probabilities.
final_calibrated_probs = isotonic_model.transform(platt_calibrated_probs)

In [None]:
import matplotlib.pyplot as plt

# Reliability diagram for the final (calibrated) test-set probabilities.
# calibration_curve returns, per bin, the observed fraction of positive samples
# and the mean predicted probability.
fraction_of_pos, mean_pred = calibration_curve(y_test, final_calibrated_probs)

# Label the model curve so the legend identifies both lines, not only the diagonal.
plt.plot(mean_pred, fraction_of_pos, marker='o', linestyle='-', label='Calibrated Model')
plt.plot([0, 1], [0, 1], label='Perfect Calibration')
plt.xlabel('Mean Predicted Probability')
# The y-axis is the observed share of positive samples per bin, not a share of predictions.
plt.ylabel('Fraction of Positives')
plt.title('Calibration Curve')
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import log_loss
# Log loss of the uncalibrated logistic-regression probabilities on the test set.
# NOTE(review): the ROC and Brier cells below score final_calibrated_probs instead —
# confirm whether the calibrated probabilities were intended here as well.
log_loss(y_test,pred_probs)

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC curve and area under it for the final calibrated test-set probabilities.
fpr, tpr, _ = roc_curve(y_test, final_calibrated_probs)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC Curve (AUC = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('AUC ROC Curve')
# Without a legend the curve's label (which carries the AUC value) is never drawn.
plt.legend(loc='lower right')
plt.show()

In [None]:
# Import the metric explicitly, matching the `from sklearn... import ...` style used
# in the other cells; the bare `sklearn` name is never imported in this notebook.
from sklearn.metrics import brier_score_loss

# Brier score (mean squared error of the probabilities) of the final calibrated predictions.
brier_score_loss(y_test, final_calibrated_probs)

In [None]:
# Display the AUC value computed in the ROC cell.
roc_auc

In [None]:
# Attach the calibrated probabilities as a new 'Predicted_Probs' column.
# assign() builds a new DataFrame rather than writing into X_test in place; X_test
# is produced by train_test_split, and in-place column writes on such frames can
# trigger pandas' SettingWithCopyWarning.
# NOTE: from here on X_test carries a non-feature column, so the prediction cells
# above should not be re-run without re-creating the split first.
X_test = X_test.assign(Predicted_Probs=final_calibrated_probs)

In [None]:
# Order the test rows from highest to lowest predicted probability.
X_test = X_test.sort_values(by='Predicted_Probs', ascending=False)

In [None]:
# Display the test frame (features plus 'Predicted_Probs').
X_test

In [None]:
# Number of distinct values in the 'Predicted_Probs' column.
X_test['Predicted_Probs'].nunique()