In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression # import model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_recall_fscore_support, precision_score

# Load the Titanic passenger table.
df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')

# Derive a boolean feature column 'Male': True where 'Sex' is the string 'male', False otherwise.
df['Male'] = df['Sex'] == 'male'

# Feature matrix (six columns) and target vector as plain arrays.
feature_columns = ['Pclass', 'Male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']
X = df[feature_columns].to_numpy()
y = df['Survived'].to_numpy()

# random_state=5 pins the shuffle so the train/test split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

# Instantiate the model and fit it on the training data (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Predictions on the held-out test data.
y_pred = model.predict(X_test)

## ROC curve foundations

# "Logistic Regression model doesn’t just return a prediction, but also a probability value between 0 and 1.
# Typically, we say if the value is >=0.5, we predict the passenger survived, and if the value is <0.5,
# the passenger didn’t survive. However, we could choose any threshold between 0 and 1."

# precision = percentage of the model's positive predictions that are correct
# recall    = percentage of actual positives that were predicted positive

print("metrics for model w/ default .5 threshold:")
for metric_name, metric_fn in (("precision", precision_score), ("recall", recall_score)):
    print(f"{metric_name}:", metric_fn(y_test, y_pred))


metrics for model w/ default .5 threshold:
precision: 0.835820895522388
recall: 0.6829268292682927


In [2]:
# "If we make the threshold higher, we’ll have fewer positive predictions, but our positive predictions are more likely to be correct. This means that the precision would be higher and the recall lower. If we make the threshold lower, we’ll have more positive predictions, so we’re more likely to catch all the positive cases. This means that the recall would be higher and the precision lower."

# *"Each choice of a threshold is a different model. ROC (Receiver operating characteristic) Curve is a graph showing all of the possible models and their performance."*

# higher threshold = higher precision, lower recall
# lower threshold = lower precision, higher recall

# Another name for recall is sensitivity. 
# sensitivity is true positive rate. specificity is true negative rate. 
# sensitivity vs. specificity shows a trade-off analogous to recall vs. precision: raising the threshold lowers sensitivity but raises specificity.

#      A -     TN | FP
#      c       ___|___
#      t          |
#      u +     FN | TP  
#      a       
#      l       -     +
#          P r e d i c t e d

# sensitivity = positives predicted correctly / all positive cases = TP / (TP + FN)
# specificity = negatives predicted correctly / all negative cases = TN/ (TN + FP)

# Rows are actual classes and columns are predicted classes, negative class first.
# Test set: 82 actual positives, 140 actual negatives.
# A single print with newline separators emits the heading, the matrix and a trailing blank line.
print("confusion matrix:", confusion_matrix(y_test, y_pred), "", sep="\n")

#      A -    129 | 11
#      c       ___|___
#      t          |
#      u +     26 | 56  
#      a       
#      l       -     +
#          P r e d i c t e d


confusion matrix:
[[129  11]
 [ 26  56]]



In [3]:
# Sensitivity = TP / (TP + FN) = 56 / (56 + 26) = 56 / 82 = .6829
# Sensitivity calculated w/ scikit-learn:
# Sensitivity (true positive rate) is the same metric as recall, so the imported
# recall_score function is simply bound to a second, more descriptive name.
sensitivity_score = recall_score
# Sensitivity of the current test-set predictions.
print("sensitivity:", sensitivity_score(y_test, y_pred))

# Specificity = TN / (TN + FP) = 129 / (129 + 11) = 129 / 140 = .9214 
# Specificity calculated w/ scikit-learn:
def specificity_score(y_true, y_predict):
    """Return the specificity (true negative rate) of binary predictions.

    Specificity = TN / (TN + FP), which is the recall of the negative class.
    It is computed by asking recall_score for class 0 explicitly, rather than
    indexing the first entry of a per-class array: that entry is only the
    negative class when label 0 actually occurs in the data.

    Parameters:
        y_true: actual class labels (0 = negative, 1 = positive).
        y_predict: predicted class labels; boolean False/True predictions are
            used elsewhere in this notebook and are treated as 0/1.

    Returns:
        The fraction of actual negatives that were predicted negative.
    """
    # pos_label=0 makes class 0 the class whose recall is measured.
    return recall_score(y_true, y_predict, pos_label=0)
# Report specificity for the current predictions, followed by a blank separator line.
print("specificity:", specificity_score(y_test, y_pred), end="\n\n")


sensitivity: 0.6829268292682927
specificity: 0.9214285714285714



In [10]:
# The goal is to maximize both values, though generally making one larger makes the other smaller. Whether we put more emphasis on sensitivity or specificity depends on the situation. (We graph the sensitivity vs. specificity curve as a proxy for the recall vs. precision curve.)

## adjusting logistic regression threshold

# predict_proba() gives, for each test data point, the probability of class 0 (didn't survive)
# and of class 1 (survived). Each row adds up to 100%.
# Computed once here; the survival column below is sliced from this same result.
class_probabilities = model.predict_proba(X_test)
print("[prob. died, prob. survived]")
print(class_probabilities[:4])  # show first four prediction probabilities
print()

# We are only concerned with the second column: the probability of survival.
probabilities = class_probabilities[:, 1]
print("probability of survival:")
print(probabilities[:4])  # show first four prediction probabilities
print()

# The model's own predict() uses the default threshold of .5 (> .5 = survived).
y_pred = model.predict(X_test)
print("0.5 threshold:", y_pred[:4])  # show first four predictions

# Changing the threshold to .75: the comparison yields an array of boolean True(1)/False(0)
# values indicating whether each data point met the new threshold for survival.
# NOTE: this rebinds y_pred, so from here on y_pred holds the 0.75-threshold predictions,
# not the model's default predictions.
threshold = 0.75
y_pred = probabilities > threshold
print(f"{threshold} threshold:", y_pred[:4])  # show first four predictions
# "a threshold of 0.75 means we need to be more confident in order to make a positive prediction. This results in fewer positive predictions and more negative predictions"


[prob. died, prob. survived]
[[0.46054643 0.53945357]
 [0.8881001  0.1118999 ]
 [0.13520592 0.86479408]
 [0.62497128 0.37502872]]

probability of survival:
[0.53945357 0.1118999  0.86479408 0.37502872]

0.5 threshold: [1 0 1 0]
0.75 threshold: [False False  True False]


In [11]:
# y_pred at this point holds the 0.75-threshold predictions assigned in the previous cell.
print("metrics for model w/ .75 threshold:")
threshold_metrics = (
    ("precision", precision_score),
    ("recall", recall_score),
    ("sensitivity", sensitivity_score),  # same function as recall_score, so same value as recall
    ("specificity", specificity_score),
)
for metric_name, metric_fn in threshold_metrics:
    print(f"{metric_name}:", metric_fn(y_test, y_pred))
print()

# Same test set, so still 82 actual positives and 140 actual negatives, but some predictions differ.
print("confusion matrix:")
print(confusion_matrix(y_test, y_pred))


## plot ROC curve (not shown here)

# code and comments by github.com/alandavidgrunberg


metrics for model w/ .75 threshold:
precision: 0.9230769230769231
recall: 0.43902439024390244
sensitivity: 0.43902439024390244
specificity: 0.9785714285714285

confusion matrix:
[[137   3]
 [ 46  36]]
