In [None]:
def specificity_score(y_true, y_pred):
    return precision_recall_fscore_support(y_true, y_pred)[1][0]

In [1]:
# Import statements
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_fscore_support

# Load the data into a dataframe
dataframe = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
# Convert the sex column into boolean values
dataframe['male'] = dataframe['Sex'] == 'male'

# Split the data into the features (2d numpy array) and target (1d numpy array)
x = dataframe[['Pclass', 'Age', 'Siblings/Spouses','Parents/Children', 'Fare', 'male']].values
y = dataframe['Survived'].values

print(x)

# Split the data into train and test
x_train, x_test, y_train, y_test = train_test_split(x, y)

print('whole dataset:', x.shape, y.shape)
print('training set:', x_train.shape,  y_train.shape)
print('test set:', x_test.shape, y_test.shape)

# Build the model using the training set
model = LogisticRegression()
model.fit(x_train, y_train)

# Evaluate the model using the test set
y_pred = model.predict(x_test)

print('\naccuracy:', accuracy_score(y_test, y_pred))
print('precision:', precision_score(y_test, y_pred))
print('recall:', recall_score(y_test, y_pred))
print('f1 score:', f1_score(y_test, y_pred))

# sklearn does not have sensitivity or specificity
sensitivity_score = recall_score
print('\nsensitivity:', sensitivity_score(y_test, y_pred))
print('specificity:', specificity_score(y_test, y_pred))

[[3 22.0 1 0 7.25 True]
 [1 38.0 1 0 71.2833 False]
 [3 26.0 0 0 7.925 False]
 ...
 [3 7.0 1 2 23.45 False]
 [1 26.0 0 0 30.0 True]
 [3 32.0 0 0 7.75 True]]
whole dataset: (887, 6) (887,)
training set: (665, 6) (665,)
test set: (222, 6) (222,)

accuracy: 0.7702702702702703
precision: 0.7471264367816092
recall: 0.6914893617021277
f1 score: 0.7182320441988951

sensitivity: 0.6914893617021277


NameError: ignored

In [None]:
# Set the classification threshold to be 0.75 instead of 0.5
y_pred = model.predict_proba(x_test)[:, 1] > 0.75

print('\naccuracy:', accuracy_score(y_test, y_pred))
print('precision:', precision_score(y_test, y_pred))
print('recall:', recall_score(y_test, y_pred))
print('f1 score:', f1_score(y_test, y_pred))

# sklearn does not have sensitivity or specificity
print('\nsensitivity:', sensitivity_score(y_test, y_pred))
print('specificity:', specificity_score(y_test, y_pred))


accuracy: 0.7792792792792793
precision: 0.9230769230769231
recall: 0.43902439024390244
f1 score: 0.5950413223140497

sensitivity: 0.43902439024390244
specificity: 0.9785714285714285
