In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Read the COMPAS scores table from disk
data = pd.read_csv("data/compas-scores-two-years.csv")

# Name the predictor columns and the label column used below
target = "two_year_recid"
features = ["age", "priors_count"]

# Narrow the frame to the predictors, the race column and the label
data = data[features + ["race", target]]

# The model sees only the predictor columns; the label is kept separately
y = data[target]
X = data[features]

# Race is not a model input; it is kept only to slice the test set
# into groups when the predictions are evaluated
races = data["race"]

# One accumulator per (group, metric) pair; the evaluation loop appends
# one value to each of these per train-test split
overall_accuracies = []
black_accuracies = []
white_accuracies = []

overall_fprs = []
black_fprs = []
white_fprs = []

overall_fnrs = []
black_fnrs = []
white_fnrs = []

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Return accuracy, false-positive rate and false-negative rate.

    Assumes binary labels encoded as 0 (negative) and 1 (positive);
    confirm against the target column if the encoding ever changes.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-for-element with y_true.

    Returns:
        A tuple ``(accuracy, fpr, fnr)`` where
        ``fpr = fp / (fp + tn)`` and ``fnr = fn / (fn + tp)``.
        A rate whose denominator is zero (no negatives, respectively no
        positives, in ``y_true``) is reported as ``0.0``.
    """
    # Pin the label set so the matrix is always 2x2. Without `labels`,
    # confusion_matrix only covers the classes that actually occur, so a
    # subset where every true and predicted label is the same class gives a
    # 1x1 matrix and the four-way unpacking below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    accuracy = accuracy_score(y_true, y_pred)
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0.0
    return accuracy, fpr, fnr

# Perform the 1000 train-test splits and evaluations.
# Each split is seeded with its iteration index: the 1000 splits still differ
# from one another, but rerunning the experiment reproduces the same splits
# and therefore the same reported averages and intervals.
for seed in range(1000):
    X_train, X_test, y_train, y_test, races_train, races_test = train_test_split(
        X, y, races, test_size=0.20, stratify=y, random_state=seed
    )

    # Fit the logistic regression model on the training portion only
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate and store overall metrics on the whole held-out set
    acc, fpr, fnr = calculate_metrics(y_test, y_pred)
    overall_accuracies.append(acc)
    overall_fprs.append(fpr)
    overall_fnrs.append(fnr)

    # Calculate and store metrics for African-American.
    # races_test comes from the same split call as y_test, so the boolean
    # mask lines up row-for-row with both y_test and y_pred.
    mask_black = races_test == 'African-American'
    acc, fpr, fnr = calculate_metrics(y_test[mask_black], y_pred[mask_black])
    black_accuracies.append(acc)
    black_fprs.append(fpr)
    black_fnrs.append(fnr)

    # Calculate and store metrics for Caucasian
    mask_white = races_test == 'Caucasian'
    acc, fpr, fnr = calculate_metrics(y_test[mask_white], y_pred[mask_white])
    white_accuracies.append(acc)
    white_fprs.append(fpr)
    white_fnrs.append(fnr)

# Calculate average and confidence intervals
def calculate_average_and_confidence_interval(metric_list, confidence_level=95.0):
    """Return the mean of *metric_list* and a central percentile interval.

    The interval is empirical: its bounds are the percentiles of
    ``metric_list`` that cut off two equal tails around the requested
    central coverage. With the default of 95.0 these are the 2.5th and
    97.5th percentiles.

    Args:
        metric_list: Sequence of numeric values (one per repetition).
        confidence_level: Central coverage of the interval in percent,
            strictly between 0 and 100.

    Returns:
        A tuple ``(average, confidence_interval)`` where ``average`` is
        ``np.mean(metric_list)`` and ``confidence_interval`` is the array
        ``[lower, upper]`` returned by ``np.percentile``.

    Raises:
        ValueError: If ``confidence_level`` is not strictly between 0 and 100.
    """
    if not 0.0 < confidence_level < 100.0:
        raise ValueError(
            f"confidence_level must be strictly between 0 and 100, got {confidence_level!r}"
        )

    # Split the excluded mass evenly between the lower and upper tails.
    tail = (100.0 - confidence_level) / 2.0
    average = np.mean(metric_list)
    confidence_interval = np.percentile(metric_list, [tail, 100.0 - tail])
    return average, confidence_interval

# Output the results
# Each row is (key for the mean, key for its interval, per-split samples).
# The row order fixes the insertion order of `results`, and therefore the
# order in which the lines are printed.
summary_rows = (
    ('Overall Accuracy', 'Overall Accuracy CI', overall_accuracies),
    ('Black Accuracy', 'Black Accuracy CI', black_accuracies),
    ('White Accuracy', 'White Accuracy CI', white_accuracies),
    ('Overall FPR', 'Overall FPR CI', overall_fprs),
    ('Black FPR', 'Black FPR CI', black_fprs),
    ('White FPR', 'White FPR CI', white_fprs),
    ('Overall FNR', 'Overall FNR CI', overall_fnrs),
    ('Black FNR', 'Black FNR CI', black_fnrs),
    ('White FNR', 'White FNR CI', white_fnrs),
)

results = {}
for mean_key, interval_key, samples in summary_rows:
    summary = calculate_average_and_confidence_interval(samples)
    results[mean_key], results[interval_key] = summary

# Means are scalars and intervals are arrays; the array check picks the
# print format. Interval lines carry an extra newline so each metric's
# mean/interval pair is followed by a blank line.
for metric, value in results.items():
    if not isinstance(value, np.ndarray):
        print(f"{metric}: {value*100:.2f}%")
        continue
    print(f"{metric}: {value[0]*100:.2f}%-{value[1]*100:.2f}%\n")

# After this point `results` holds every average metric and its interval


Overall Accuracy: 67.73%
Overall Accuracy CI: 65.84%-69.72%

Black Accuracy: 67.49%
Black Accuracy CI: 64.51%-70.57%

White Accuracy: 67.42%
White Accuracy CI: 63.76%-70.88%

Overall FPR: 19.35%
Overall FPR CI: 16.52%-22.19%

Black FPR: 26.71%
Black FPR CI: 22.50%-31.21%

White FPR: 13.41%
White FPR CI: 9.67%-17.05%

Overall FNR: 48.02%
Overall FNR CI: 44.46%-51.85%

Black FNR: 37.98%
Black FNR CI: 33.25%-42.90%

White FNR: 62.14%
White FNR CI: 56.11%-68.32%

