In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

# Location of the COMPAS scores CSV that this analysis reads.
file_path = '/Users/adityamittal/Desktop/final/compas-scores.csv'
df = pd.read_csv(file_path)

# The two race groups compared throughout the rest of the analysis.
groups = ['African-American', 'Caucasian']

# pre-processing
# Keep only the two groups of interest and exclude rows whose decile score
# is exactly 5.
# NOTE(review): the reason for excluding decile score 5 is not stated here;
# confirm it is intentional.
keep_rows = df['race'].isin(groups) & (df['decile_score'] != 5)

# .copy() makes the filtered frame independent of the one returned by
# read_csv, so the column assignments below do not write to a slice
# (avoids pandas' SettingWithCopyWarning).
df = df.loc[keep_rows].copy()

# Binary prediction: decile scores of 6 and above count as a positive (1),
# everything else as a negative (0).
df['predicted'] = (df['decile_score'] >= 6).astype(int)
# Observed outcome column used as the ground truth.
df['true'] = df['two_year_recid']
# Alias of the raw decile score, used for the per-score breakdown.
df['score'] = df['decile_score']

# equalized odds confusion matrix
# For each group, print the four cells of the confusion matrix of the
# observed outcome ('true') against the thresholded score ('predicted').
for group in groups:
    subset = df[df['race'] == group]
    # labels=[0, 1] pins the matrix to 2x2 even when a group happens to
    # contain only one class; without it the matrix can be 1x1 and the
    # four-way unpack below raises ValueError. Values other than 0/1 are
    # left out of the counts.
    tn, fp, fn, tp = confusion_matrix(
        subset['true'], subset['predicted'], labels=[0, 1]
    ).ravel()
    print(f"\nConfusion matrix for {group}:")
    for cell_name, count in (("TN", tn), ("FP", fp), ("FN", fn), ("TP", tp)):
        print(f"{cell_name}:", count)

# probabilities for each score - sufficiency metric
# For every distinct score value (ascending) and every group, print the mean
# of the 'true' column, labelled P(Y=1), and its complement, labelled P(Y=0).
scores = sorted(df['score'].unique())
print("\nProbabilities by score and group:")
for score_value in scores:
    print(f"\nScore = {score_value}")
    for group in groups:
        # Rows belonging to this group at this exact score.
        in_cell = (df['race'] == group) & (df['score'] == score_value)
        positive_rate = df.loc[in_cell, 'true'].mean()
        print(f"  {group}: P(Y=1)={positive_rate}, P(Y=0)={1 - positive_rate}")


Confusion matrix for African-American:
TN: 990
FP: 616
FN: 532
TP: 1193

Confusion matrix for Caucasian:
TN: 1139
FP: 219
FN: 461
TP: 394

Probabilities by score and group:

Score = 1
  African-American: P(Y=1)=0.228643216080402, P(Y=0)=0.771356783919598
  Caucasian: P(Y=1)=0.20851688693098386, P(Y=0)=0.7914831130690161

Score = 2
  African-American: P(Y=1)=0.30279898218829515, P(Y=0)=0.6972010178117048
  Caucasian: P(Y=1)=0.31301939058171746, P(Y=0)=0.6869806094182825

Score = 3
  African-American: P(Y=1)=0.4190751445086705, P(Y=0)=0.5809248554913296
  Caucasian: P(Y=1)=0.34065934065934067, P(Y=0)=0.6593406593406593

Score = 4
  African-American: P(Y=1)=0.4597402597402597, P(Y=0)=0.5402597402597402
  Caucasian: P(Y=1)=0.39649122807017545, P(Y=0)=0.6035087719298246

Score = 6
  African-American: P(Y=1)=0.5598958333333334, P(Y=0)=0.44010416666666663
  Caucasian: P(Y=1)=0.5721649484536082, P(Y=0)=0.4278350515463918

Score = 7
  African-American: P(Y=1)=0.5925, P(Y=0)=0.4075
  Caucasian: