# **Method 1-Logistic Regression with threshold:**

In [None]:
import pandas as pd
import numpy as np
from IPython.display import display
from sklearn.model_selection import train_test_split
import plotly.figure_factory as ff
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Silence pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None  # default='warn'
# Raw CSV in the propublica/compas-analysis GitHub repository.
dataURL = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'
# Read the CSV straight from the URL (requires network access).
data = pd.read_csv(dataURL)

## **Data description:**
ProPublica obtained pretrial defendants' COMPAS scores from the Broward County Sheriff’s Office in Florida for 2013 – 2014.
Each pretrial defendant received at least three COMPAS scores, each ranging from 1 to 10, with 10 being the highest risk:
1. **decile_score**- Risk of recidivism
2. **v_decile_score**- Risk of violence
3. Risk of Failure to Appear
<br>

We are also provided with two category-based evaluations, labeled **“High”** (8 – 10), **“Medium”** (5 – 7) and **“Low”** (1 – 4):
1. **score_text**-  Risk of recidivism category
2. **v_score_text**- Risk of violence category


**days_b_screening_arrest**- number of days between the arrest and the COMPAS screening

**c_charge_degree**- the degree of the charge

**priors_count**- number of prior offences

**is_recid**- yes/no prediction of the model of whether the defendant will reoffend

**two_year_recid**- actual result over a two-year period 

**is_violent_recid**- yes/no prediction of the model of whether the defendant will have a violent offence

**juv_misd_count**- number of juvenile misdemeanor crimes

**juv_fel_count**- number of juvenile felony crimes

**juv_other_count**- number of juvenile crimes with a degree other than misdemeanor or felony



## **Data preprocessing:**
We filtered the underlying data from Broward county to include only those rows representing people who had either recidivated in two years, or had at least two years outside of a correctional facility.

In [None]:
# Keep rows whose days_b_screening_arrest lies in [-30, 30], whose is_recid is
# not -1 and whose charge degree is not 'O'.
# NOTE(review): the 2nd and 3rd masks are built from the unfiltered `data`, so
# this relies on pandas aligning the boolean mask to the already-filtered
# frame by index label -- confirm on the pandas version in use.
df = (data
      .loc[(data['days_b_screening_arrest'] <= 30) & (data['days_b_screening_arrest'] >= -30), :]
      .loc[data['is_recid'] != -1, :]
      .loc[data['c_charge_degree'] != 'O', :])
# Renumber rows 0..n-1; the old index becomes an 'index' column, which the
# column selection on the next line drops again.
df.reset_index(inplace = True)
df=df[['age', 'c_charge_degree', 'race', 'age_cat', 'score_text', 'sex', 'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid', 'two_year_recid', 'c_jail_in', 'c_jail_out','is_violent_recid','v_decile_score', 'v_score_text','juv_misd_count', 'juv_other_count','juv_fel_count']]

# Categorical columns to be one-hot encoded.
cat = ['score_text','age_cat','sex','race','c_charge_degree','v_score_text']

df.loc[:,cat] = df.loc[:,cat].astype('category')
# One indicator column per category value, named '<column>_<value>'.
df = pd.get_dummies(data = df, columns=cat)
# Normalize column names: trim, lowercase, spaces and hyphens -> underscores
# (e.g. 'race_African-American' -> 'race_african_american').
new_column_names = [col.lstrip().rstrip().lower().replace(" ", "_").replace("-", "_") for col in df.columns]
df.columns = new_column_names
# Fold "medium" into "high": after these two lines the *_high indicators mean
# "medium or high". The *_medium columns themselves are left in place.
df['v_score_text_high'] = df['v_score_text_medium'] + df['v_score_text_high']
df['score_text_high'] = df['score_text_medium'] + df['score_text_high']
# NOTE(review): not referenced anywhere else in the visible part of the notebook.
sensetive_feat='race'
df.head()


Unnamed: 0,age,priors_count,days_b_screening_arrest,decile_score,is_recid,two_year_recid,c_jail_in,c_jail_out,is_violent_recid,v_decile_score,...,race_asian,race_caucasian,race_hispanic,race_native_american,race_other,c_charge_degree_f,c_charge_degree_m,v_score_text_high,v_score_text_low,v_score_text_medium
0,69,0,-1.0,1,0,0,2013-08-13 06:03:42,2013-08-14 05:41:20,0,1,...,0,0,0,0,1,1,0,0,1,0
1,34,0,-1.0,3,1,1,2013-01-26 03:45:27,2013-02-05 05:36:53,1,1,...,0,0,0,0,0,1,0,0,1,0
2,24,4,-1.0,4,1,1,2013-04-13 04:58:34,2013-04-14 07:02:04,0,3,...,0,0,0,0,0,1,0,0,1,0
3,44,0,0.0,1,0,0,2013-11-30 04:50:18,2013-12-01 12:28:56,0,1,...,0,0,0,0,1,0,1,0,1,0
4,41,14,-1.0,6,1,1,2014-02-18 05:08:24,2014-02-24 12:18:30,0,2,...,0,1,0,0,0,1,0,0,1,0


## **Useful Functions:**

In [None]:
# plot the distribution of the predictions of African-American and Caucasian defendants:
def plot_dist(df,pred):
  """Show overlaid distribution plots of one column for the two race groups.

  Args:
    df: DataFrame holding the indicator columns 'race_african_american' and
      'race_caucasian' plus the column named by `pred`.
    pred: name of the column whose values are plotted.

  The figure is displayed with fig.show(); nothing is returned.
  """
  # Legend label -> indicator column selecting that group's rows.
  race_columns={'black':'race_african_american','white':'race_caucasian'}
  group_labels=list(race_columns)
  hist_data=[df.loc[df[column]==1,pred] for column in race_columns.values()]

  # Histogram per group with a fitted normal curve and no rug plot.
  fig=ff.create_distplot(
    hist_data,
    group_labels,
    colors=['black',"rgb(180, 180, 180)"],
    bin_size=.1,
    show_rug=False,
    curve_type='normal',
  )
  fig.update_layout(
    title_text='Distribution Of The Predections Of African-American & Caucasian Defendants',
    title_x=0.5,
    width=700,
    height=500,
  )
  fig.show()

# compute the probability:
def compute_prob(df,race,predection,recid,threshold):
    """Return the share of one race/outcome subgroup scored above a threshold.

    The subgroup is every row with df[race] == 1 and
    df["two_year_recid"] == recid. The result is the fraction of those rows
    whose df[predection] is strictly greater than `threshold`.

    Args:
        df: DataFrame with the `race`, `predection` and "two_year_recid" columns.
        race: name of the race indicator column.
        predection: name of the score column.
        recid: value of "two_year_recid" that defines the subgroup.
        threshold: cut-off; only scores strictly above it are counted.

    Returns:
        The fraction as a Python float.

    Raises:
        ZeroDivisionError: if the subgroup has no rows.
    """
    in_group=(df[race]==1) & (df["two_year_recid"]==recid)
    above_cutoff=in_group & (df[predection]>threshold)
    # Convert to plain ints so an empty subgroup still raises instead of
    # producing a NumPy nan/inf.
    group_size=int(in_group.sum())
    flagged=int(above_cutoff.sum())
    return flagged/float(group_size)
    
#print the probabilities of Equalized Odds parity:
def print_prob(df,th_black=0.5,th_white=0.5,pred='probability'):
  """Print, per actual outcome, each group's predicted-positive rate and the gap.

  For two_year_recid == 1 and then two_year_recid == 0, prints the
  compute_prob rate of African-American rows (using `th_black`), the rate of
  Caucasian rows (using `th_white`) and the absolute difference between them.

  Args:
    df: DataFrame accepted by compute_prob.
    th_black: threshold applied to 'race_african_american' rows.
    th_white: threshold applied to 'race_caucasian' rows.
    pred: name of the score column.
  """
  outcomes=[(1,"recidivism"),(0,"no recidivism")]
  for position,(recid,outcome) in enumerate(outcomes):
    if position:
      # Blank separator between the two outcome sections.
      print("\n")

    black_rate=compute_prob(df,'race_african_american',pred,recid,th_black)
    print("P[recidivism predicted | african_american,{}]={}".format(outcome,black_rate))

    white_rate=compute_prob(df,'race_caucasian',pred,recid,th_white)
    print("P[recidivism predicted | Caucasian, {}]={}".format(outcome,white_rate))

    print("The diffrence:{}".format(np.abs(black_rate-white_rate)))





# **Logistic Regression:**
Logistic regression models the probabilities for classification problems with two possible outcomes. For example: given the parameters, will the defendant reoffend or not?

It’s an extension of the linear regression model for classification problems.
The logistic regression model uses the logistic function - sigmoid:
<h2>
\begin{align}
        σ(w^T x_i )= \frac{\mathrm{e}^{w^Tx_i}}{1+\mathrm{e}^{w^Tx_i}}
    \end{align}
</h2>

If we feed an input value to the sigmoid function, it returns a value between 0 and 1, which is interpreted as the probability of the outcome. This probability is the model's confidence in the label it predicts.

If $σ(w^T x_i ) ≥ 0.5 $ then the label will be 1 (will reoffend) otherwise it will be 0 (won't reoffend).

## **Train The Model:**

In [None]:
# Model inputs. Note that the race indicator columns are among the features.
features=['sex_female', 'age_cat_greater_than_45', 'age_cat_less_than_25','race_african_american', 'race_asian' ,'race_hispanic' ,'race_native_american' ,'race_caucasian','race_other' ,'priors_count' ,'c_charge_degree_m','juv_misd_count', 'juv_other_count','juv_fel_count']
# Features plus the target, so rows can be split once and separated afterwards.
cols=features.copy()
cols.append("two_year_recid")
# NOTE: despite its name, X also contains the target column "two_year_recid".
X=df[cols]

# Split into train and test datasets (80% / 20%, fixed seed for reproducibility):
train,test=train_test_split(X,test_size=0.2, random_state=42)
X_train=train[features]
y_train=train["two_year_recid"]
X_test=test[features]
y_test=test["two_year_recid"]

# Train with logistic regression (scikit-learn default hyper-parameters):
model=LogisticRegression()
model.fit(X_train, y_train)

# Add the predictions for the training dataset: column 1 of predict_proba,
# i.e. the probability of the second entry in model.classes_.
# NOTE(review): presumably two_year_recid == 1 -- confirm model.classes_.
train['probability']=model.predict_proba(X_train)[:, 1]


<h3>Let's observe the distribution of the predictions for African-American and Caucasian defendants</h3>

In [None]:
# Plot the training-set 'probability' column for the two race groups.
plot_dist(train,pred='probability')

<h4>We can clearly see that there is a major difference between the two races</h4>

## **Equalized Odds parity:** 
We saw that there is a clear bias in the dataset: Black defendants were more likely to be misclassified as higher risk than their white counterparts.

In order to achieve fairness, we will try to achieve Equalized Odds parity.

**Reminder:** Equalized Odds parity requires parity between the races among the defendants with label 1 in the training set, and parity between the races among the defendants with label 0 in the training set. 

This means that defendants who reoffended are equally likely to be predicted to reoffend, whatever their race. Similarly, defendants who did not reoffend are equally likely to be predicted to reoffend, whatever their race.

In mathematical terms:

\begin{align}
       TPR_{African-American}=TPR_{Caucasian}
    \end{align}

<h4> <center> P[predicted  recidivism |african american, recidivism]=P[predicted  recidivism|Caucasian, recidivism] </center></h4>

<h4> <center>and</center></h4>

\begin{align}
       FPR_{African-American}=FPR_{Caucasian}
    \end{align}
<h4> <center>P[predicted  recidivism|african american, no recidivism]=P[predicted  recidivism|Caucasian, no recidivism] </center></h4>


**Note that** TPR = 1 − FNR, so minimizing the difference between the two groups' TPRs also minimizes the difference between their FNRs.

In [None]:
# Print the per-group rates on the training set with the default 0.5 thresholds.
print_prob(train,pred='probability')

P[recidivism predicted | african_american,recidivism]=0.7009622501850481
P[recidivism predicted | caucasian, recidivism]=0.3934169278996865
The diffrence:0.3075453222853616


P[recidivism predicted | african_american,no recidivism]=0.3446280991735537
P[recidivism predicted | caucasian, no recidivism]=0.1661721068249258
The diffrence:0.17845599234862788


<h3> We can clearly see that the traditional model does not achieve Equalized Odds parity.</h3>


# **So what can we do?**

## **Threshold-Moving:**

Many machine learning algorithms are capable of predicting a probability or scoring of class membership. 

Those probabilities can be mapped to a class label by using a **decision threshold**: all values equal to or greater than the threshold are mapped to one class, and all other values are mapped to the other class.

The default value for the threshold is 0.5.

For those classification problems that have a severe class imbalance, the default threshold can result in poor performance.

As such, a simple solution for reducing the disparities is using group-specific decision threshold that will minimize the differences of FNR and FPR between the two races, thus enabling to achieve Equalized Odds parity.

In [None]:
# The logistic model applies a single 0.5 threshold to everyone. Here we search
# for one threshold per group so that the gap between the groups' rates is minimal.

def find_best_threshold(df,pred='probability',thresholds=None):
  """Grid-search one decision threshold per race group.

  Every pair (th_b, th_w) drawn from `thresholds` is scored by
  |rate_black - rate_white| summed over two_year_recid == 0 and
  two_year_recid == 1, where each rate comes from compute_prob. The pair with
  the smallest score is printed and returned. On ties, the first pair in
  iteration order (th_b outer loop, th_w inner loop) wins.

  Args:
    df: DataFrame accepted by compute_prob, with the indicator columns
      'race_african_american' and 'race_caucasian'.
    pred: name of the score column.
    thresholds: optional iterable of candidate thresholds tried for both
      groups. Defaults to np.arange(0.3, 0.7, 0.01).

  Returns:
    Tuple (best_th_b, best_th_w): the thresholds for African-American and
    Caucasian rows respectively.

  Raises:
    ValueError: if `thresholds` is empty.
  """
  if thresholds is None:
    thresholds=np.arange(0.3,0.7,0.01)
  candidates=list(thresholds)
  if not candidates:
    raise ValueError("thresholds must contain at least one candidate value")
  labels=[0,1]

  # A group's rates depend only on that group's own threshold, so compute them
  # once per candidate instead of once per (th_b, th_w) pair.
  black_rates=[[compute_prob(df,'race_african_american',pred,y_label,th) for y_label in labels]
               for th in candidates]
  white_rates=[[compute_prob(df,'race_caucasian',pred,y_label,th) for y_label in labels]
               for th in candidates]

  # The summed gap can be as large as 2, so start from infinity. A fixed
  # sentinel of 1 could reject every pair and return thresholds outside the grid.
  best_diff=float('inf')
  best_th_b=candidates[0]
  best_th_w=candidates[0]
  for th_b,rates_b in zip(candidates,black_rates):
    for th_w,rates_w in zip(candidates,white_rates):
      diff=0
      for black_r,white_r in zip(rates_b,rates_w):
        diff+=np.abs(black_r-white_r)

      if diff<best_diff:
        best_diff=diff
        best_th_b=th_b
        best_th_w=th_w

  print("Best difference achieved: {}".format(best_diff))
  print("African American best threshold achieved: {}".format(best_th_b))
  print("Caucasian  best threshold achieved: {}".format(best_th_w))
  return best_th_b,best_th_w


In [None]:
# Search the thresholds on the training set, then re-print the group rates
# using them. Both thresholds are kept as globals for the cells that follow.
black_threshold,white_threshold=find_best_threshold(train)
print("\n")
print_prob(train,black_threshold,white_threshold)

Best diffrence achived: 0.02237337593656602
african_american best threshold achived: 0.6700000000000004
caucasian best threshold achived: 0.5300000000000002


P[recidivism predicted | african_american,recidivism]=0.3168023686158401
P[recidivism predicted | caucasian, recidivism]=0.3103448275862069
The diffrence:0.006457541029633196


P[recidivism predicted | african_american,no recidivism]=0.09090909090909091
P[recidivism predicted | caucasian, no recidivism]=0.10682492581602374
The diffrence:0.015915834906932824


<h3>We can see that the differences between the probabilities in the train dataset are really small. </h3>

<h3><b>Did we achieve Equalized Odds parity?</b></h3>

## **Validate the Result on the Test dataset:**
In order to determine whether the thresholds we found generalize (i.e. are not fitted only to the train dataset), let's check whether we also get small differences between the probabilities on the test dataset.

In [None]:
# Testing on the test set: score it with the already-fitted model (column 1 of
# predict_proba) and print the group rates using the thresholds found on the
# training set.
test['probability']=model.predict_proba(X_test)[:, 1]
print_prob(test,black_threshold,white_threshold)

P[recidivism predicted | african_american,recidivism]=0.3225806451612903
P[recidivism predicted | caucasian, recidivism]=0.2826086956521739
The diffrence:0.03997194950911642


P[recidivism predicted | african_american,no recidivism]=0.11842105263157894
P[recidivism predicted | caucasian, no recidivism]=0.1037037037037037
The diffrence:0.01471734892787524


We can see that we indeed got small differences between the probabilities on the test dataset as well (≈0.04 and ≈0.015), compared to the original model (≈0.31 and ≈0.18 on the train dataset).

Since the differences in both the train dataset and the test dataset are really small, we can say that we have approximately achieved Equalized Odds parity.

# **What about the accuracy?**
Let's observe what happens to the accuracy after we apply a different decision threshold to each subgroup.

In [None]:
def check_pred(row,black_th,white_th,default_th=0.5):
  """Turn one row's probability into a 0/1 label using a group-specific threshold.

  Args:
    row: mapping-like row (e.g. a DataFrame row) with the keys
      'race_african_american', 'race_caucasian' and 'probability'.
    black_th: threshold for rows with race_african_american == 1.
    white_th: threshold for rows with race_caucasian == 1.
    default_th: threshold for rows belonging to neither group. Previously
      such rows were always labelled 0 whatever their probability, which
      skewed any metric computed over all rows.

  Returns:
    1 if the row's probability is greater than or equal to the applicable
    threshold, otherwise 0.
  """
  if row['race_african_american']==1:
    threshold=black_th
  elif row['race_caucasian']==1:
    threshold=white_th
  else:
    threshold=default_th
  return 1 if row['probability']>=threshold else 0

In [None]:
def acc_table(df,black_th=None,white_th=None):
  """Compare accuracy before and after applying group-specific thresholds.

  Adds two columns to `df` in place: 'old_pred' (check_pred with 0.5 for both
  groups) and 'new_pred' (check_pred with the group-specific thresholds).
  Each is then scored against 'two_year_recid' with accuracy_score, over all
  rows and over the African-American and Caucasian subsets.

  Args:
    df: DataFrame with the columns 'probability', 'two_year_recid',
      'race_african_american' and 'race_caucasian'.
    black_th: threshold for African-American rows. Defaults to the
      module-level `black_threshold`.
    white_th: threshold for Caucasian rows. Defaults to the module-level
      `white_threshold`.

  Returns:
    DataFrame indexed by ['old', 'new'] with the columns
    ['overall', 'African-American', 'Caucasian'] holding the accuracies.
  """
  if black_th is None:
    black_th=black_threshold
  if white_th is None:
    white_th=white_threshold

  df['new_pred']=df.apply(lambda row: check_pred(row,black_th,white_th),axis=1)
  df['old_pred']=df.apply(lambda row: check_pred(row,0.5,0.5),axis=1)
  label=df['two_year_recid']

  # Table row -> prediction column. The original tested `index=='old'` (the
  # whole list against a string), which is always False, so both rows were
  # scored from 'new_pred'.
  pred_columns={'old':'old_pred','new':'new_pred'}
  # Table column -> boolean row selector.
  subsets={
    "overall": np.full(label.shape, True),
    "African-American": df["race_african_american"]==1,
    "Caucasian": df["race_caucasian"]==1,
  }

  table=pd.DataFrame(index=list(pred_columns),columns=list(subsets))
  for group,subset in subsets.items():
    y_true=label[subset]
    table[group]=[accuracy_score(y_true,df[column][subset]) for column in pred_columns.values()]
  return table

# Build the accuracy table for the test split (this also adds the 'old_pred'
# and 'new_pred' columns to `test`).
acc_table(test)

Unnamed: 0,overall,African-American,Caucasian
old,0.624291,0.599349,0.647577
new,0.624291,0.599349,0.647577


What can we infer from those results?