# Logistic regression model for fair recidivism prediction tasks.

#### COMPAS dataset

This dataset is used to assess the likelihood that a criminal defendant will re-offend.

##### Aim 1: to make a logistic regression model using this dataset 

##### and to get the predictions, ground-truth values, and protected vs. non-protected attributes.

#### Prediction Objective: 
Estimate the target variable `two_year_recid` (the recidivism outcome), which is either 'Did recid' or 'No recid', for each set of input features.

In [2]:
#import datasets
from aif360.datasets import StandardDataset
from aif360.datasets import CompasDataset
#import fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric

from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')



# Install AIF360 together with its optional 'inFairness' extra.
# NOTE(review): this is a notebook command, not a Python statement; it presumably
# relies on IPython automagic (equivalent to `%pip install ...`) — confirm.
pip install 'aif360[inFairness]'


##### Note: The label value 0 in this case is considered favorable (no recidivism).

In [6]:
#import datasets
from aif360.datasets import StandardDataset
from aif360.datasets import CompasDataset
#import fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric

from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd


# Load the COMPAS dataset.
#
# 'sex' and 'race' are declared as the protected attributes, with 'Female' and
# 'Caucasian' passed as their respective privileged classes. 'two_year_recid'
# is the label column; the three listed categorical features are passed as
# `categorical_features` (they appear one-hot encoded, e.g. 'age_cat=25 - 45',
# in the resulting dataframe).

try:
    compas = CompasDataset(
        protected_attribute_names=['sex', 'race'],
        privileged_classes=[['Female'], ['Caucasian']], 
        features_to_keep=['age', 'c_charge_degree', 'race', 'age_cat', 
                          'sex', 'priors_count', 'days_b_screening_arrest', 'c_charge_desc'],
        features_to_drop=[],
        categorical_features=['age_cat', 'c_charge_degree', 'c_charge_desc'],
        label_name='two_year_recid'
    )
    print("Dataset loaded successfully!")

    # convert_to_dataframe() returns the dataframe and the metadata in a tuple
    df, meta = compas.convert_to_dataframe()

except Exception as e:
    print(f"Error loading COMPAS dataset: {e}")
    # Everything below needs `compas` and `df`. Re-raise so the real cause is
    # surfaced here instead of a later NameError (or, in a re-run notebook,
    # silently continuing with stale objects from a previous execution).
    raise


print("compas object type:", type(compas))
print("Dataframe for compas dataset:", df.head())
# DataFrame.info() writes its summary to stdout and returns None, so print the
# heading first and then call the method (the original passed the uncalled
# bound method to print(), which dumped its repr instead of the summary).
print("show columns and data types for dataframe: ")
df.info()




Dataset loaded successfully!
compas object type: <class 'aif360.datasets.compas_dataset.CompasDataset'>
Dataframe for compas dataset:    sex   age  race  priors_count  days_b_screening_arrest  age_cat=25 - 45  \
1  0.0  69.0   0.0           0.0                     -1.0              0.0   
3  0.0  34.0   0.0           0.0                     -1.0              1.0   
4  0.0  24.0   0.0           4.0                     -1.0              0.0   
7  0.0  44.0   0.0           0.0                      0.0              1.0   
8  0.0  41.0   1.0          14.0                     -1.0              1.0   

   age_cat=Greater than 45  age_cat=Less than 25  c_charge_degree=F  \
1                      1.0                   0.0                1.0   
3                      0.0                   0.0                1.0   
4                      0.0                   1.0                1.0   
7                      0.0                   0.0                0.0   
8                      0.0                