In [74]:
#basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#libraries for model evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score  #useful for evaluating performance of probabilistic classifiers
from sklearn.model_selection import cross_val_score
#classification models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.neural_network import MLPClassifier

In [75]:
df = pd.read_csv('modified_bail_dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Type_of_Crime,Charges_Filed,Time_Served_Days,Prior_Criminal_History,Risk_of_Flight_Score,Influence_on_Trial,Bail_Decision,Socio_Economic_Background
0,0,Violent,IPC 420,2549,1,0.639427,nit,Granted,Middle
1,1,Non-violent,IPC 302,2188,0,0.025011,Low,Rejected,Upper
2,2,Violent,Economic Offences Act,111,1,0.275029,High,Granted,Upper
3,3,Cyber,IPC 420,1614,0,0.223211,High,Granted,Lower
4,4,Violent,Narcotic Drugs Act,2425,1,0.736471,High,Rejected,Lower


In [76]:
X = df.drop(columns=['Bail_Decision','Time_Served_Days','Risk_of_Flight_Score','Prior_Criminal_History'],axis=1)

In [77]:
y= df['Bail_Decision']

In [78]:
num_features = X.select_dtypes(exclude="object").columns
cat_features = X.select_dtypes(include="object").columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()
preprocessor = ColumnTransformer(
    [
        ("OneHotEncoder", oh_transformer, cat_features),
        ("StandardScaler", numeric_transformer, num_features),
    ]
)

In [79]:
X = preprocessor.fit_transform(X)

In [80]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape, X_test.shape

((400, 21), (100, 21))

In [81]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

Creating an Evalutation Functionn to give all metrics after model training

In [82]:
def evaluate_model(true, predicted, predicted_proba):
    accuracy = accuracy_score(true, predicted)
    precision = precision_score(true, predicted, pos_label=1)
    recall = recall_score(true, predicted, pos_label=1)
    f1 = f1_score(true, predicted, pos_label=1)
    roc_auc = roc_auc_score(true, predicted_proba)
    return accuracy, precision, recall, f1, roc_auc

In [83]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest Classifier": RandomForestClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Support Vector Classifier": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Gradient Boosting Classifier": GradientBoostingClassifier(),
    "XGBoost": XGBClassifier(),
    "Ada Boost Classifier": AdaBoostClassifier(),
    "LightGBM": LGBMClassifier(),
    "Naive bayes Classifier": GaussianNB(),
    "Extra Tree Classifier": ExtraTreesClassifier(),
    "Bagging Classifier": BaggingClassifier(),
    "Ridge Classifier": RidgeClassifier(),
    "Neural Network MLPClassifier": MLPClassifier()
}
model_list = []
f1_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model = SVC(probability=True)
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):
        y_train_proba = model.predict_proba(X_train)[:,1]
        y_test_proba = model.predict_proba(X_test)[:,1]
    else:
        y_train_proba = None
        y_test_proba = None

    model_train_accuracy, model_train_precision, model_train_recall, model_train_f1, model_train_roc_auc = evaluate_model(y_train, y_train_pred, y_train_proba)
    model_test_accuracy, model_test_precision, model_test_recall, model_test_f1, model_test_roc_auc = evaluate_model(y_test, y_test_pred, y_test-y_test_proba)
    
    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print("Model performance for Training set")
    print("-Accuracy: {:.4f}".format(model_train_accuracy))
    print("-Precision: {:.4f}".format(model_train_precision))
    print("-Recall: {:.4f}".format(model_train_recall))
    print("-f1 Score: {:.4f}".format(model_train_f1))
    if model_train_roc_auc is not None:
        print("-ROC AUC: {:.4f}".format(model_train_roc_auc))
    
    print('----------------------------------')

    print("Model performance for Test set")
    print("-Accuracy: {:.4f}".format(model_test_accuracy))
    print("-Precision: {:.4f}".format(model_test_precision))
    print("-Recall: {:.4f}".format(model_test_recall))
    print("-f1 Score: {:.4f}".format(model_test_f1))
    if model_test_roc_auc is not None:
        print("-ROC AUC {:.4f}".format(model_test_roc_auc))
    f1_list.append(model_test_f1)

    print('='*35)
    print('\n')




Logistic Regression
Model performance for Training set
-Accuracy: 0.7225
-Precision: 0.7485
-Recall: 0.6443
-f1 Score: 0.6925
-ROC AUC: 0.1637
----------------------------------
Model performance for Test set
-Accuracy: 0.5200
-Precision: 0.4561
-Recall: 0.6047
-f1 Score: 0.5200
-ROC AUC 1.0000


Random Forest Classifier
Model performance for Training set
-Accuracy: 0.7225
-Precision: 0.7485
-Recall: 0.6443
-f1 Score: 0.6925
-ROC AUC: 0.1612
----------------------------------
Model performance for Test set
-Accuracy: 0.5200
-Precision: 0.4561
-Recall: 0.6047
-f1 Score: 0.5200
-ROC AUC 1.0000


Decision Tree Classifier
Model performance for Training set
-Accuracy: 0.7225
-Precision: 0.7485
-Recall: 0.6443
-f1 Score: 0.6925
-ROC AUC: 0.1599
----------------------------------
Model performance for Test set
-Accuracy: 0.5200
-Precision: 0.4561
-Recall: 0.6047
-f1 Score: 0.5200
-ROC AUC 1.0000


Support Vector Classifier
Model performance for Training set
-Accuracy: 0.7225
-Precision: 0.748