In [24]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from imblearn.under_sampling import RandomUnderSampler
from sklearn.decomposition import PCA
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier


In [20]:
# Location of the cleaned classification dataset (parquet file).
# Set the FIRE_BRIGADE_DATA_PATH environment variable to point at your own copy;
# when it is unset, the original hard-coded local path is used, so existing
# setups keep working without any change.
DATA_PATH = os.environ.get(
    'FIRE_BRIGADE_DATA_PATH',
    'C://Users/Isi/anaconda3/envs/FireBrigade/MAY24_BDS_INT_Fire_Brigade/data/df_cleaned_for_classification_models.parquet',
)

# Load the whole dataset into memory as a DataFrame.
df = pd.read_parquet(DATA_PATH)

In [21]:
# Separate the prediction target from the feature columns.
target_column = 'ResponseTimeBinary'
y = df[target_column]
X = df.drop(columns=[target_column])

## Preprocessing data

In [22]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=666,
)

# Feature transformers: standardisation, then a PCA configured to retain
# 85% of the variance.
scaler = StandardScaler()
pca = PCA(n_components=0.85)

# Both transformers are fitted on the training split only; the test split is
# only transformed with the already-fitted parameters.
X_train = pca.fit_transform(scaler.fit_transform(X_train))
X_test = pca.transform(scaler.transform(X_test))

# Randomly undersample the transformed training split (the test split is left as is).
rus = RandomUnderSampler(random_state=666)
X_train, y_train = rus.fit_resample(X_train, y_train)

## Random Forest Bagging

In [None]:
# Bagging ensemble of random forests: 50 bagged members, each a 200-tree forest
# capped at depth 10 (10,000 trees in total).
# n_jobs=-1 lets every forest build and query its trees on all available cores;
# the seeds and hyperparameters are untouched. Without it the whole ensemble is
# fitted on a single core.
base_classifier = RandomForestClassifier(
    random_state=666,
    criterion='gini',
    max_depth=10,
    n_estimators=200,
    n_jobs=-1,
)
bagging_classifier = BaggingClassifier(base_classifier, n_estimators=50, random_state=666)
bagging_classifier.fit(X_train, y_train)

# Predict on the held-out test split.
y_pred = bagging_classifier.predict(X_test)

# Evaluation metrics on the test split.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:\n', classification_rep)
print('\nConfusion Matrix:\n', confusion_mat)

## XGBoost Bagging

In [13]:
# Bagging ensemble of gradient-boosted tree classifiers (50 bagged members).
# The deprecated `use_label_encoder` argument is intentionally not passed:
# current XGBoost releases ignore it and emit a warning for every cloned member.
base_classifier = XGBClassifier(
    random_state=666,
    eval_metric='logloss',
    n_estimators=200,
    max_depth=5,
    subsample=0.9,
    learning_rate=0.1,
)

bagging_classifier = BaggingClassifier(base_classifier, n_estimators=50, random_state=666)
bagging_classifier.fit(X_train, y_train)

# Predict on the held-out test split.
y_pred = bagging_classifier.predict(X_test)

# Evaluation metrics on the test split.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:\n', classification_rep)
print('\nConfusion Matrix:\n', confusion_mat)

Accuracy: 0.6974

Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.63      0.55     90243
           1       0.83      0.73      0.77    217298

    accuracy                           0.70    307541
   macro avg       0.66      0.68      0.66    307541
weighted avg       0.73      0.70      0.71    307541


Confusion Matrix:
 [[ 56854  33389]
 [ 59674 157624]]


## LogReg Bagging

In [17]:
# Base learner: L2-penalised logistic regression (C=5) solved with lbfgs,
# with a high iteration cap.
base_classifier = LogisticRegression(
    C=5,
    penalty='l2',
    solver='lbfgs',
    max_iter=10000,
)

# Bag 50 copies of the base learner and train the ensemble (fit returns the
# fitted estimator itself, so construction and training are chained).
bagging_classifier = BaggingClassifier(
    base_classifier,
    n_estimators=50,
    random_state=666,
).fit(X_train, y_train)

# Score the ensemble on the held-out test split.
y_pred = bagging_classifier.predict(X_test)
confusion_mat = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report: accuracy first, then the per-class report, then the confusion matrix.
print(f'Accuracy: {accuracy:.4f}')
print('\nClassification Report:\n', classification_rep)
print('\nConfusion Matrix:\n', confusion_mat)

Accuracy: 0.6797

Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.61      0.53     90243
           1       0.81      0.71      0.76    217298

    accuracy                           0.68    307541
   macro avg       0.64      0.66      0.64    307541
weighted avg       0.71      0.68      0.69    307541


Confusion Matrix:
 [[ 55078  35165]
 [ 63337 153961]]
