### Initial Imports

In [26]:
import pandas as pd
import numpy as np
import csv
from pathlib import Path

import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, mean_squared_error, plot_roc_curve
from imblearn.metrics import classification_report_imbalanced

import matplotlib.pyplot as plt
%matplotlib inline

## Load the Preprocessed Data (Retaining Its Format)

In [27]:

# Load the arrays that are passed to the model as X. Each file is read as
# comma-separated numeric text into a NumPy array.
X_train_smote = np.loadtxt('resources/X_train_smote.csv', delimiter=',')
X_train_ros = np.loadtxt('resources/X_train_ros.csv', delimiter=',')
X_train_scaled = np.loadtxt('resources/X_train_scaled.csv', delimiter=',')
X_test_scaled = np.loadtxt('resources/X_test_scaled.csv', delimiter=',')

# Load the resampled targets. `read_csv(..., squeeze=True)` was removed in
# pandas 2.0, so the single-column frame is squeezed to a Series explicitly.
y_train_smote = pd.read_csv('resources/y_train_smote.csv', sep=',', header=0).squeeze('columns')
y_train_ros = pd.read_csv('resources/y_train_ros.csv', sep=',', header=0).squeeze('columns')

# Load the original targets. The first column of each file is used as the
# index, and the remaining single column is squeezed to a Series.
y_train = pd.read_csv('resources/y_train.csv', sep=',', header=0, index_col=0).squeeze('columns')
y_test = pd.read_csv('resources/y_test.csv', sep=',', header=0, index_col=0).squeeze('columns')


## Simple Logistic Regression

In [28]:
# Classifier shared by every experiment below; the solver is named explicitly
# and random_state is fixed at 1.
model = LogisticRegression(
    random_state=1,
    solver='lbfgs',
)
model

LogisticRegression(random_state=1)

### Fit (train) our model using the training data

In [29]:
# Baseline: train on the scaled training set without any resampling.
model.fit(X=X_train_scaled, y=y_train)

LogisticRegression(random_state=1)

### Score the model using the test data

In [30]:
# `score` is evaluated once per split; the training and testing values are
# printed one after the other so they can be compared directly.
train_score = model.score(X_train_scaled, y_train)
test_score = model.score(X_test_scaled, y_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.9530666666666666
Testing Data Score: 0.9472


### Make Predictions

In [31]:
# Predict labels for the test set with the currently fitted model.
y_pred = model.predict(X_test_scaled)

# Put predictions next to the true labels; the index inherited from y_test is
# dropped so rows are numbered from 0.
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
results = results.reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


### Accuracy Score

In [33]:
# Plain accuracy: the fraction of test samples whose predicted label matches
# the true label. This is not the class-balanced variant.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9472

### Confusion Matrix

In [35]:
# scikit-learn convention: rows are the true classes, columns the predicted ones.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1112,   18],
       [  48,   72]], dtype=int64)

### The Imbalanced Classification Report

In [37]:
# Build the per-class report text first, then print it so the table keeps its
# fixed-width layout.
imbalanced_report = classification_report_imbalanced(y_test, y_pred)
print(imbalanced_report)

                   pre       rec       spe        f1       geo       iba       sup

          0       0.96      0.98      0.60      0.97      0.77      0.61      1130
          1       0.80      0.60      0.98      0.69      0.77      0.57       120

avg / total       0.94      0.95      0.64      0.94      0.77      0.61      1250



## Logistic Regression Model + SMOTE

### Fit (train) our model using the training data

In [42]:
# Refit the same estimator object on the SMOTE training set.
# NOTE(review): this replaces whatever the previous fit learned, so the cells
# below must be run after this one to describe the SMOTE model.
model.fit(X=X_train_smote, y=y_train_smote)

LogisticRegression(random_state=1)

### Make Predictions

In [46]:
# Predict labels for the (unchanged) test set with the currently fitted model.
y_pred = model.predict(X_test_scaled)

# Put predictions next to the true labels; the index inherited from y_test is
# dropped so rows are numbered from 0.
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
results = results.reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,0
4,1,0


### Accuracy Score

In [48]:
# Plain accuracy: the fraction of test samples whose predicted label matches
# the true label. This is not the class-balanced variant.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9008

### Confusion Matrix

In [50]:
# scikit-learn convention: rows are the true classes, columns the predicted ones.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1028,  102],
       [  22,   98]], dtype=int64)

### The Imbalanced Classification Report

In [51]:
# Build the per-class report text first, then print it so the table keeps its
# fixed-width layout.
imbalanced_report = classification_report_imbalanced(y_test, y_pred)
print(imbalanced_report)

                   pre       rec       spe        f1       geo       iba       sup

          0       0.98      0.91      0.82      0.94      0.86      0.75      1130
          1       0.49      0.82      0.91      0.61      0.86      0.74       120

avg / total       0.93      0.90      0.83      0.91      0.86      0.75      1250



## Logistic Regression Model + Random Oversampling (ROS)

### Fit (train) our model using the training data

In [52]:
# Refit the same estimator object on the randomly oversampled training set.
# NOTE(review): this replaces whatever the previous fit learned, so the cells
# below must be run after this one to describe the ROS model.
model.fit(X=X_train_ros, y=y_train_ros)

LogisticRegression(random_state=1)

### Make Predictions

In [56]:
# Predict labels for the (unchanged) test set with the currently fitted model.
y_pred = model.predict(X_test_scaled)

# Put predictions next to the true labels; the index inherited from y_test is
# dropped so rows are numbered from 0.
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
results = results.reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,0
4,1,0


### Accuracy Score

In [57]:
# Plain accuracy: the fraction of test samples whose predicted label matches
# the true label. This is not the class-balanced variant.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8952

### Mean Squared Error

### Confusion Matrix

In [58]:
# scikit-learn convention: rows are the true classes, columns the predicted ones.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1018,  112],
       [  19,  101]], dtype=int64)

### The Imbalanced Classification Report

In [59]:
# Build the per-class report text first, then print it so the table keeps its
# fixed-width layout.
imbalanced_report = classification_report_imbalanced(y_test, y_pred)
print(imbalanced_report)

                   pre       rec       spe        f1       geo       iba       sup

          0       0.98      0.90      0.84      0.94      0.87      0.76      1130
          1       0.47      0.84      0.90      0.61      0.87      0.75       120

avg / total       0.93      0.90      0.85      0.91      0.87      0.76      1250



### ROC Curve