In [1]:
# Importing dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced
from sklearn.svm import SVC
from collections import Counter

In [2]:
# Read in the data
df = pd.read_csv("../Resources/bin_5_clean.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,CreditScore,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,bin_age,Geography_France,Geography_Germany,Geography_Spain,gender_0,gender_1
0,0,619,2,0.0,1,1,1,101348.88,1,6,1,0,0,1,0
1,1,608,1,83807.86,1,0,1,112542.58,0,6,0,0,1,1,0
2,2,502,8,159660.8,3,1,0,113931.57,1,6,1,0,0,1,0
3,3,699,1,0.0,2,0,0,93826.63,0,5,1,0,0,1,0
4,4,850,2,125510.82,1,1,1,79084.1,0,6,0,0,1,1,0


In [3]:
df.shape

(9589, 15)

In [5]:
# Dropping unessential columns
df = df.drop(["Unnamed: 0"], axis=1)
df.head()

Unnamed: 0,CreditScore,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,bin_age,Geography_France,Geography_Germany,Geography_Spain,gender_0,gender_1
0,619,2,0.0,1,1,1,101348.88,1,6,1,0,0,1,0
1,608,1,83807.86,1,0,1,112542.58,0,6,0,0,1,1,0
2,502,8,159660.8,3,1,0,113931.57,1,6,1,0,0,1,0
3,699,1,0.0,2,0,0,93826.63,0,5,1,0,0,1,0
4,850,2,125510.82,1,1,1,79084.1,0,6,0,0,1,1,0


In [6]:
df.shape

(9589, 14)

In [7]:
df.dtypes

CreditScore            int64
Tenure                 int64
Balance              float64
NumOfProducts          int64
HasCrCard              int64
IsActiveMember         int64
EstimatedSalary      float64
Exited                 int64
bin_age                int64
Geography_France       int64
Geography_Germany      int64
Geography_Spain        int64
gender_0               int64
gender_1               int64
dtype: object

In [8]:
df.nunique()

CreditScore           460
Tenure                 11
Balance              6122
NumOfProducts           4
HasCrCard               2
IsActiveMember          2
EstimatedSalary      9588
Exited                  2
bin_age                11
Geography_France        2
Geography_Germany       2
Geography_Spain         2
gender_0                2
gender_1                2
dtype: int64

In [9]:
# Split preprocessed data into features and target
X = df.copy()
X = X.drop("Exited", axis=1)
X = pd.get_dummies(X)

# Create our target
y = df["Exited"]

In [10]:
# Check the balance of our target values
y.value_counts()

0    7646
1    1943
Name: Exited, dtype: int64

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=1)
Counter(y_train)

Counter({0: 5746, 1: 1445})

In [12]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
from imblearn.combine import SMOTEENN

smote_enn = SMOTEENN(random_state=1)
X_resampled, y_resampled = smote_enn.fit_resample(X_train_scaled, y_train)
Counter(y_resampled)

Counter({0: 3427, 1: 4662})

In [14]:
model = SVC(kernel="linear")
model.fit(X_resampled, y_resampled)

SVC(kernel='linear')

In [15]:
predictions = model.predict(X_test_scaled)

In [16]:
# Calculated the balanced accuracy score
acc_score = balanced_accuracy_score(y_test, predictions)
acc_score

0.6316920312830269

In [17]:
# Displaying results
print("Confusion Matrix")
display(confusion_matrix(y_test, predictions))
print(f"Accuracy Scorem  : {acc_score}")
print("Classification Report")
print(classification_report_imbalanced(y_test, predictions))

Confusion Matrix


array([[943, 957],
       [116, 382]], dtype=int64)

Accuracy Scorem  : 0.6316920312830269
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       0.89      0.50      0.77      0.64      0.62      0.37      1900
          1       0.29      0.77      0.50      0.42      0.62      0.39       498

avg / total       0.76      0.55      0.71      0.59      0.62      0.37      2398

