In [1]:
%matplotlib notebook

import scipy.stats
import numpy as np
import pandas as pd

from imblearn.over_sampling import SMOTE
from collections import Counter

import matplotlib.pyplot as plt

# Pre-processing

In [2]:
# Imports needed only by this cell
import os
from sklearn.model_selection import train_test_split

# Location of processed.csv for the churn project. Set the CHURN_DATA_PATH
# environment variable to run the notebook on another machine; when it is not
# set, the original local path is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    'CHURN_DATA_PATH',
    'C:/Users/vabalagon/Desktop/Machine Learning Projects/Applied-Machine-Learning-Projects/Customer Churn Prediction/data/processed.csv',
)
RANDOM_STATE = 42  # one seed shared by the train/test split and by SMOTE
TEST_SIZE = 0.25   # fraction of rows held out for testing

# Read the data
df = pd.read_csv(DATA_PATH)

# Features are every column except 'state', 'area_code' and the target 'churn'
X = df.drop(['state', 'area_code', 'churn'], axis=1).to_numpy()
y = df['churn'].to_numpy()

# Stratified, shuffled split into training and testing parts.
# X is passed directly: no defensive copy is made before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=TEST_SIZE,
    shuffle=True,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Apply SMOTE oversampling to the training split only; the test split is
# left untouched.
sm = SMOTE(random_state=RANDOM_STATE)
X_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(f'Resampled dataset shape {Counter(y_train_smote)}')

Resampled dataset shape Counter({0: 2739, 1: 2739})


In [3]:
def accuracy_per_class(classifier, X_test, y_test):
    """Print the per-class accuracy of ``classifier`` on a labelled set.

    For each distinct label in ``y_test`` (visited in descending order), the
    function prints a blank line followed by the fraction of samples carrying
    that true label for which the prediction matches, rounded to 3 decimals.

    Parameters
    ----------
    classifier : object
        Any object exposing ``predict(X_test)`` that returns one prediction
        per sample.
    X_test : array-like
        Passed unchanged to ``classifier.predict``.
    y_test : array-like
        True labels, one per sample. Lists, NumPy arrays and other
        array-likes are accepted; they are converted with ``np.asarray``.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Coerce to arrays so positional/boolean indexing works for any
    # array-like input (plain lists would otherwise fail on indexing).
    y_true = np.asarray(y_test)
    y_pred = np.asarray(classifier.predict(X_test))

    for y_i in np.unique(y_true)[::-1]:
        print()

        # Boolean mask of the samples whose true label is y_i
        in_class = y_true == y_i

        # Correct predictions within the class divided by the class size
        class_accuracy = np.sum(y_true[in_class] == y_pred[in_class]) / np.sum(in_class)
        print('class', y_i, 'Accuracy: ', str(round(class_accuracy, 3)))

# Naive Bayes without SMOTE

In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import balanced_accuracy_score

In [5]:
# Baseline: Gaussian Naive Bayes fitted on the original (non-resampled) training split
naive_bayes = GaussianNB().fit(X_train, y_train)

# Predict once per split, then score with balanced accuracy
nb_train_pred = naive_bayes.predict(X_train)
nb_test_pred = naive_bayes.predict(X_test)

print("Training set balanced accuracy score:", balanced_accuracy_score(y_train, nb_train_pred))
print("Test set balanced accuracy score:", balanced_accuracy_score(y_test, nb_test_pred))

Training set balanced accuracy score: 0.7885849404109946
Test set balanced accuracy score: 0.7718510405257393


##### Accuracy per class

In [6]:
# Per-class accuracy of the model fitted without SMOTE, on the held-out test split
accuracy_per_class(classifier=naive_bayes, X_test=X_test, y_test=y_test)


class 1 Accuracy:  0.8

class 0 Accuracy:  0.744


# Naive Bayes with SMOTE

In [7]:
# Gaussian Naive Bayes fitted on the SMOTE-resampled training data
naive_bayes_smote = GaussianNB().fit(X_train_smote, y_train_smote)

# Scored on the original (un-resampled) training split and on the test split.
# NOTE(review): the recorded output of this cell is identical to the output of
# the model fitted without SMOTE -- re-run to confirm the output is not stale.
nb_smote_train_pred = naive_bayes_smote.predict(X_train)
nb_smote_test_pred = naive_bayes_smote.predict(X_test)

print("Training set balanced accuracy score:", balanced_accuracy_score(y_train, nb_smote_train_pred))
print("Test set balanced accuracy score:", balanced_accuracy_score(y_test, nb_smote_test_pred))

Training set balanced accuracy score: 0.7885849404109946
Test set balanced accuracy score: 0.7718510405257393
