In [1]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
from numpy.linalg import inv
from sklearn.model_selection import train_test_split
from scipy.stats import multivariate_normal
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.utils.multiclass import unique_labels
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv(r"C:\Users\cks12\Downloads\PML Project\dataset\raw\CleanDataset.csv")

In [3]:
X=df[['Age','WorkExperience','MonthlyIncome']]

In [4]:
y=df['Attrition']

In [5]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)

In [6]:
prior_mean = np.zeros(Xtrain.shape[1]) #p_m

In [8]:
prior_mean

array([0., 0., 0.])

In [9]:
lam = 1
prior_variance = np.eye(Xtrain.shape[1])/lam 

In [10]:
prior_variance

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [11]:
w_init1 =[-8.12,34,8.12]
#p = 1/(1+np.exp(-Xtrain.dot(w_init1)))

In [12]:
#np.sum((ytrain*np.log1p(p)+(1-ytrain)*np.log1p(1-p)))
def softplus(x):
    return np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0)
z = Xtrain.dot(w_init1)
-np.sum(ytrain * softplus(-z) + (1 - ytrain) * softplus(z))

-506602950.1119604

In [13]:
def logJointLikelihood(X, t, w, p_m, p_v):

    #p = 1/(1+np.exp(-X.dot(w)))
    #loglikelihood =  np.sum((t*np.log1p(p)+(1-t)*np.log1p(1-p)))
    z=X.dot(w)
    loglikelihood = -np.sum(t * softplus(-z) + (1 - t) * softplus(z))
    logPrior = np.log(multivariate_normal.pdf(w, p_m, p_v))
    
    logJointLikelihood = (loglikelihood + logPrior)
    
    NeglogJointLikelihood= -1*logJointLikelihood

    return NeglogJointLikelihood

In [14]:
def logJointLikelihood_grad(X, t, w, p_m, p_v):

 
    p = 1/(1 + np.exp(-X.dot(w)))
    
    
    grad = X.T.dot(p-t)+ inv(p_v).dot(w-p_m)  

    return grad

In [15]:
def logJointLikelihood_hess(X, t, w, p_m, p_v):

   
    p = 1/(1 + np.exp(-X.dot(w)))
    
    hess = (( X.T*(p*(1-p)))).dot(X)+inv(p_v)

    return hess

In [16]:
def GradientDescent(X, t, w, p_m, p_v, eta, tol, max_iter):

    log_joint_likelihood = 1e5
    log_joint_likelihoods = []
    w_history=[]
    w_history.append(w)
    w_opt = 0

    for i in range(max_iter):
        


        grad = logJointLikelihood_grad(X, t, w, p_m, p_v)
        print(grad)

        # update the coefficient for next step
        w_new =  w - eta* grad  #fill in
        w_history.append(w_new)

        # check convergence
        log_joint_likelihood_new = logJointLikelihood(X, t, w_new, p_m, p_v)
        print(log_joint_likelihood_new)
        if (abs(log_joint_likelihood_new - log_joint_likelihood) < tol):
            break
        else:
            log_joint_likelihoods.append(log_joint_likelihood_new)
            log_joint_likelihood, w = log_joint_likelihood_new, w_new
            

    w_opt = w_new
    return w_opt, np.array(log_joint_likelihoods), w_history

In [17]:
def NewtonRaphson(X, t, w, p_m, p_v, eta, tol, max_iter):

    # hyper-parameters setting (Note: 'loss' is set to large value to avoid first time stop)
    log_joint_likelihood = 1e5
    log_joint_likelihoods = []
    w_opt = 0
    w_history= []
    w_history.append(w)

    for i in range(max_iter):

        # find the next step length.
        grad = logJointLikelihood_grad(X, t, w, p_m, p_v)
        
        hess = logJointLikelihood_hess(X, t, w, p_m, p_v)
        

        # update the coefficient for next step
        w_new =   w-eta*(inv(hess)*grad)
        w_history.append(w_new)

        # check convergence
        log_joint_likelihood_new = logJointLikelihood(X, t, w_new, p_m, p_v)
        if (abs(log_joint_likelihood_new - log_joint_likelihood) < tol):
            break
        else:
            log_joint_likelihoods.append(log_joint_likelihood_new)
            log_joint_likelihood, w = log_joint_likelihood_new, w_new
            

        w_opt = w_new

    return w_opt, log_joint_likelihoods, w_history

In [18]:
class BayesianLogistic(object):



    def __init__(self, p_m, p_v, solver, eta=1e-4, tol=1e-5, max_iter=100):


        self.p_m = p_m
        self.p_v = p_v
        self.solver = solver
        self.eta = eta
        self.tol = tol
        self.max_iter = max_iter


    def fit(self, X, t):

        w_init = 1e-5*np.ones(X.shape[1])

        if (self.solver == 'gd'):
            self.coef_, self.log_joint_likelihood_, self.weight_history_ = GradientDescent(X, t, w_init, self.p_m, self.p_v, self.eta, self.tol, self.max_iter)
            print(self.log_joint_likelihood_)
        else:
            self.coef_, self.log_joint_likelihood_, self.weight_history_ = NewtonRaphson(X, t, w_init, self.p_m, self.p_v, self.eta, self.tol, self.max_iter)

        return self


    def predict(self, X):


        pred = []

        for p in 1/(1+np.exp(-X.dot(self.coef_))):
            if p <= 0.4:
                pred.append(0)
            else:
                pred.append(1)

        return np.array(pred)

In [19]:
model = BayesianLogistic(p_m=prior_mean, p_v=prior_variance, solver='gd', eta=1e-4, max_iter=50) #try changing eta and max_iter for better convergence
model.fit(Xtrain, ytrain)

Age               4.265743e+04
WorkExperience    1.509686e+04
MonthlyIncome     2.505659e+07
dtype: float64
inf
Age              -3.724127e+04
WorkExperience   -8.986469e+03
MonthlyIncome    -2.172763e+07
dtype: float64
inf
Age              -3.723754e+04
WorkExperience   -8.985570e+03
MonthlyIncome    -2.172545e+07
dtype: float64
inf
Age               1.066302e+05
WorkExperience    3.401948e+04
MonthlyIncome     6.235555e+07
dtype: float64
inf
Age              -3.724448e+04
WorkExperience   -8.988074e+03
MonthlyIncome    -2.172952e+07
dtype: float64
inf
Age              -3.724076e+04
WorkExperience   -8.987175e+03
MonthlyIncome    -2.172734e+07
dtype: float64
inf
Age              -3.723703e+04
WorkExperience   -8.986276e+03
MonthlyIncome    -2.172517e+07
dtype: float64
inf
Age               1.066307e+05
WorkExperience    3.401878e+04
MonthlyIncome     6.235583e+07
dtype: float64
inf
Age              -3.724397e+04
WorkExperience   -8.988779e+03
MonthlyIncome    -2.172924e+07
dtype: floa

<__main__.BayesianLogistic at 0x1b80ab5de90>

In [20]:
model.coef_

Age                 -8.811644
WorkExperience     -13.357653
MonthlyIncome    -5345.194770
dtype: float64

In [21]:
model.log_joint_likelihood_

array([inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf])

In [22]:
model.weight_history_

[array([1.e-05, 1.e-05, 1.e-05]),
 Age                 -4.265733
 WorkExperience      -1.509676
 MonthlyIncome    -2505.658930
 dtype: float64,
 Age                -0.541607
 WorkExperience     -0.611029
 MonthlyIncome    -332.896160
 dtype: float64,
 Age                  3.182148
 WorkExperience       0.287528
 MonthlyIncome     1839.649334
 dtype: float64,
 Age                 -7.480871
 WorkExperience      -3.114420
 MonthlyIncome    -4395.905428
 dtype: float64,
 Age                 -3.756422
 WorkExperience      -2.215613
 MonthlyIncome    -2222.953633
 dtype: float64,
 Age               -0.032347
 WorkExperience    -1.316896
 MonthlyIncome    -50.219133
 dtype: float64,
 Age                  3.691356
 WorkExperience      -0.418268
 MonthlyIncome     2122.298093
 dtype: float64,
 Age                 -6.971713
 WorkExperience      -3.820146
 MonthlyIncome    -4113.284934
 dtype: float64,
 Age                 -3.247316
 WorkExperience      -2.921268
 MonthlyIncome    -1940.361401
 d

In [23]:
ypred=model.predict(Xtest)

In [24]:
values, counts = np.unique(ypred, return_counts=True)

for val, count in zip(values, counts):
    print(f"Value: {val}, Count: {count}")

Value: 0, Count: 1295
