In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the customer workbook; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_excel("Simmons.xlsx")
# Preview only the first rows rather than rendering the whole frame.
df.head(10)

Unnamed: 0,Customer,Spending,Card,Coupon
0,1,2.291,0,0
1,2,2.359,0,0
2,3,3.358,0,0
3,4,3.254,0,1
4,5,3.853,1,1
5,6,4.257,1,1
6,7,2.256,0,0
7,8,3.254,1,1
8,9,2.357,1,0
9,10,7.256,1,1


In [3]:
# Distinct values taken by the target column.
pd.unique(df['Coupon'])

array([0, 1], dtype=int64)

In [4]:
# Class balance of the target: number of rows per Coupon value.
df.loc[:, 'Coupon'].value_counts()

1    21
0    19
Name: Coupon, dtype: int64

__1.Split dataset into training and testing sets__

In [11]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [12]:
# Features are the Card and Spending columns; the target is Coupon,
# stored as an (n_samples, 1) column vector.
x = df.loc[:, ['Card', 'Spending']]
y = df['Coupon'].to_numpy().reshape(-1, 1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [13]:
# Row counts of each split, in the order: x_train, y_train, x_test, y_test.
tuple(len(part) for part in (x_train, y_train, x_test, y_test))

(30, 30, 10, 10)

__2.Building the model and predicting values__

In [15]:
# Logistic regression classifier using the L-BFGS solver.
Lreg = LogisticRegression(solver='lbfgs')
# y_train was built as a column vector (reshape(-1, 1)); flatten it to 1-D for fit().
Lreg.fit(x_train, np.ravel(y_train))

In [16]:
# Predicted class labels for the held-out test rows.
y_predict = Lreg.predict(X=x_test)
y_predict

array([0, 0, 1, 0, 1, 1, 0, 1, 0, 0], dtype=int64)

In [17]:
# Predicted class labels for the rows the model was fitted on.
y_predict_train = Lreg.predict(X=x_train)
y_predict_train

array([0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 1], dtype=int64)

__3.Calculate predicted probabilities for the training and test data__

In [25]:
# Preview the training features (first 10 of the 30 rows) instead of dumping the full frame.
x_train.head(10)

Unnamed: 0,Card,Spending
25,0,3.258
9,1,7.256
13,1,7.585
31,0,2.256
34,1,6.324
8,1,2.357
17,0,1.365
24,0,2.258
0,0,2.291
33,1,5.364


In [22]:
# predict_proba returns one column per class; keep only column index 1.
y_prob_train = Lreg.predict_proba(x_train)[:, 1]
# Display the probabilities as a single 1 x n row; y_prob_train itself stays 1-D.
y_prob_train[np.newaxis, :]

array([[0.23324537, 0.94390548, 0.95535726, 0.12761043, 0.89491508,
        0.31934297, 0.0708752 , 0.12777322, 0.13048482, 0.80852775,
        0.65284509, 0.80060575, 0.13622668, 0.65301068, 0.72815217,
        0.24656739, 0.47486886, 0.57940112, 0.23272304, 0.38679116,
        0.65284509, 0.13596893, 0.89648514, 0.89029254, 0.15511928,
        0.80407713, 0.47468665, 0.94398281, 0.89029254, 0.65301068]])

In [26]:
# Display the held-out test features (Card and Spending for the 10 test rows).
x_test

Unnamed: 0,Card,Spending
19,0,3.256
16,0,2.258
15,1,6.256
26,0,4.586
4,1,3.853
12,1,6.875
37,1,3.325
27,1,5.258
39,0,3.256
6,0,2.256


In [20]:
# predict_proba returns one column per class; keep only column index 1
# for each test row.
y_prob = Lreg.predict_proba(x_test)[:, 1]
y_prob

array([0.2329841 , 0.12777322, 0.89014972, 0.44529878, 0.58330587,
       0.92720888, 0.48763773, 0.79624946, 0.2329841 , 0.12761043])

__4.Summary for logistic model__

In [30]:
# Rebind x and y to the full dataset (no train/test split): predictors in
# Spending, Card order and the target as a pandas Series.
x = df.loc[:, ['Spending', 'Card']]
y = df.loc[:, 'Coupon']

In [38]:
import statsmodels.api as sm

# Prepend a constant column so the fitted model includes an intercept
# (reported as "const" in the summary table).
x1 = sm.add_constant(x)

# Fit the logit model by maximum likelihood and print the text summary.
logit_model = sm.Logit(endog=y, exog=x1)
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.374153
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                 Coupon   No. Observations:                   40
Model:                          Logit   Df Residuals:                       37
Method:                           MLE   Df Model:                            2
Date:                Wed, 05 Jul 2023   Pseudo R-squ.:                  0.4592
Time:                        21:31:22   Log-Likelihood:                -14.966
converged:                       True   LL-Null:                       -27.676
Covariance Type:            nonrobust   LLR p-value:                 3.021e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -4.9161      1.593     -3.086      0.002      -8.038      -1.794
Spending       0.9518      0.

__5.Calculating Accuracy, Precision and Recall using the Confusion Matrix__

In [39]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
score = accuracy_score(y_true=y_test, y_pred=y_predict)
score

0.9

In [40]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_predict)

array([[5, 0],
       [1, 4]], dtype=int64)

In [42]:
# Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]),
# even when one class is absent from the test split; without it the matrix
# can collapse to 1x1 and the four-way unpacking below would fail.
TrueNegative, FalsePositive, FalseNegative, TruePositive = confusion_matrix(
    y_test, y_predict, labels=[0, 1]
).ravel()
print(TrueNegative, FalsePositive, FalseNegative, TruePositive)

5 0 1 4


In [43]:
# Precision: share of predicted positives that are truly positive.
predicted_positive_count = TruePositive + FalsePositive
Precision = TruePositive / predicted_positive_count
Precision

1.0

In [44]:
# Accuracy: correctly classified rows divided by all rows in the confusion matrix.
correct_count = TruePositive + TrueNegative
total_count = correct_count + FalsePositive + FalseNegative
Accuracy = correct_count / total_count
Accuracy

0.9

In [45]:
# Recall: share of actual positives that the model identified.
actual_positive_count = TruePositive + FalseNegative
Recall = TruePositive / actual_positive_count
Recall

0.8