In [1]:
import pandas as pd
import numpy as np
from statsmodels.api import Logit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

In [2]:
dataframe = pd.read_csv('datasets/BankNoteAuthentication.csv')
dataframe.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [3]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   variance  1372 non-null   float64
 1   skewness  1372 non-null   float64
 2   curtosis  1372 non-null   float64
 3   entropy   1372 non-null   float64
 4   class     1372 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [4]:
dataframe.dtypes

variance    float64
skewness    float64
curtosis    float64
entropy     float64
class         int64
dtype: object

In [5]:
X = dataframe[['variance', 'skewness', 'curtosis', 'entropy']]
y = dataframe[['class']]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:
logreg = Logit(y_train, X_train).fit()

Optimization terminated successfully.
         Current function value: 0.085039
         Iterations 11


In [8]:
print(logreg.summary())

                           Logit Regression Results                           
Dep. Variable:                  class   No. Observations:                 1029
Model:                          Logit   Df Residuals:                     1025
Method:                           MLE   Df Model:                            3
Date:                Fri, 28 Jun 2024   Pseudo R-squ.:                  0.8762
Time:                        18:34:39   Log-Likelihood:                -87.506
converged:                       True   LL-Null:                       -707.03
Covariance Type:            nonrobust   LLR p-value:                2.467e-268
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
variance      -2.9600      0.311     -9.513      0.000      -3.570      -2.350
skewness      -1.8234      0.235     -7.744      0.000      -2.285      -1.362
curtosis      -1.9610      0.241     -8.133      0.0

In [9]:
y_pred_raw = logreg.predict(X_test)
y_pred_raw

430     1.073900e-06
588     3.190383e-03
296     1.984329e-03
184     1.065975e-08
244     5.865388e-08
            ...     
1121    9.503673e-01
940     3.017936e-01
1189    9.977810e-01
438     9.113253e-09
1022    3.395049e-01
Length: 343, dtype: float64

In [10]:
y_pred = list(map(round, y_pred_raw))
y_pred

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,


In [15]:
conf_mat = confusion_matrix(y_test, y_pred)
conf_mat

array([[191,   0],
       [ 20, 132]])

In [11]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9416909620991254

In [12]:
precision = precision_score(y_test, y_pred)
precision

np.float64(1.0)

In [13]:
recall = recall_score(y_test, y_pred)
recall

np.float64(0.868421052631579)

In [14]:
f1 = f1_score(y_test, y_pred)
f1

np.float64(0.9295774647887324)