In [1]:
import numpy as np
import pandas as pd
import sys

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
# Load the data set from a CSV file; the path is relative, so the file must be
# reachable from the notebook's working directory.
data = pd.read_csv("bill_authentication.csv")

In [3]:
# Show the first rows of the loaded frame as this cell's output.
data.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [4]:
def split_data_into_train_and_test(data, test_size=0.2, random_state=0):
    """Split a DataFrame into train/test feature and label arrays.

    Every column except the last one is treated as a feature; the last column
    is treated as the label.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose last column holds the labels.
    test_size : float or int, default 0.2
        Forwarded unchanged to ``train_test_split``.
    random_state : int, default 0
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    # Negative slicing selects "all but the last" / "the last" column without
    # having to count the columns first.
    features = data.iloc[:, :-1].values
    labels = data.iloc[:, -1].values
    return tuple(
        train_test_split(features, labels, test_size=test_size, random_state=random_state)
    )
    
    
def scale_X_values(X_train, X_test):
    """Standardize train and test features using training-set statistics.

    A ``StandardScaler`` is fitted on ``X_train`` only and then applied to
    both matrices, so the test set does not influence the scaling parameters.

    Call sites of the form ``scale_X_values(X_train, X_test)`` ignore the
    return value, so merely rebinding local names inside this function would
    leave the caller's data unscaled. The scaled values are therefore written
    back into the inputs when they are writeable floating-point NumPy arrays,
    and are additionally returned for callers that prefer assignment.

    Parameters
    ----------
    X_train : array-like
        Training features; used to fit the scaler.
    X_test : array-like
        Test features; transformed with the scaler fitted on ``X_train``.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled)``.
    """
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    for original, scaled in ((X_train, X_train_scaled), (X_test, X_test_scaled)):
        # Only float arrays can hold standardized values without truncation;
        # other inputs (integer arrays, DataFrames, lists) are left untouched
        # and the caller has to use the returned arrays instead.
        if (
            isinstance(original, np.ndarray)
            and np.issubdtype(original.dtype, np.floating)
            and original.flags.writeable
        ):
            original[...] = scaled

    return X_train_scaled, X_test_scaled


def test_classifier(classifier, X_test, y_test):
    """Evaluate a fitted classifier on a test set, printing and returning metrics.

    The confusion matrix, the classification report and the accuracy score
    are each computed from the classifier's predictions on ``X_test`` and
    printed in that order.

    Parameters
    ----------
    classifier : object
        Object exposing ``predict(X_test)``.
    X_test : array-like
        Test features passed to ``classifier.predict``.
    y_test : array-like
        Reference labels the predictions are compared against.

    Returns
    -------
    tuple
        ``(accuracy, class_report, conf_matrix)``.
    """
    def _show(heading, value, *trailing):
        # One item per line: heading, the value, then any trailing spacer.
        print(heading, value, *trailing, sep="\n")

    predicted = classifier.predict(X_test)

    matrix = confusion_matrix(y_test, predicted)
    _show("Confusion matrix:", matrix, "\n")

    report = classification_report(y_test, predicted)
    _show("Classification_report:", report, "\n")

    score = accuracy_score(y_test, predicted)
    _show("Accuracy_score:", score)

    return score, report, matrix

In [5]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def get_LDA_classifier(X_train, y_train):
    """Create a ``LinearDiscriminantAnalysis`` model with default settings and fit it.

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Returns
    -------
    LinearDiscriminantAnalysis
        The estimator after ``fit(X_train, y_train)``.
    """
    # ``fit`` returns the fitted estimator itself, so the call can be chained.
    return LinearDiscriminantAnalysis().fit(X_train, y_train)


def proceed_testing_LDA_classifier(data, test_size=0.2, random_state=0):
    """Run the LDA pipeline: split ``data``, scale, fit, and print test metrics.

    The return value of ``scale_X_values`` is not captured here, so any
    scaling has to take effect in place on the arrays passed to it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame handed to ``split_data_into_train_and_test``.
    test_size : float or int, default 0.2
        Forwarded to the split.
    random_state : int, default 0
        Forwarded to the split.

    Returns
    -------
    None
        Metrics are printed by ``test_classifier``; nothing is returned.
    """
    train_features, test_features, train_labels, test_labels = (
        split_data_into_train_and_test(data, test_size, random_state)
    )
    scale_X_values(train_features, test_features)
    lda = get_LDA_classifier(train_features, train_labels)
    test_classifier(lda, test_features, test_labels)

In [6]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def get_QDA_classifier(X_train, y_train):
    """Create a ``QuadraticDiscriminantAnalysis`` model with default settings and fit it.

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Returns
    -------
    QuadraticDiscriminantAnalysis
        The estimator after ``fit(X_train, y_train)``.
    """
    # ``fit`` returns the fitted estimator itself, so the call can be chained.
    return QuadraticDiscriminantAnalysis().fit(X_train, y_train)


def proceed_testing_QDA_classifier(data, test_size=0.2, random_state=0):
    """Run the QDA pipeline: split ``data``, scale, fit, and print test metrics.

    The return value of ``scale_X_values`` is not captured here, so any
    scaling has to take effect in place on the arrays passed to it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame handed to ``split_data_into_train_and_test``.
    test_size : float or int, default 0.2
        Forwarded to the split.
    random_state : int, default 0
        Forwarded to the split.

    Returns
    -------
    None
        Metrics are printed by ``test_classifier``; nothing is returned.
    """
    train_features, test_features, train_labels, test_labels = (
        split_data_into_train_and_test(data, test_size, random_state)
    )
    scale_X_values(train_features, test_features)
    qda = get_QDA_classifier(train_features, train_labels)
    test_classifier(qda, test_features, test_labels)

In [7]:
# LDA with the function's default split parameters (test_size=0.2, random_state=0).
proceed_testing_LDA_classifier(data)

Confusion matrix:
[[150   7]
 [  0 118]]


Classification_report:
             precision    recall  f1-score   support

          0       1.00      0.96      0.98       157
          1       0.94      1.00      0.97       118

avg / total       0.98      0.97      0.97       275



Accuracy_score:
0.9745454545454545


In [8]:
# QDA with the function's default split parameters (test_size=0.2, random_state=0).
proceed_testing_QDA_classifier(data)

Confusion matrix:
[[154   3]
 [  0 118]]


Classification_report:
             precision    recall  f1-score   support

          0       1.00      0.98      0.99       157
          1       0.98      1.00      0.99       118

avg / total       0.99      0.99      0.99       275



Accuracy_score:
0.9890909090909091


In [9]:
# LDA again with a different split seed (random_state=1) and the default test_size.
proceed_testing_LDA_classifier(data, random_state=1)

Confusion matrix:
[[149   8]
 [  0 118]]


Classification_report:
             precision    recall  f1-score   support

          0       1.00      0.95      0.97       157
          1       0.94      1.00      0.97       118

avg / total       0.97      0.97      0.97       275



Accuracy_score:
0.9709090909090909


In [10]:
# QDA again with a different split seed (random_state=1) and the default test_size.
proceed_testing_QDA_classifier(data, random_state=1)

Confusion matrix:
[[151   6]
 [  0 118]]


Classification_report:
             precision    recall  f1-score   support

          0       1.00      0.96      0.98       157
          1       0.95      1.00      0.98       118

avg / total       0.98      0.98      0.98       275



Accuracy_score:
0.9781818181818182


Zgodnie z teorią QDA powinno mieć skuteczność nie gorszą niż LDA i tak wychodzi również w tym przypadku. W przypadku tych danych oba klasyfikatory osiągają bardzo wysoką skuteczność, z niewielką przewagą na korzyść QDA.