## Initialization

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [3]:
model_accuracies = {'KNN':1, 'LogReg':1, 'DT':1, 'RF':1, 'NB':1, 'LinearSVC':1, 'KernelSVC':1}

In [4]:
model_accuracies

{'DT': 1,
 'KNN': 1,
 'KernelSVC': 1,
 'LinearSVC': 1,
 'LogReg': 1,
 'NB': 1,
 'RF': 1}

## Importing the Data

In [5]:
dataset = pd.read_csv("data_banknote_authentication.txt", header=None)

In [6]:
dataset.head()

Unnamed: 0,0,1,2,3,4
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [7]:
dataset.shape

(1372, 5)

In [8]:
dataset.columns = ["Variance", "Skewness", "Curtosis", "Entropy", "Class"]

In [9]:
dataset.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


## Creating X and Y

In [10]:
X = dataset[["Variance", "Skewness", "Curtosis", "Entropy"]]
X.shape

(1372, 4)

In [11]:
Y = dataset["Class"]
Y.shape

(1372,)

In [12]:
X.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy
0,3.6216,8.6661,-2.8073,-0.44699
1,4.5459,8.1674,-2.4586,-1.4621
2,3.866,-2.6383,1.9242,0.10645
3,3.4566,9.5228,-4.0112,-3.5944
4,0.32924,-4.4552,4.5718,-0.9888


In [13]:
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: Class, dtype: int64

## Split Data into Train and Test data

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25, random_state=4)

In [15]:
X_train.shape

(1029, 4)

In [16]:
X_test.shape

(343, 4)

In [17]:
Y_train.shape

(1029,)

In [18]:
Y_test.shape

(343,)

In [19]:
pd.DataFrame(pd.DataFrame(Y_train)['Class'].value_counts())

Unnamed: 0,Class
0,566
1,463


In [20]:
pd.DataFrame(pd.DataFrame(Y_test)['Class'].value_counts())

Unnamed: 0,Class
0,196
1,147


## KNN Classifier

In [21]:
from sklearn.neighbors import KNeighborsClassifier
clf_knn = KNeighborsClassifier(n_neighbors = 5)

In [22]:
clf_knn.fit(X_train, Y_train)
Y_pred_knn = clf_knn.predict(X_test)

In [23]:
cm_knn = confusion_matrix(Y_pred_knn, Y_test)
cm_knn

array([[196,   0],
       [  0, 147]], dtype=int64)

## Decision Tree Classifier

In [24]:
from sklearn.tree import DecisionTreeClassifier
clf_dt = DecisionTreeClassifier(criterion = "entropy")

In [25]:
clf_dt.fit(X_train, Y_train)
Y_pred_dt = clf_dt.predict(X_test)

In [26]:
cm_dt = confusion_matrix(Y_pred_dt, Y_test)
cm_dt

array([[193,   1],
       [  3, 146]], dtype=int64)

## Logistic Regression

In [27]:
from sklearn.linear_model import LogisticRegression
clf_lr = LogisticRegression()

In [28]:
clf_lr.fit(X_train, Y_train)
Y_pred_lr = clf_lr.predict(X_test)

In [29]:
cm_lr = confusion_matrix(Y_pred_lr, Y_test)
cm_lr

array([[195,   0],
       [  1, 147]], dtype=int64)

## Random Forest Classifier

In [30]:
from sklearn.ensemble import RandomForestClassifier

In [31]:
clf_rf = RandomForestClassifier(n_estimators = 10, criterion = "entropy", random_state = 0)
clf_rf.fit(X_train, Y_train)
Y_pred_rf = clf_rf.predict(X_test)

In [32]:
cm_rf = confusion_matrix(Y_pred_rf, Y_test)
cm_rf

array([[196,   0],
       [  0, 147]], dtype=int64)

## Naive Bayes Classifier

In [33]:
from sklearn.naive_bayes import GaussianNB
clf_nb = GaussianNB()

In [34]:
clf_nb.fit(X_train, Y_train)
Y_pred_nb = clf_nb.predict(X_test)

In [35]:
cm_nb = confusion_matrix(Y_pred_nb, Y_test)
cm_nb

array([[168,  25],
       [ 28, 122]], dtype=int64)

## SVC Linear

In [36]:
from sklearn.svm import SVC
clf_lsvc = SVC(kernel = "linear")

In [37]:
clf_lsvc.fit(X_train, Y_train)
Y_pred_lsvc = clf_lsvc.predict(X_test)

In [38]:
cm_lsvc = confusion_matrix(Y_pred_lsvc, Y_test)
cm_lsvc

array([[195,   0],
       [  1, 147]], dtype=int64)

## SVC Kernel

In [39]:
clf_ksvc = SVC(kernel = "rbf")
clf_ksvc.fit(X_train, Y_train)
Y_pred_ksvc = clf_ksvc.predict(X_test)

In [40]:
cm_ksvc = confusion_matrix(Y_pred_ksvc, Y_test)
cm_ksvc

array([[196,   0],
       [  0, 147]], dtype=int64)

## Check the Accuracy

In [41]:
model_accuracies

{'DT': 1,
 'KNN': 1,
 'KernelSVC': 1,
 'LinearSVC': 1,
 'LogReg': 1,
 'NB': 1,
 'RF': 1}

In [42]:
model_accuracies['DT'] = accuracy_score(Y_pred_dt, Y_test)
model_accuracies['KNN'] = accuracy_score(Y_pred_knn, Y_test)
model_accuracies['KernelSVC'] = accuracy_score(Y_pred_ksvc, Y_test)
model_accuracies['LinearSVC'] = accuracy_score(Y_pred_lsvc, Y_test)
model_accuracies['LogReg'] = accuracy_score(Y_pred_lr, Y_test)
model_accuracies['NB'] = accuracy_score(Y_pred_nb, Y_test)
model_accuracies['RF'] = accuracy_score(Y_pred_rf, Y_test)

In [43]:
model_accuracies

{'DT': 0.98833819241982512,
 'KNN': 1.0,
 'KernelSVC': 1.0,
 'LinearSVC': 0.99708454810495628,
 'LogReg': 0.99708454810495628,
 'NB': 0.84548104956268222,
 'RF': 1.0}