In [1]:
# pip install scikit-learn
import csv
import random

from sklearn import svm
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Read data in from file
# Each record becomes {"evidence": [four floats], "label": str}: the first four
# columns are parsed as floats and the fifth column selects the label
# ("0" -> "Authentic", anything else -> "Counterfeit").
# newline="" is what the csv module documentation asks for, so that the reader
# itself handles line endings (including newlines embedded in quoted fields).
with open("banknotes.csv", newline="") as f:
    reader = csv.reader(f)
    # Discard the first row (assumed to be the column header); the default
    # keeps an empty file from raising StopIteration.
    next(reader, None)

    data = [
        {
            "evidence": [float(cell) for cell in row[:4]],
            "label": "Authentic" if row[4] == "0" else "Counterfeit",
        }
        for row in reader
        if row  # csv.reader yields [] for blank lines; skip them instead of crashing on row[4]
    ]

In [3]:
# Separate data into training and testing groups
TEST_FRACTION = 0.40  # share of the records held out for evaluation
SPLIT_SEED = 0  # fixed seed so Restart & Run All reproduces the same split

holdout = int(TEST_FRACTION * len(data))

# Shuffle a copy with a private, seeded RNG. `data` is left in its original
# order, so re-running this cell always produces the same split, and the
# global `random` state is not disturbed.
shuffled = list(data)
random.Random(SPLIT_SEED).shuffle(shuffled)

testing = shuffled[:holdout]
training = shuffled[holdout:]

In [4]:
# Model creation
# One untrained classifier per algorithm under comparison, built in the order
# perceptron, support vector machine, 1-nearest-neighbour, Gaussian naive Bayes.
model1, model2, model3, model4 = (
    Perceptron(),
    svm.SVC(),
    KNeighborsClassifier(n_neighbors=1),
    GaussianNB(),
)

In [5]:
# Train model on training set
# Split the training records into parallel feature / label lists.
X_training = [sample["evidence"] for sample in training]
y_training = [sample["label"] for sample in training]

# Fit every classifier on the same training data, in model order.
for classifier in (model1, model2, model3):
    classifier.fit(X_training, y_training)
# Kept as the cell's final expression so its result is still what the cell displays.
model4.fit(X_training, y_training)

GaussianNB()

In [6]:
# Make predictions on the testing set
# Pull the feature vectors and the true labels out of the held-out records.
X_testing = [sample["evidence"] for sample in testing]
y_testing = [sample["label"] for sample in testing]

# Ask each fitted classifier, in model order, to label the held-out features.
predictions1, predictions2, predictions3, predictions4 = (
    classifier.predict(X_testing)
    for classifier in (model1, model2, model3, model4)
)

In [7]:
# Compute how well we performed for perceptron
# Pair each true label with the matching prediction (zip stops at the shorter
# sequence), then derive the tallies from that list.
pairs1 = list(zip(y_testing, predictions1))
total1 = len(pairs1)
correct1 = sum(1 for actual, predicted in pairs1 if actual == predicted)
incorrect1 = total1 - correct1

# Print results
print(f"Results for model {type(model1).__name__}")
print(f"Correct: {correct1}")
print(f"Incorrect: {incorrect1}")
print(f"Accuracy: {100 * correct1 / total1:.2f}%")

Results for model Perceptron
Correct: 537
Incorrect: 11
Accuracy: 97.99%


In [8]:
# Compute how well we performed for SVM
# Pair each true label with the matching prediction (zip stops at the shorter
# sequence), then derive the tallies from that list.
pairs2 = list(zip(y_testing, predictions2))
total2 = len(pairs2)
correct2 = sum(1 for actual, predicted in pairs2 if actual == predicted)
incorrect2 = total2 - correct2

# Print results
print(f"Results for model {type(model2).__name__}")
print(f"Correct: {correct2}")
print(f"Incorrect: {incorrect2}")
print(f"Accuracy: {100 * correct2 / total2:.2f}%")

Results for model SVC
Correct: 544
Incorrect: 4
Accuracy: 99.27%


In [9]:
# Compute how well we performed for KNN
# Pair each true label with the matching prediction (zip stops at the shorter
# sequence), then derive the tallies from that list.
pairs3 = list(zip(y_testing, predictions3))
total3 = len(pairs3)
correct3 = sum(1 for actual, predicted in pairs3 if actual == predicted)
incorrect3 = total3 - correct3

# Print results
print(f"Results for model {type(model3).__name__}")
print(f"Correct: {correct3}")
print(f"Incorrect: {incorrect3}")
print(f"Accuracy: {100 * correct3 / total3:.2f}%")

Results for model KNeighborsClassifier
Correct: 548
Incorrect: 0
Accuracy: 100.00%


In [10]:
# Compute how well we performed for naive bayes
# Pair each true label with the matching prediction (zip stops at the shorter
# sequence), then derive the tallies from that list.
pairs4 = list(zip(y_testing, predictions4))
total4 = len(pairs4)
correct4 = sum(1 for actual, predicted in pairs4 if actual == predicted)
incorrect4 = total4 - correct4

# Print results
print(f"Results for model {type(model4).__name__}")
print(f"Correct: {correct4}")
print(f"Incorrect: {incorrect4}")
print(f"Accuracy: {100 * correct4 / total4:.2f}%")

Results for model GaussianNB
Correct: 471
Incorrect: 77
Accuracy: 85.95%
