In [8]:
import csv
import random

from sklearn import svm
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Read data in from file
# Each row is read as four numeric feature columns followed by a class flag:
# "0" is labelled "Authentic", any other value "Counterfeit".
# newline="" lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to csv.reader.
with open("banknotes.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # discard the first row (assumed to be the column header)

    data = [
        {
            "evidence": [float(cell) for cell in row[:4]],
            "label": "Authentic" if row[4] == "0" else "Counterfeit",
        }
        for row in reader
        # csv.reader yields [] for a blank line (e.g. a trailing newline);
        # skip those instead of failing on row[4].
        if row
    ]

# Separate data into training and testing groups
# A dedicated, seeded generator makes the 40% hold-out identical on every run,
# and shuffling a copy keeps `data` untouched so re-running this cell always
# reproduces the same split.
SPLIT_SEED = 0
holdout = int(0.40 * len(data))
shuffled = list(data)
random.Random(SPLIT_SEED).shuffle(shuffled)
testing = shuffled[:holdout]
training = shuffled[holdout:]

def run_model(model):
    """Fit `model` on the global `training` rows and print its score on `testing`.

    Args:
        model: Object exposing `fit(X, y)` and `predict(X)`.

    Returns:
        None. The model name, the number of correct and incorrect
        predictions, and the accuracy percentage are printed.
    """
    # Fit on the training rows
    model.fit(
        [sample["evidence"] for sample in training],
        [sample["label"] for sample in training],
    )

    # Predict the held-out rows
    expected = [sample["label"] for sample in testing]
    predictions = model.predict([sample["evidence"] for sample in testing])

    # Pair every true label with its prediction and tally the matches
    pairs = list(zip(expected, predictions))
    total = len(pairs)
    correct = sum(1 for actual, predicted in pairs if actual == predicted)
    incorrect = total - correct

    # Report
    print(f"Results for model {type(model).__name__}")
    print(f"Correct: {correct}")
    print(f"Incorrect: {incorrect}")
    print(f"Accuracy: {100 * correct / total:.2f}%")

In [4]:
# Linear perceptron with scikit-learn's default settings.
model = Perceptron()
run_model(model)

Results for model Perceptron
Correct: 540
Incorrect: 8
Accuracy: 98.54%


In [5]:
# Support vector classifier with scikit-learn's default hyperparameters.
model = svm.SVC()
run_model(model)

Results for model SVC
Correct: 546
Incorrect: 2
Accuracy: 99.64%


In [6]:
# k-nearest neighbours with k=1: each test row takes the label of its single
# closest training row.
model = KNeighborsClassifier(n_neighbors=1)
run_model(model)

Results for model KNeighborsClassifier
Correct: 548
Incorrect: 0
Accuracy: 100.00%


In [7]:
# k-nearest neighbours with k=3: each test row is labelled by a vote among its
# three closest training rows.
model = KNeighborsClassifier(n_neighbors=3)
run_model(model)

Results for model KNeighborsClassifier
Correct: 548
Incorrect: 0
Accuracy: 100.00%


In [10]:
def train_test_split_model(model, split_fraction=0.5, random_state=None):
    """Fit `model` on a random split of the global `data` and print its test score.

    Args:
        model: Object exposing `fit(X, y)` and `predict(X)`.
        split_fraction: Passed to `train_test_split` as `test_size`.
        random_state: Passed to `train_test_split` as `random_state`. Supply an
            int to get the same split on every call; the default of None keeps
            the earlier behaviour of drawing a new split each time.

    Returns:
        None. The model name, the number of correct and incorrect
        predictions, and the accuracy percentage are printed.
    """
    # Separate data into training and testing groups
    evidence = [row["evidence"] for row in data]
    labels = [row["label"] for row in data]

    X_training, X_testing, y_training, y_testing = train_test_split(
        evidence, labels, test_size=split_fraction, random_state=random_state
    )

    # Fit model
    model.fit(X_training, y_training)

    # Make predictions on the testing set
    predictions = model.predict(X_testing)

    # Compute how well we performed.
    # Labels are compared pairwise: `y_testing == predictions` is only
    # element-wise when `predict` returns a NumPy array; for two plain lists it
    # collapses to a single bool, which has no `.sum()`.
    total = len(predictions)
    correct = sum(
        1 for actual, predicted in zip(y_testing, predictions) if actual == predicted
    )
    incorrect = total - correct

    # Print results
    print(f"Results for model {type(model).__name__}")
    print(f"Correct: {correct}")
    print(f"Incorrect: {incorrect}")
    print(f"Accuracy: {100 * correct / total:.2f}%")

In [11]:
# Perceptron scored on a split drawn by train_test_split; split_fraction is
# left at its default of 0.5.
train_test_split_model(Perceptron())

Results for model Perceptron
Correct: 676
Incorrect: 10
Accuracy: 98.54%


In [12]:
# Support vector classifier (default hyperparameters) on a newly drawn split.
train_test_split_model(svm.SVC())

Results for model SVC
Correct: 683
Incorrect: 3
Accuracy: 99.56%


In [13]:
# Gaussian naive Bayes (default settings) on a newly drawn split.
train_test_split_model(GaussianNB())

Results for model GaussianNB
Correct: 575
Incorrect: 111
Accuracy: 83.82%


In [14]:
# k-nearest neighbours with k=5 on a newly drawn split.
train_test_split_model(KNeighborsClassifier(n_neighbors=5))

Results for model KNeighborsClassifier
Correct: 686
Incorrect: 0
Accuracy: 100.00%
