In [1]:
import sys
# Add the parent directory to the module search path; the `utils` and `model`
# imports in the next cell presumably resolve from there (verify repo layout).
sys.path.append('..')

In [2]:
from utils.read import read_data, read_df
from model.naive_bayes import Bayes_Classifier

In [3]:
# Load the landmark CSV files for the train and test splits (train first, then test)
train_landmarks = "../dataset/train_landmarks.csv"
test_landmarks = "../dataset/test_landmarks.csv"
train, test = (read_df(csv_path) for csv_path in (train_landmarks, test_landmarks))

In [4]:
# Values explored by the parameter sweep over the landmark classifier
input_sizes = list(range(50, 301, 50))   # 50, 100, ..., 300
kernel_sizes = list(range(1, 12, 2))     # odd sizes 1 through 11
kernel_decay_methods = ["distance", "none"]
prob_type = ["individual"] # "collective"

In [5]:
import pandas as pd

# Schema of the results table: one row per evaluated configuration
columns = [
    "input_size",
    "prob_type",
    "kernel_size",
    "decay_method",
    "accuracy",
]
# Start from an empty frame that only carries the column names
results = pd.DataFrame(columns=columns)

In [6]:
# Grid search over input size, probability type, kernel size and decay method.
# Rows are collected in a plain list and turned into `results` once at the
# end, so re-running this cell rebuilds the table instead of appending
# duplicate rows to it.
records = []
for input_size in input_sizes:
    for prob in prob_type:
        for kernel_size in kernel_sizes:
            # With kernel_size == 1 the decay methods are not swept: a single
            # run is made with "none" passed to the classifier and None
            # recorded as the decay method.
            decay_options = kernel_decay_methods if kernel_size > 1 else [None]
            for dcm in decay_options:
                # Train the model
                model = Bayes_Classifier(
                    "landmarks", input_size, input_size, prob, kernel_size,
                    "none" if dcm is None else dcm,
                )
                model.train(train)

                # Test the model: accuracy is the fraction of rows whose
                # predicted label equals the true label
                df = model.classify(test)
                df["correct"] = df["label"] == df["predicted_label"]
                accuracy = df["correct"].sum() / len(df)

                records.append({
                    "input_size": input_size,
                    "prob_type": prob,
                    "kernel_size": kernel_size,
                    "decay_method": dcm,
                    "accuracy": accuracy
                })
                print("input_size: {}, prob_type: {}, kernel_size: {}, decay_method: {}, accuracy: {}".format(input_size, prob, kernel_size, dcm, accuracy))

# Build the results table in one go, keeping the column order declared earlier
results = pd.DataFrame(records, columns=columns)
            

input_size: 50, prob_type: individual, kernel_size: 1, decay_method: None, accuracy: 0.6092424685580579
input_size: 50, prob_type: individual, kernel_size: 3, decay_method: distance, accuracy: 0.6218192453933898
input_size: 50, prob_type: individual, kernel_size: 3, decay_method: none, accuracy: 0.5997367651360047
input_size: 50, prob_type: individual, kernel_size: 5, decay_method: distance, accuracy: 0.5703422053231939
input_size: 50, prob_type: individual, kernel_size: 5, decay_method: none, accuracy: 0.5340742907282832
input_size: 50, prob_type: individual, kernel_size: 7, decay_method: distance, accuracy: 0.5393389880081896
input_size: 50, prob_type: individual, kernel_size: 7, decay_method: none, accuracy: 0.4982451009066979
input_size: 50, prob_type: individual, kernel_size: 9, decay_method: distance, accuracy: 0.5226674466218193
input_size: 50, prob_type: individual, kernel_size: 9, decay_method: none, accuracy: 0.47104416496051477
input_size: 50, prob_type: individual, kernel_s

In [7]:
# Display the results table; the requested head is as long as the table itself
results.head(n=results.shape[0])

Unnamed: 0,input_size,prob_type,kernel_size,decay_method,accuracy
0,50,individual,1,,0.609242
1,50,individual,3,distance,0.621819
2,50,individual,3,none,0.599737
3,50,individual,5,distance,0.570342
4,50,individual,5,none,0.534074
...,...,...,...,...,...
61,300,individual,7,none,0.690992
62,300,individual,9,distance,0.716145
63,300,individual,9,none,0.711612
64,300,individual,11,distance,0.720971


In [8]:
# Report the highest accuracy, then display the row(s) that reached it
best_accuracy = results["accuracy"].max()
print("Best accuracy: {}".format(best_accuracy))
results[results["accuracy"] == best_accuracy]

Best accuracy: 0.7221409768938286


Unnamed: 0,input_size,prob_type,kernel_size,decay_method,accuracy
51,250,individual,9,distance,0.722141


In [9]:
# Read image data
train_dir = "../dataset/train_binary"
test_dir = "../dataset/test_binary"
# Both splits are read with the same options
read_options = dict(flatten=1, grayscale=1, resize=(30, 30))
features, labels = read_data(train_dir, **read_options)
test_features, test_labels = read_data(test_dir, **read_options)

In [10]:
# Train the pixel-feature classifier and classify the test data
naive_bayes = Bayes_Classifier(feature_type='pixels')
naive_bayes.train((features, labels))
preds = naive_bayes.classify((test_features, test_labels))
# calculate accuracy: count the positions where the prediction equals the label
correct = sum(1 for i in range(len(preds)) if preds[i] == test_labels[i])
print("Accuracy: {}".format(correct / len(preds)))

Accuracy: 0.11860193038900263


In [11]:
# Read image data from the non-binary train/test directories
train_dir = "../dataset/train"
test_dir = "../dataset/test"
# Both splits are read with the same options
read_options = dict(flatten=1, grayscale=1, resize=(30, 30))
features, labels = read_data(train_dir, **read_options)
test_features, test_labels = read_data(test_dir, **read_options)

In [12]:
# Train the pixel-feature classifier and classify the test data
naive_bayes = Bayes_Classifier(feature_type='pixels')
naive_bayes.train((features, labels))
preds = naive_bayes.classify((test_features, test_labels))
# calculate accuracy: count the positions where the prediction equals the label
correct = sum(1 for i in range(len(preds)) if preds[i] == test_labels[i])
print("Accuracy: {}".format(correct / len(preds)))

Accuracy: 0.1335185726820708
