In [None]:
import glob
import os

import numpy
import pandas
import sklearn.model_selection
import sklearn.naive_bayes
import sklearn.svm

def get_list_of_dir_elements(path):
    """Return the sorted names of the visible entries inside a directory.

    The directory is read with ``os.listdir`` instead of running ``ls``
    through a shell, so a path containing quotes or shell metacharacters
    cannot be executed as a command, and entry names that contain whitespace
    are returned intact rather than being split into several bogus names.

    Args:
        path: Directory to list.

    Returns:
        list[str]: Entry names (not full paths) in sorted order. Names
        starting with a dot are skipped, as ``ls`` does by default. An empty
        list is returned when ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        # Best-effort: a missing directory yields no names instead of an error.
        return []
    return sorted(name for name in os.listdir(path) if not name.startswith('.'))

def get_dataframe(path):
    """Read a header-less CSV file into a DataFrame.

    The file's first line is treated as data (no header row), and the
    columns are labelled 'Frequency' and 'Amplitude', in that order.

    Args:
        path: Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed file with the two named columns.
    """
    column_names = ['Frequency', 'Amplitude']
    return pandas.read_csv(path, header=None, names=column_names)

def check_if_dataframes_are_equal(dataframe1, dataframe2):
    """Tell whether two DataFrames have the same labels and the same values.

    Args:
        dataframe1: First DataFrame.
        dataframe2: Second DataFrame.

    Returns:
        bool: True when both frames have equal index and column labels and
        every cell compares equal; cells that are missing (NaN) in both
        frames count as equal. False otherwise, including when the frames
        have different shapes or labels.
    """
    same_labels = (dataframe1.index.equals(dataframe2.index)
                   and dataframe1.columns.equals(dataframe2.columns))
    if not same_labels:
        # Element-wise `==` raises ValueError for differently-labelled frames;
        # for an equality check that situation simply means "not equal".
        return False

    # NaN != NaN, so positions missing on both sides are matched explicitly;
    # otherwise a frame containing NaN would not equal its own copy.
    both_missing = dataframe1.isna() & dataframe2.isna()
    matches = (dataframe1 == dataframe2) | both_missing
    return bool(matches.values.all())

def get_samples_paths():
    """Return every path located three levels below ``FFT_EXTRACTIONS``.

    The pattern ``FFT_EXTRACTIONS/*/*/*`` is resolved relative to the current
    working directory with ``glob`` rather than by running ``ls -d`` through
    a shell, so paths containing whitespace come back as single entries
    instead of being split apart.

    Returns:
        list[str]: Matching paths in sorted order; an empty list when nothing
        matches. Entries whose name starts with a dot are not matched by the
        ``*`` wildcard.
    """
    return sorted(glob.glob('FFT_EXTRACTIONS/*/*/*'))

def get_dataset(samples_path):
    """Build a labelled feature array from a directory of per-class samples.

    Each entry listed under ``samples_path`` is treated as one class ("phone
    type"). Every file listed inside a class directory is read with
    ``get_dataframe``; its 'Amplitude' column becomes one row of features.

    Args:
        samples_path: Directory whose entries are the class directories.

    Returns:
        numpy.ndarray: One row per sample file, made of the amplitude values
        followed by the class index as the last element. The class index is
        the position of the class directory in the listing returned by
        ``get_list_of_dir_elements``.
        NOTE(review): the result is a rectangular 2-D array only if every
        sample file has the same number of rows -- confirm for the data set.
    """
    dataset_list = []

    for phone_type_index, phone_type in enumerate(get_list_of_dir_elements(samples_path)):
        print('Fetching the samples for {0}'.format(phone_type))
        phone_type_path = os.path.join(samples_path, phone_type)
        for sample in get_list_of_dir_elements(phone_type_path):
            dataframe = get_dataframe(os.path.join(phone_type_path, sample))
            amplitudes = dataframe['Amplitude'].to_numpy()
            # The label travels with the features as the last element of the row.
            dataset_list.append(numpy.append(amplitudes, phone_type_index))

    return numpy.array(dataset_list)

def apply_classifier(dataset_array, classifier_method, random_state=None):
    """Train a classifier on a random split of the data and return its accuracy.

    The last column of ``dataset_array`` is used as the label and all other
    columns as features. The rows are shuffled and divided into a training
    and a test part with ``train_test_split`` (default proportions); the
    classifier is fitted on the former and scored on the latter.

    Args:
        dataset_array: 2-D array with the features in every column but the
            last, and the class label in the last column.
        classifier_method: Zero-argument callable returning an object that
            provides ``fit(features, labels)`` and ``predict(features)``.
        random_state: Optional seed forwarded to ``train_test_split`` so the
            split can be reproduced. The default ``None`` leaves the split
            unseeded.

    Returns:
        float: Fraction of test samples whose predicted label equals the
        true label, between 0.0 and 1.0.
    """
    dataset_features = dataset_array[:, :-1]
    dataset_labels = dataset_array[:, -1]

    features_train, features_test, labels_train, labels_test = sklearn.model_selection.train_test_split(
        dataset_features,
        dataset_labels,
        shuffle=True,
        random_state=random_state,
    )

    classifier = classifier_method()
    classifier.fit(features_train, labels_train)
    predicted_labels = classifier.predict(features_test)

    # The mean of the boolean hit mask is the accuracy, and it is also
    # well-defined (0.0) when not a single prediction is correct.
    return float(numpy.mean(predicted_labels == labels_test))
    
def apply_baise_naive_on_dataset(dataset_array):
    """Evaluate a Gaussian Naive Bayes classifier on ``dataset_array``.

    Delegates to ``apply_classifier`` with ``sklearn.naive_bayes.GaussianNB``
    as the classifier factory and returns its result unchanged.
    """
    classifier_method = sklearn.naive_bayes.GaussianNB
    return apply_classifier(dataset_array, classifier_method)

def apply_support_machine_vectors_on_dataset(dataset_array):
    """Evaluate a support vector classifier on ``dataset_array``.

    Delegates to ``apply_classifier`` with ``sklearn.svm.SVC`` as the
    classifier factory and returns its result unchanged.
    """
    classifier_method = sklearn.svm.SVC
    return apply_classifier(dataset_array, classifier_method)

In [None]:
# Load every dataset once, keyed by the sample directory it was read from,
# so the classifier cells below can reuse them without touching the disk again.
samples_paths = get_samples_paths()
all_datasets = {}

for path in samples_paths:
    print('Fetching dataset for the current path: {0}'.format(path))
    all_datasets.update({path: get_dataset(path)})

In [None]:
# Report the Gaussian Naive Bayes accuracy obtained on each loaded dataset
print('Bayes Naive Implementation')
for key in all_datasets:
    value = all_datasets[key]
    print('{0} : {1}'.format(key, apply_baise_naive_on_dataset(value)))

In [None]:
# Report the Support Vector Machine accuracy obtained on each loaded dataset
print('Support Vector Machines Implementation')
for key in all_datasets:
    value = all_datasets[key]
    print('{0} : {1}'.format(key, apply_support_machine_vectors_on_dataset(value)))