In [1]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
import pandas

# Shared experiment configuration.
random_state = 42  # seed handed to every estimator below that accepts one
max_epochs = 100   # iteration budget for the MLP (max_iter)
batch_size = 2000  # minibatch size for the MLP

# Registry of the classifiers to benchmark, keyed by a short display name.
# Insertion order is the order in which the models are trained and reported.
clfs = {
    "LR": LogisticRegressionCV(
        random_state=random_state,
        multi_class='auto',
        solver='liblinear',
        penalty='l1',
    ),
    "SVM": SVC(
        random_state=random_state,
        kernel='rbf',
        gamma=1,
        probability=True,
    ),
    "RF": RandomForestClassifier(
        random_state=random_state,
        verbose=0,
        n_estimators=1000,
        n_jobs=-1,
    ),
    "NB": BernoulliNB(),
    "MLP": MLPClassifier(
        random_state=random_state,
        batch_size=batch_size,
        hidden_layer_sizes=(1000,),
        # Driven by the max_epochs constant instead of a duplicated literal,
        # so changing the constant above actually changes the training budget.
        max_iter=max_epochs,
        activation='relu',
        solver='adam',
    ),
    "Extra": ExtraTreesClassifier(
        random_state=random_state,
        n_estimators=1000,
        n_jobs=-1,
    ),
    "KNN": KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
}

In [2]:
# Load the train/test label tables for the selected dataset.
dataset_name = "ISOT"
labels_dir = "data/data_transformed"
y_train = pandas.read_csv(f"{labels_dir}/{dataset_name}_y_train.csv")
y_test = pandas.read_csv(f"{labels_dir}/{dataset_name}_y_test.csv")

# Use column '0' when the training labels have it; otherwise fall back to
# 'LiarColumnNames.LABEL'. The same column name is then used for both splits.
key_y = '0' if '0' in y_train.columns else 'LiarColumnNames.LABEL'

# Keep only the raw label arrays from here on.
y_train = y_train[key_y].values
y_test = y_test[key_y].values

In [None]:
from os import listdir

path_files = "results/latents_combination_views/"

# Show the raw directory listing as a quick sanity check.
latent_entries = listdir(path_files)
print(latent_entries)

# The training cell builds its input paths as "<name>_train.csv" and
# "<name>_test.csv", so list_files must hold base names, not raw file names
# (raw names would yield paths such as "X_train.csv_train.csv").
train_suffix = "_train.csv"
test_suffix = "_test.csv"
train_names = {
    entry[:-len(train_suffix)]
    for entry in latent_entries
    if entry.endswith(train_suffix)
}
test_names = {
    entry[:-len(test_suffix)]
    for entry in latent_entries
    if entry.endswith(test_suffix)
}

# A view can only be evaluated when both of its splits are on disk.
incomplete_views = sorted(train_names ^ test_names)
if incomplete_views:
    print("Skipping views without both train and test files:", incomplete_views)

# Sorted so the processing order is the same on every run.
list_files = sorted(train_names & test_names)

In [None]:
from utils.classifiers_utils import *
from datetime import datetime
import os

# Directory that receives one result CSV per evaluated feature file.
path_result = "results/clfs_combination_views"
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(path_result, exist_ok=True)

# Names that already have a result file, so an interrupted run can be resumed.
# Only a trailing ".csv" is stripped; a blanket replace would also mangle
# names that contain ".csv" somewhere in the middle.
# (os.listdir is used so this cell does not rely on an import from another cell.)
csv_ext = ".csv"
files_alread_run = [
    name[:-len(csv_ext)] if name.endswith(csv_ext) else name
    for name in os.listdir(path_result)
]

for file in list_files:
    if file in files_alread_run:
        continue

    print(file)
    x_train = pandas.read_csv(os.path.join(path_files, file + "_train.csv"))
    x_test = pandas.read_csv(os.path.join(path_files, file + "_test.csv"))

    # One row per classifier: [name, accuracy, precision, recall, fscore].
    results = []
    for clf_name, clf in clfs.items():
        print(" - Train " + clf_name + " -- " + str(datetime.now()))
        accuracy, precision, recall, fscore = train_models(clf, x_train, y_train, x_test, y_test)
        results.append([clf_name, accuracy, precision, recall, fscore])

    # Written only after every classifier has finished, so a result file on
    # disk always holds the full set of rows for that feature file.
    print("Saving results")
    result_table = pandas.DataFrame(
        results,
        columns=["classifier", "accuracy", "precision", "recall", "fscore"],
    )
    result_table.to_csv(os.path.join(path_result, file + ".csv"))