In [0]:
import pandas as pd
import glob
import numpy as np
import cv2 as cv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score

In [0]:
class RadialCompressorClassifier:

    def __init__(self,
                 clf=RandomForestClassifier(random_state=23, 
                                            n_estimators=100, 
                                            n_jobs=-1)):
        
        self.clf=RandomForestClassifier(random_state=23, 
                                        n_estimators=100, 
                                        n_jobs=-1)
        
        self.anomaly_csv_columns = {"LS": 1, 
                                    "BR": 2, 
                                    "SB": 3, 
                                    "DS": 4, 
                                    "A": 5, 
                                    "IR": 6, 
                                    "NUD":7}
        
        self.statistics = ["min", "max", "var", "median", "mean"]
        
        self.X = None
        self.y = None
    
    def load_training_data(self, 
                           anomaly, 
                           statistic, 
                           dirname="1d_data/", 
                           im_size=100, 
                           csv_filename="reflex.csv"):
        
        if statistic not in self.statistics:
            print("The chosen statistic: '" + statistic + "' is not supported.")
            print("Available compression statistics: " 
                  + ", ".join(self.statistics))
            return
        if anomaly not in self.anomaly_csv_columns.keys():
            print("Unrecognized anomaly: '" + anomaly + "'.")
            print("Avialable anomalies: " + self.anomalies.keys())
            return

        name = pd.read_csv(csv_filename).iloc[:, 0]
        file_names = [fn for fn in glob.glob(dirname 
                                             + "*" + str(im_size) 
                                             + "x" + str(im_size) 
                                             + statistic 
                                             + ".png")]
        print("Training on " + str(len(file_names)) + " images")
        join_attr = [fn[8:-15] for fn in file_names]
        X = [cv.imread(name, 0)[0][:45] for name in file_names]
        X = pd.DataFrame(X, index=join_attr)

        y_names = pd.read_csv(csv_filename).iloc[:, 0].str.slice(7, -4).values
        y = pd.read_csv(csv_filename).iloc[:, self.anomaly_csv_columns[anomaly]].values
        y = pd.DataFrame(y, index=y_names)
        X_joined = X.join(y, how="inner", rsuffix = "_labels")

        self.y = X_joined.loc[:, "0_labels"]
        self.X = X_joined.drop("0_labels", axis=1)
    
    def cross_val_score(self, 
                        cross_val_score_scoring="accuracy",
                        cross_val_score_cv=10):

        if self.X is None or self.y is None:
            print("Training data not loaded!")
            return
        
        return np.mean(cross_val_score(self.clf, 
                                       self.X, 
                                       self.y, 
                                       cv=cross_val_score_cv, 
                                       scoring=cross_val_score_scoring))

In [0]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

classifier_names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", 
                    "Gaussian Process", "Decision Tree", "Random Forest", 
                    "Neural Net", "AdaBoost", "Naive Bayes", "QDA"]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

In [4]:
#print("Available anomalies: " + ", ".join(rcc.anomaly_csv_columns.keys()))
#print("Available statistics: " + ", ".join(rcc.statistics))
#print("Available classifiers: " + ", ".join(classifier_names))

chosen_statistics = ["min", "max", "var", "mean", "median"]
print("Chosen statistics: " + ", ".join(chosen_statistics))
print()

df = pd.DataFrame(np.nan, 
                  index=rcc.anomaly_csv_columns.keys(), 
                  columns=chosen_statistics)


for i in range(len(classifiers)):
    print()
    print(classifier_names[i])
    rcc = RadialCompressorClassifier(classifiers[i])
    for anomaly in rcc.anomaly_csv_columns.keys():
        row = []
        for stat in chosen_statistics:
            rcc.load_training_data(anomaly, stat)
            data = [rcc.cross_val_score(),
                    rcc.cross_val_score(cross_val_score_scoring="recall")]
            row.append("/".join([str(i) for i in data]))
        df.loc[anomaly] = row

    print(df)

Chosen statistics: min, max, var, mean, median



NameError: ignored