In [1]:
import numpy as np
import time
import os
import cv2

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import FeatureHasher
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

In [2]:
import sys

print(sys.executable)

/nethome/jbang36/anaconda3/envs/eva_35/bin/python


In [3]:
# Set the dataset paths
os.getcwd()
home_dir = os.path.abspath('../../')
data_dir = os.path.join(home_dir, 'data', 'ua_detrac', 'small-data')
data_dir

home_dir

filter_path = os.path.join(home_dir, 'filters')
loader_path = os.path.join(home_dir, 'loaders')

sys.path.append(home_dir)
sys.path.append(loader_path)
sys.path.append(filter_path)

In [4]:
from loaders.load import Load

# Load data effeciently using the loader
start_time = time.time()
load = Load()

eva_dir = home_dir
train_image_dir = os.path.join(eva_dir, "data", "ua_detrac", "small-data")
train_anno_dir = os.path.join(eva_dir, "data", "ua_detrac", "small-annotations")

X_train, length_per_mvi = load.load_images(train_image_dir)
Y_train_dict = load.load_XML(train_anno_dir, X_train, length_per_mvi)

# Assertions for data dimensions
print("Starting assertions...")
assert (len(X_train.shape) == 4)  # n_samples, width, height, channels
assert (len(Y_train_dict) == 4)  # vehicle_type, color, intersection, speed
assert (X_train.shape[0] == len(Y_train_dict['color']))  # number of frames should be same
assert (X_train.shape[0] == len(Y_train_dict['vehicle_type']))
assert (X_train.shape[0] == len(Y_train_dict['intersection']))
assert (X_train.shape[0] == len(Y_train_dict['speed']))
print("Done with test!")
print("Total time to load small-data is", time.time() - start_time, "seconds")

image directory is  /home/jbang36/eva/data/ua_detrac/small-data
Length of file_names: 10421
mvi length: 664
car_labels length: 664
mvi length: 1600
car_labels length: 1600
mvi length: 2037
car_labels length: 2037
mvi length: 2821
car_labels length: 2821
mvi length: 3621
car_labels length: 3621
mvi length: 4421
car_labels length: 4421
mvi length: 5327
car_labels length: 5327
mvi length: 6021
car_labels length: 6021
mvi length: 6821
car_labels length: 6821
mvi length: 7621
car_labels length: 7621
mvi length: 8421
car_labels length: 8421
mvi length: 9221
car_labels length: 9221
mvi length: 10421
car_labels length: 10421
Starting assertions...
Done with test!
Total time to load small-data is 52.74796104431152 seconds


In [24]:
class PP:
    def __init__(self):

        self.model_library = {"pca_svm": self._svm,
                              "dnn": self._dnn,
                              "pca_rf": self._rf} #KDE, SVM, NN - this should be a mapping of model name to model CONSTRUCTOR

        self.pre_data = {} #processed data through preprocessing methods
        self.validation_set = []
        self.trained_pre = {}
        self.trained_post = {}
        self.pre_stats = {}
        self.column_stats = {} #ex: {'pca_svm':[C value, Threshold Value, R value]}
        labels = {"vehicle_type": ["car", "van", "bus", "others"],
                      "color": ["red", "white", "black", "silver"],
                      "speed": ["s>40", "s>50", "s>60", "s<65", "s<70"],
                      "intersection": ["pt335", "pt211", "pt342", "pt208"]}


    def _generate_binary_labels(self, Y_dict, n_samples):
        """
        Example label dict is going to be {"car": [0,0,0,1,0,0,0.....],"others": [0,1,0,0,0...}
        :param Y_dict: dictionary for all the label categories
        :return: column names and a matrix that contains all the binary values
        """
        Y_names = ["t=car", "t=van", "t=bus", "t=others", "c=red", "c=white", "c=black", "c=silver",
                   "s>40", "s>50", "s>60", "s<65", "s<70", "i=pt335", "i=pt211", "i=pt342", "i=pt208",
                   "o=pt335", "o=pt211", "o=pt342", "o=pt208"]

        Y_table = np.zeros(shape=(n_samples,
                                  len(Y_names)))

        print(Y_table.shape)

        for column_name in Y_dict:
            for frameid, frame_content in enumerate(Y_dict[column_name]):
                if frame_content == None:
                    continue

                if column_name == "speed":
                    for speed_data in frame_content:
                        assert(type(speed_data) == float or type(speed_data) == int)
                        if speed_data > 40:
                            Y_table[frameid][Y_names.index("s>40")] = 1
                        if speed_data > 50:
                            Y_table[frameid][Y_names.index("s>50")] = 1
                        if speed_data > 60:
                            Y_table[frameid][Y_names.index("s>60")] = 1
                        if speed_data < 65:
                            Y_table[frameid][Y_names.index("s<65")] = 1
                        if speed_data < 70:
                            Y_table[frameid][Y_names.index("s<70")] = 1

                elif column_name == "intersection":
                    for intersection_data in frame_content:
                        assert(type(intersection_data) == str)
                        Y_table[frameid][Y_names.index("i="+intersection_data)] = 1
                        Y_table[frameid][Y_names.index("o="+intersection_data)] = 1

                elif column_name == "vehicle_type":
                    for vehicle_data in frame_content:
                        assert(type(vehicle_data) == str)
                        Y_table[frameid][Y_names.index("t="+vehicle_data)] = 1

                else:
                    assert(column_name == "color")
                    for color_data in frame_content:
                        assert(type(color_data) == str)
                        Y_table[frameid][Y_names.index("c="+color_data)] = 1

        print(Y_names)
        print(Y_table[:10])
        return Y_names, Y_table

    def _reshape_image(self, X, sampling_rate = 8):
        """
        :param X: Input images
        :param sampling_rate: The reduction rate
        :return: the reshaped images
        """
        print("before:", X.shape)
        reduction_rate = sampling_rate
        #need to down shape them so that the kernels can train faster
        #image should be num_samples, height, width, channel
        downsampled_images = X[:,::reduction_rate,::reduction_rate,:]
        nsamples, nx, ny, nc = downsampled_images.shape
        reshaped_images = downsampled_images.reshape((nsamples, nx * ny * nc))
        print("After change, shape of image is", reshaped_images.shape)
        return reshaped_images


    def _split_train_val(self, X, Y_table):
        """
        Split the given training data to training and valiation set
        :param X: Train images
        :param Y_table: Train labels
        :return: Train/Val images, Train/Val labels
        """
        n_samples = len(X)
        X_train = X[:int(n_samples * 0.8)]
        X_test = X[int(n_samples * 0.8):]
        Y_train = Y_table[:int(n_samples * 0.8)]
        Y_test = Y_table[int(n_samples * 0.8):]

        assert(len(X_train) == len(Y_train))
        assert(len(X_test) == len(Y_test))
        assert(len(X_train) + len(X_test) == len(X))

        print("X_train shape", X_train.shape)
        print("X_val shape", X_test.shape)
        print("Y_train shape", Y_train.shape)
        print("Y_vale shaep", Y_test.shape)
        return X_train, X_test, Y_train, Y_test


    #TODO
    def train(self, image_matrix, Y_dict):
        """
        :param image_matrix: Input images
        :param Y_dict: Labels for images
        """
        n_samples = len(image_matrix)
        print("Generating binary labels...")
        t1 = time.time()
        Y_names, Y_table = self._generate_binary_labels(Y_dict, n_samples)
        print("Done in", time.time() - t1, "seconds")
        
        print("reshaping images...")
        t1 = time.time()
        image_reshaped = self._reshape_image(image_matrix)
        print("Done in", time.time() - t1, "seconds")
        print("Splitting train / val...")
        t1 = time.time()
        X_train, X_val, Y_train, Y_val = self._split_train_val(image_reshaped, Y_table)
        print("Done in", time.time() - t1, "seconds")
        self.validation_set = [X_val, Y_names, Y_val]
        print("Starting preprocessing...")
        t1 = time.time()
        self._preprocess(X_train)
        print("Done in", time.time() - t1, "seconds")
        
        print("Starting processing...")
        t1 = time.time()
        self._process(X_train, Y_names, Y_train)
        print("Done in", time.time() - t1, "seconds")
        

    def _preprocess(self, X_train):
        """
        Preprocess the training data by applying PCA
        :param X_train: Training images
        """
        X_train_processed = self._pca(X_train)
        self.pre_data['pca'] = X_train_processed
        
        print("X_train shape: ", X_train.shape)
        print("X_train_pca shape: ", X_train_processed.shape)
        assert(X_train.shape[0] == X_train_processed.shape[0])
        assert(len(X_train.shape) == 2)
        assert(len(X_train_processed.shape) == 2)
        assert(X_train_processed.shape[1] < X_train.shape[1])
        
        

    def _process(self, X, Y_names, Y_table):
        for model_name, model_func in self.model_library.items():
            if 'pca' in model_name:
                assert('pca' in model_name)
                print("Full Model Name:", model_name, "using pca'd X_train")
                model_func(self.pre_data['pca'], Y_names, Y_table)
            else:
                print("Full Model Name:", model_name, "using regular X_train")
                model_func(X, Y_names, Y_table)

    #TODO
    def get_reduction(self, target_a):
        """
        Post processing step - will get the target_a from the query optimizer / query parser
        :param target_a: target accuracy; a number between 0 and 1
        :return: relevant statistics such as c, r values for utilization calculation
        """
        X_val, Y_names, Y_val = self.validation_set
        assert(len(X_val) == len(Y_val))
        assert(len(Y_names) == len(Y_val[0]))

        X_val_processed = self.trained_pre['pca'].transform(X_val)
        for Y_col in self.trained_post:
            print(Y_col)
            for model_name, model in self.trained_post[Y_col].items():
                print("  Model name:", model_name)
                if 'pca' in model_name:
                    predict_output = model.predict_proba(X_val_processed)
                    print(predict_output.shape)
                    if predict_output.shape[1] == 2:
                        probabilities = model.predict_proba(X_val_processed)[:, 1]
                    else:
                        probabilities = model.predict_proba(X_val_processed)[:, 0]
                else:
                    print(predict_output.shape)
                    if predict_output.shape[1] == 2:
                        probabilities = model.predict_proba(X_val)[:, 1]
                    else:
                        probabilities = model.predict_proba(X_val)[:, 0]
    
            index = Y_names.index(Y_col)
            th_, r_ = self._search_th_max(probabilities, Y_val[:, index], target_a)
            tup = (target_a, th_, r_)
            self.column_stats[Y_col][model_name].append(tup)


        return self.column_stats

    def _search_th_max(self, f_x, y, target_a):
        th_ = 0
        step_size = 0.01
        left_side = len(f_x[f_x > th_]) / len(y[y == 1])

        while left_side >= target_a:
            th_ += step_size
            left_side = len(f_x[f_x > th_]) / len(y[y == 1])

        th_ -= step_size
        r_ = 1 - len(f_x[f_x > th_]) / len(y)

        return th_, r_


    def predict(self, X_test, column_name, model_name):
        """
        The prediction function
        :param X_test: Test Set
        :param column_name: The column to use
        :param model_name: The model to use
        :return y_final: The predicted labels.
        """
        X_test_reduced = self._reshape_image(X_test)
        model = self.column_library[column_name][model_name]

        if 'pca' in model_name:
            X_test_processed = self.trained_pre['pca'].transform(X_test_reduced)
            y_hat = model.predict_proba(X_test_processed)
        else:
            y_hat = model.predict_proba(X_test)

        th_ = self.column_stats[column_name][1]
        y_final = y_hat > th_
        return y_final
    


#############################################################################################
################################## MODELS ###################################################
#############################################################################################

  #random forest
    def _rf(self, X, Y_names, Y_table):
        """
        Random Forest Model
        :param X:Input Features
        :param Y_names: Output column names
        :param Y_table: Output columns values
        """
        for idx, Y_column in enumerate(Y_names):
            tic = time.time()
            rf = RandomForestClassifier(max_depth=2, random_state=0)
            rf.fit(X, Y_table[:,idx])
            if Y_column not in self.trained_post:
                self.trained_post[Y_column] = {}
            self.trained_post[Y_column]['pca_rf'] = rf

            if Y_column not in self.column_stats:
                self.column_stats[Y_column] = {}
            self.column_stats[Y_column]["pca_rf"] = [round(time.time() - tic + self.pre_stats['pca'][0], 2)]   #
            print("rf finished training for column", Y_column, "in", time.time() - tic, "seconds")


    def _dnn(self, X, Y_names, Y_table):
        """
        Deep Neural Network Model
        :param X:Input Features
        :param Y_names: Output column names
        :param Y_table: Output columns values
        """
        for idx, Y_col in enumerate(Y_names):
            tic = time.time()
            dnn = MLPClassifier(solver='lbfgs', alpha=1e-5,
                              hidden_layer_sizes = (5, 2), random_state = 1)
            dnn.fit(X, Y_table[:, idx])
            if Y_col not in self.trained_post:
                self.trained_post[Y_col] = {}
            self.trained_post[Y_col]['dnn'] = dnn

            if Y_col not in self.column_stats:
                self.column_stats[Y_col] = {}
            self.column_stats[Y_col]["dnn"] = [round(time.time() - tic , 2) ]
            print("dnn finished training for column", Y_col, "in", time.time() - tic, "seconds")
            return


    def _svm(self, X, Y_names, Y_table):
        """
        SVM Model
        :param X:Input Features
        :param Y_names: Output column names
        :param Y_table: Output columns values
        """
        n_samples, n_columns = Y_table.shape
        assert(len(Y_names) == n_columns)

        for idx, Y_col in enumerate(Y_names):
            tic = time.time()
            if len(np.unique(Y_table[:, idx])) == 1:
                print("All the labels are same for column", Y_col)
                continue
            else:
                print("Training for", Y_col)
                svm = SVC(kernel = 'linear', probability = True, max_iter = 10000)
                svm.fit(X, Y_table[:, idx])
                if Y_col not in self.trained_post:
                    self.trained_post[Y_col] = {}
                self.trained_post[Y_col]['pca_svm'] = svm

                if Y_col not in self.column_stats:
                    self.column_stats[Y_col] = {}
                self.column_stats[Y_col]["pca_svm"] = [round(time.time() - tic + self.pre_stats['pca'][0],2)]
            print("svm finished training for column", Y_col, "in", time.time() - tic, "seconds")
        return




    def _pca(self, X):
        """
        Applies PCA
        :param X: Input Features
        :return X_new: PCA-transformed features
        """
        tic = time.time()
        pca = PCA(n_components = 9)
        X_new = pca.fit_transform(X)
        self.trained_pre['pca'] = pca
        print("time it took to train pca:", time.time() - tic, "seconds")
        self.pre_stats['pca'] = [round(time.time() - tic, 2)]

        return X_new



In [25]:
pp = PP()
pp.train(X_train, Y_train_dict)


Generating binary labels...
(10421, 21)
['t=car', 't=van', 't=bus', 't=others', 'c=red', 'c=white', 'c=black', 'c=silver', 's>40', 's>50', 's>60', 's<65', 's<70', 'i=pt335', 'i=pt211', 'i=pt342', 'i=pt208', 'o=pt335', 'o=pt211', 'o=pt342', 'o=pt208']
[[1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1.]
 [1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0.]
 [1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0.]
 [1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1.]
 [1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1.]
 [1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0.]]
Done in 0.7404599189758301 seconds
reshaping images...
before: (10421, 540, 960, 1)
Afte



svm finished training for column t=car in 1.1358530521392822 seconds
Training for t=van




svm finished training for column t=van in 3.154667615890503 seconds
Training for t=bus




svm finished training for column t=bus in 2.577979803085327 seconds
Training for t=others




svm finished training for column t=others in 1.790062427520752 seconds
Training for c=red




svm finished training for column c=red in 3.3205294609069824 seconds
Training for c=white




svm finished training for column c=white in 3.118685007095337 seconds
Training for c=black




svm finished training for column c=black in 3.014866828918457 seconds
Training for c=silver




svm finished training for column c=silver in 2.925121545791626 seconds
Training for s>40




svm finished training for column s>40 in 3.819061756134033 seconds
Training for s>50




svm finished training for column s>50 in 3.619727373123169 seconds
Training for s>60




svm finished training for column s>60 in 3.414785861968994 seconds
All the labels are same for column s<65
All the labels are same for column s<70
Training for i=pt335




svm finished training for column i=pt335 in 3.043963670730591 seconds
Training for i=pt211




svm finished training for column i=pt211 in 2.955667734146118 seconds
Training for i=pt342




svm finished training for column i=pt342 in 2.97627854347229 seconds
Training for i=pt208




svm finished training for column i=pt208 in 3.0167746543884277 seconds
Training for o=pt335




svm finished training for column o=pt335 in 3.0129618644714355 seconds
Training for o=pt211




svm finished training for column o=pt211 in 3.003312826156616 seconds
Training for o=pt342




svm finished training for column o=pt342 in 2.985189437866211 seconds
Training for o=pt208




svm finished training for column o=pt208 in 3.009824514389038 seconds
Full Model Name: dnn using regular X_train
dnn finished training for column t=car in 1.3862378597259521 seconds
Full Model Name: pca_rf using pca'd X_train
rf finished training for column t=car in 0.0446171760559082 seconds
rf finished training for column t=van in 0.043602705001831055 seconds
rf finished training for column t=bus in 0.043993473052978516 seconds
rf finished training for column t=others in 0.04417705535888672 seconds




rf finished training for column c=red in 0.04583883285522461 seconds
rf finished training for column c=white in 0.06354665756225586 seconds
rf finished training for column c=black in 0.06382322311401367 seconds
rf finished training for column c=silver in 0.04028940200805664 seconds




rf finished training for column s>40 in 0.04992413520812988 seconds
rf finished training for column s>50 in 0.04059767723083496 seconds
rf finished training for column s>60 in 0.040235280990600586 seconds
rf finished training for column s<65 in 0.020154237747192383 seconds
rf finished training for column s<70 in 0.011635541915893555 seconds
rf finished training for column i=pt335 in 0.04024362564086914 seconds
rf finished training for column i=pt211 in 0.04075050354003906 seconds




rf finished training for column i=pt342 in 0.05316162109375 seconds
rf finished training for column i=pt208 in 0.040895938873291016 seconds
rf finished training for column o=pt335 in 0.06205391883850098 seconds
rf finished training for column o=pt211 in 0.04056835174560547 seconds
rf finished training for column o=pt342 in 0.04317069053649902 seconds




rf finished training for column o=pt208 in 0.04070639610290527 seconds
Done in 58.21159052848816 seconds


In [26]:
print(pp.trained_post)

{'t=car': {'pca_svm': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=10000, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False), 'dnn': MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False), 'pca_rf': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
      

In [27]:
target_a1 = 0.8
target_a2 = 0.9
stats1 = pp.get_reduction(target_a1)
print(stats1)
print("---------------------------------")
stats2 = pp.get_reduction(target_a2)
print(stats2)




t=car
  Model name: pca_svm
(2085, 2)
  Model name: dnn
(2085, 2)
  Model name: pca_rf
(2085, 2)
t=van
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
t=bus
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
t=others
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
c=red
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
c=white
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
c=black
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
c=silver
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
s>40
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
s>50
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
s>60
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
i=pt335
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
i=pt211
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(2085, 2)
i=pt342
  Model name: pca_svm
(2085, 2)
  Model name: pca_rf
(20

In [None]:
##############################################################################

n_samples = len(X_train)
print("Generating binary labels...")
t1 = time.time()
Y_names, Y_table = pp._generate_binary_labels(Y_train_dict, n_samples)
print("Done in", time.time() - t1, "seconds")

print("reshaping images...")
t1 = time.time()
image_reshaped = pp._reshape_image(X_train)
print("Done in", time.time() - t1, "seconds")
print("Splitting train / val...")
t1 = time.time()
X_train_, X_val, Y_train, Y_val = pp._split_train_val(image_reshaped, Y_table)
print("Done in", time.time() - t1, "seconds")
validation_set = [X_val, Y_names, Y_val]
print("Starting preprocessing...")
t1 = time.time()
pp._preprocess(X_train_)
print("Done in", time.time() - t1, "seconds")

In [None]:
pp._process(X_train_, Y_names, Y_train)

Full Model Name: pca_svm using pca'd X_train
svm finished training for column t=car in 28.957067728042603 seconds


In [9]:
import time
arr = []

X_train_processed = pp._pca(X_train_)
X_val_processed = pp._pca(X_val)


time it took to train pca: 1.3275830745697021 seconds
time it took to train pca: 0.3375887870788574 seconds


In [11]:
for idx, Y_col in enumerate(Y_names):
    if len(np.unique(Y_train[:,idx])) == 1:
        print("Column ", Y_col, "only has 1 value, skipping")
        continue
    tic = time.time()
    print("Working on col", Y_col)
    svm = SVC(kernel = 'linear', probability = True, max_iter = 10000)
    svm.fit(X_train_processed, Y_train[:, idx])
    print("Finished training ", Y_col, "in", time.time() - tic, "seconds")
    score = svm.score(X_val_processed, Y_val[:, idx])
    print("Eval score for ", Y_col,"is", score)
    arr.append(svm)
    
for idx, Y_col in enumerate(Y_names):
    if len(np.unique(Y_train[:,idx])) == 1:
        print("Column ", Y_col, "only has 1 value, skipping")
        continue
    tic = time.time()
    print("Working on col", Y_col)
    rf = RandomForestClassifier(max_depth=2, random_state=0)
    rf.fit(X_train_processed, Y_train[:,idx])
    print("Finished training ", Y_col, "in", time.time() - tic, "seconds")
    score = rf.score(X_val_processed, Y_val[:, idx])
    print("Eval score for ", Y_col,"is", score)
    arr.append(rf)
    
for idx, Y_col in enumerate(Y_names):
    if len(np.unique(Y_train[:,idx])) == 1:
        print("Column ", Y_col, "only has 1 value, skipping")
        continue
    tic = time.time()
    print("Working on col", Y_col)
    dnn = MLPClassifier(solver='lbfgs', alpha=1e-5,
                          hidden_layer_sizes = (5, 2), random_state = 1, )
    dnn.fit(X_train_, Y_train[:, idx])
    print("Finished training ", Y_col, "in", time.time() - tic, "seconds")
    score = dnn.score(X_val, Y_val[:, idx])
    print("Eval score for ", Y_col,"is", score)
    arr.append(dnn)

Working on col t=car




Finished training  t=car in 1.3056917190551758 seconds
Eval score for  t=car is 0.8776978417266187
Working on col t=van




Finished training  t=van in 3.076890468597412 seconds
Eval score for  t=van is 0.5625899280575539
Working on col t=bus




Finished training  t=bus in 2.6166961193084717 seconds
Eval score for  t=bus is 0.6997601918465228
Working on col t=others




Finished training  t=others in 1.8666341304779053 seconds
Eval score for  t=others is 0.8345323741007195
Working on col c=red




Finished training  c=red in 2.9099578857421875 seconds
Eval score for  c=red is 0.38369304556354916
Working on col c=white




Finished training  c=white in 2.8886494636535645 seconds
Eval score for  c=white is 0.5515587529976019
Working on col c=black




Finished training  c=black in 3.0476584434509277 seconds
Eval score for  c=black is 0.4839328537170264
Working on col c=silver




Finished training  c=silver in 2.910611391067505 seconds
Eval score for  c=silver is 0.47290167865707433
Working on col s>40




Finished training  s>40 in 3.603717803955078 seconds
Eval score for  s>40 is 0.5059952038369304
Working on col s>50




Finished training  s>50 in 3.5613009929656982 seconds
Eval score for  s>50 is 0.49976019184652276
Working on col s>60




Finished training  s>60 in 3.2217729091644287 seconds
Eval score for  s>60 is 0.543884892086331
Column  s<65 only has 1 value, skipping
Column  s<70 only has 1 value, skipping
Working on col i=pt335




Finished training  i=pt335 in 2.8544254302978516 seconds
Eval score for  i=pt335 is 0.5328537170263788
Working on col i=pt211




Finished training  i=pt211 in 2.925044298171997 seconds
Eval score for  i=pt211 is 0.49016786570743404
Working on col i=pt342




Finished training  i=pt342 in 2.860210657119751 seconds
Eval score for  i=pt342 is 0.6508393285371703
Working on col i=pt208




Finished training  i=pt208 in 2.8574297428131104 seconds
Eval score for  i=pt208 is 0.539568345323741
Working on col o=pt335




Finished training  o=pt335 in 2.8611817359924316 seconds
Eval score for  o=pt335 is 0.5328537170263788
Working on col o=pt211




Finished training  o=pt211 in 2.9083049297332764 seconds
Eval score for  o=pt211 is 0.49016786570743404
Working on col o=pt342




Finished training  o=pt342 in 2.9208898544311523 seconds
Eval score for  o=pt342 is 0.6508393285371703
Working on col o=pt208
Finished training  o=pt208 in 2.845266819000244 seconds
Eval score for  o=pt208 is 0.539568345323741


