In [None]:
import numpy as np
import pandas as pd
import os
from collections import defaultdict

### Globals

In [None]:
# Directories read by the cells below (algorithm result files / per-instance feature files).
OUTPUT_PATH = 'outputs/'
FEATURES_PATH = 'features/'

# Algorithm names; a label k > 0 produced by get_best means ALGORITHMS[k - 1].
ALGORITHMS = ['dp1', 'dp2', 'greedy', 'bf', 'bnb', 'cplex']

# Level identifiers, and for each level the number of instance ids iterated
# when building the training set (LEVELS_SIZE[lvl - 1] belongs to level lvl).
LEVELS = list(range(1, 9))
LEVELS_SIZE = [19198, 9983, 6664, 4994, 3999, 3333, 2857, 2500]

# Column labels for the feature vector when it is wrapped in a DataFrame.
FEATURES = [
    'num_elem', 'cap',
    'cap_mean_w', 'cap_median_w', 'cap_desv_w',
    'mean_w_mean_v', 'median_w_median_v', 'desv_w_desv_v',
    'max_w_min_w', 'max_v_min_v',
    'mean_w', 'median_w', 'desv_w', 'min_w', 'max_w',
    'mean_v', 'median_v', 'desv_v', 'min_v', 'max_v',
    'p_coef',
]

### Helpers

In [None]:
def get_features(lvl, id_):
    """Load the feature vector of one instance as a 1-D float array.

    Reads the file ``<FEATURES_PATH>/<lvl>/<id_>``, splits its content on
    whitespace and converts every token to ``float``.

    Parameters
    ----------
    lvl : level identifier, used as the sub-directory name.
    id_ : instance identifier, used as the file name.

    Returns
    -------
    numpy.ndarray of dtype float, one entry per whitespace-separated token.

    Raises
    ------
    OSError if the file cannot be opened; ValueError if a token is not numeric.
    """
    path = os.path.join(FEATURES_PATH, str(lvl), str(id_))
    # This function is called once per instance, so close the handle
    # deterministically instead of leaving it to the garbage collector.
    with open(path, 'r') as feature_file:
        return np.array(feature_file.read().split(), dtype=float)

def get_best(lvl, id_, t):
    """Pick the best algorithm for an instance under a time budget.

    Scans every recorded run ``[fo, time]`` of every algorithm in
    ``ALGORITHMS`` for instance ``id_`` of level ``lvl``. Only runs with
    ``time < t`` and ``fo >= 0`` are candidates. Among candidates the one with
    the highest objective value wins; ties on the objective are broken by the
    shortest time. Higher ``fo`` is treated as better, matching the ``>=``
    comparison the function has always used.

    Parameters
    ----------
    lvl : level identifier (key of ``alg_data[alg]``).
    id_ : instance identifier (key of ``alg_data[alg][lvl]``).
    t   : time budget; runs must be strictly faster than this.

    Returns
    -------
    int: 1-based position of the winning algorithm in ``ALGORITHMS``,
    or 0 when no run qualifies.
    """
    best_fo = 0
    best_alg = 0
    best_time = 1000000
    for position, alg in enumerate(ALGORITHMS, start=1):
        if lvl not in alg_data[alg]:
            continue  # no results loaded for this algorithm/level pair
        for fo, elapsed in alg_data[alg][lvl][id_]:
            if elapsed >= t:
                continue  # did not finish within the budget
            better_value = fo > best_fo
            same_value_but_faster = fo == best_fo and elapsed < best_time
            if better_value or same_value_but_faster:
                # Track the incumbent objective as well; it was previously
                # never updated, so the comparison on `fo` had no effect.
                best_fo = fo
                best_time = elapsed
                best_alg = position
    return best_alg

def plot_value_array(i, predictions_array, true_label):
    """Draw a bar chart of the class scores of sample ``i``.

    ``predictions_array[i]`` supplies the 7 bar heights (class 'NO' followed by
    the entries of ``ALGORITHMS``) and ``true_label[i]`` is the index of the
    true class. The highest bar is coloured red, then the true-class bar is
    coloured blue (so a correct prediction ends up blue).
    """
    scores = predictions_array[i]
    actual = true_label[i]

    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    bars = plt.bar(range(7), scores, color="#777777")
    plt.ylim([0, 1])
    plt.xticks(np.arange(7), ('NO', *ALGORITHMS))

    guessed = np.argmax(scores)
    bars[guessed].set_color('red')
    bars[actual].set_color('blue')

### Reading outputs

In [None]:
# Load every available result file into
#   alg_data[algorithm][level][instance_id] -> list of [fo, time] pairs.
# Files are named "<algorithm>_<level>" inside OUTPUT_PATH; each non-empty line
# holds three whitespace-separated fields: instance id, objective value, time.
output_files = os.listdir(OUTPUT_PATH)
alg_data = defaultdict(list)

print("Faltan los siguientes resultados:")
for alg in ALGORITHMS:
    alg_data[alg] = defaultdict(list)
    for lvl in LEVELS:
        filename = alg + "_" + str(lvl)
        if filename not in output_files:  # no results yet for this algorithm/level
            print(alg, lvl)
            continue
        alg_data[alg][lvl] = defaultdict(list)
        # Context manager so each result file is closed as soon as it is read.
        with open(OUTPUT_PATH + filename, 'r') as results_file:
            for row in results_file:
                if not row.strip():
                    continue  # tolerate blank lines (e.g. a trailing newline)
                id_, fo, elapsed = row.split()
                alg_data[alg][lvl][int(id_)].append([float(fo), float(elapsed)])

In [None]:
# Time budgets on a logarithmic scale: 1.1**e - 1.1 for 100 exponents evenly
# spaced in [1, 30], so the first budget is 0.
timesteps = np.logspace(start=1, stop=30, num=100, base=1.1) - 1.1

In [None]:
# Build the supervised data set: one row per (instance, time budget) pair.
#   train_data  row -> the instance's features followed by the budget + 1
#   train_label row -> get_best(...) for that instance and budget
train_data, train_label = [], []
count = 0  # number of instances processed

for lvl in LEVELS:
    print("level " + str(lvl) + "...")
    n_instances = LEVELS_SIZE[lvl - 1]
    for id_ in range(n_instances):
        fts = get_features(lvl, id_)
        for t in timesteps:
            train_data.append([*fts, t + 1])
            train_label.append(get_best(lvl, id_, t))
        count += 1

In [None]:
# Eyeball comparison: print the raw runs of every algorithm for one instance
# and the algorithm get_best selects for a time budget of 1.
lvl_ = 1
id_ = 100
for alg in ALGORITHMS:
    # .get() instead of indexing: indexing the defaultdicts would insert empty
    # entries for missing levels/instances and corrupt later lookups.
    print(alg, alg_data[alg].get(lvl_, {}).get(id_, []))

best = get_best(lvl_, id_, 1)
# get_best returns 0 when no algorithm qualifies; ALGORITHMS[0 - 1] would then
# silently report the last algorithm, so show the 'NO' class instead.
print('BEST: ', ALGORITHMS[best - 1] if best else 'NO')

In [None]:
# Tabular view of the training rows: the feature columns plus the time column 't'.
data = pd.DataFrame(train_data, columns=FEATURES + ['t'])

## NN

In [None]:
# TensorFlow and Keras
import tensorflow as tf
#from tensorflow import keras
#from keras.layers import Dropout, Dense
from keras import Sequential
from keras import optimizers
from keras.models import Model   
from keras.layers import *
import keras
# Helper libraries
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split

### Train and Test set

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    train_data,
    train_label,
    test_size=0.3,
    random_state=42,
)

### Saving

In [None]:
# Persist the four splits with np.savetxt, one file per array.
for csv_name, split_array in (
    ('X_train.csv', X_train),
    ('X_test.csv', X_test),
    ('y_train.csv', y_train),
    ('y_test.csv', y_test),
):
    with open(csv_name, 'w') as FOUT:
        np.savetxt(FOUT, split_array)

### Loading

In [None]:
# Reload the training labels written by the saving cell.
y_train = np.loadtxt(fname='y_train.csv', dtype=float)

In [None]:
# Reload the training features written by the saving cell.
X_train = np.loadtxt(fname='X_train.csv', dtype=float)

### Model

In [None]:
# Feed-forward classifier: 22 inputs -> 128 -> 64 -> 64 -> 7-way softmax,
# with dropout (rate 0.5) after each 64-unit layer.
model = Sequential()
model.add(Dense(128, input_shape=(22,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(7, activation='softmax'))

In [None]:
# sgd = optimizers.SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
# Pass the configured instance. The string 'adam' makes Keras build a fresh
# default optimizer, so edits to the hyper-parameters above would be ignored.
# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(optimizer=adam,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs; the last 30% of the training rows are held out by Keras
# for validation. `history` keeps the per-epoch metrics for the plots below.
history = model.fit(
    x=np.array(X_train),
    y=np.array(y_train),
    validation_split=0.3,
    epochs=10,
)

In [None]:
# Accuracy per epoch. Keras releases differ on the history key name
# ('acc' in older versions, 'accuracy' in newer ones), so accept either.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key], label='train')

# The validation curve exists only when fit() was given validation data.
# Label each line explicitly so the legend always matches what is drawn.
val_acc_key = 'val_' + acc_key
if val_acc_key in history.history:
    plt.plot(history.history[val_acc_key], label='validation')

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Loss per epoch. Each line carries its own label so the legend cannot list a
# curve that was not drawn.
plt.plot(history.history['loss'], label='train')

# The validation curve exists only when fit() was given validation data.
if 'val_loss' in history.history:
    plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline: a random forest of 100 trees of depth 3, trained on all cores with
# a fixed seed. `h` receives whatever fit() returns.
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=0,
    n_jobs=-1,
    verbose=1,
)
h = clf.fit(X_train, y_train)

In [None]:
# Score of the random forest on the test split (displayed as the cell output).
clf.score(X_test, y_test)

In [None]:
# Class label predicted by the random forest for every test row.
pred = clf.predict(X=X_test)

In [None]:
from sklearn import svm

# Second baseline: a linear support-vector classifier with default settings,
# fitted on the same training split as the other models.
clfsvm = svm.LinearSVC()
clfsvm.fit(X_train, y_train)  

In [None]:
# Score of the linear SVC on the test split (displayed as the cell output).
clfsvm.score(X_test, y_test)

### Evaluate Accuracy (test set)

In [None]:
# Reload the held-out features and labels written by the saving cell.
X_test, y_test = (
    np.loadtxt(csv_path, dtype=float)
    for csv_path in ('X_test.csv', 'y_test.csv')
)

In [None]:
# Evaluate the network on the held-out split. The model was compiled with one
# metric, so evaluate() yields the pair (loss, accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)

print('Test accuracy:', test_acc)

### Individual Predictions

In [None]:
# red  = predicted class
# blue = true class
i = 5
print(X_test[i])
print(pred[:10])
print(y_test[:10])

# `pred` holds one class label per row, but plot_value_array needs one score
# per class. Ask the forest for class probabilities of sample i and scatter
# them into a full-width row: predict_proba only has columns for the classes
# seen during training (clf.classes_), which may be fewer than all classes.
n_classes = len(ALGORITHMS) + 1  # 'NO' plus one class per algorithm
class_probs = np.zeros((1, n_classes))
class_probs[0, clf.classes_.astype(int)] = clf.predict_proba([X_test[i]])[0]

plt.figure()
# Single-row arrays, hence index 0 for both the scores and the true label.
plot_value_array(0, class_probs, np.array([y_test[i]], dtype=int))

### Confusion matrix

In [None]:
#best_predictions = np.argmax(predictions,axis=1)

# Confusion matrix of the random-forest predictions against the true labels
# (cast to int), rendered as a blue heat map.
true_classes = np.array(y_test, dtype=int)
cm = sklearn.metrics.confusion_matrix(true_classes, pred)

fig, ax = plt.subplots()
ax.matshow(cm, cmap=plt.cm.Blues)

In [None]:
# Network prediction for the first test row; the model expects a batch, so the
# single row gets a leading batch axis first.
sample = np.expand_dims(np.array(X_test[0]), axis=0)
predictions_single = model.predict(sample)

print(predictions_single)

In [None]:
# True label of the first test row, for comparison with the prediction above.
y_test[0]