<a href="https://colab.research.google.com/github/claudiosegala/Monografia/blob/master/code/tcc_plot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Import the drive submodule explicitly: a bare `import google` does not load
# `google.colab.drive` by itself and only works when something else has
# already imported it into the running kernel.
from google.colab import drive

# Make the user's Google Drive available under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
!pip install matplotlib
!pip install pandas
!pip install numpy



In [0]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [0]:
# Root folder inside the mounted Google Drive. Every plot and result path in
# this notebook is built by appending directly to it, so the trailing slash
# is required.
PATH = '/content/drive/My Drive/TCC/'

In [0]:
def plot_history (history, name):
  """ Plot of History
  
  Plot the training and validation loss of a model and save the figure
  as "{PATH}plots/history/{name}.png".
  
  Arguments:
    history: either the object returned by Keras fit of a model (its
      `history` attribute is read) or directly the mapping it holds;
      in both cases the 'loss' and 'val_loss' entries are plotted
    name: the name of the model, used in the title and in the file name
  """
  
  path = f"{PATH}plots/history/{name}"
  
  # Data reloaded with json.load can only contain plain containers, never a
  # Keras History object, so fall back to the argument itself when it has no
  # `history` attribute.
  losses = getattr(history, 'history', history)
  
  plt.plot(losses['loss'])
  plt.plot(losses['val_loss'])
  plt.title(name + ' Model Loss')
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(['train', 'test'], loc='upper left')
  plt.rcdefaults()
  
  plt.savefig(path + ".png", bbox_inches='tight')
  # plt.savefig(path + ".pdf")
  
  # Release the figure so successive calls do not draw on top of each other.
  plt.close('all')

In [0]:
def plot_prediction (Y, Y_hat, title):
  """ Plot Prediction
  
  Plot the prediction (Flow x Time) of what was expected and what
  was predicted. One figure is produced per entry of Y and saved as
  "{PATH}plots/prediction/{title} (XX of N).png".
  
  Arguments:
    Y: sequence of expected series, one per figure
    Y_hat: sequence of predicted series, indexed in parallel with Y
    title: text used in the plot title and in the file names
  """

  total = len(Y)

  for i in range(total):
    # Zero-padded 1-based counter keeps the files sorted by name.
    name = f"{title} ({str(i+1).zfill(2)} of {total})"
    path = f"{PATH}plots/prediction/{name}"
    
    plt.plot(Y[i])
    plt.plot(Y_hat[i])
    # The separating space was missing, which glued the two words together.
    plt.title(title + ' Prediction')
    plt.ylabel('Flow')
    plt.xlabel('Time')
    plt.legend(['actual', 'prediction'], loc='upper left')
    plt.rcdefaults()

    plt.savefig(path + ".png", bbox_inches='tight')
    # plt.savefig(path + ".pdf")

    # Start the next iteration from a clean figure.
    plt.close('all')

In [0]:
def plot_precision_bucket (results):
  """ Plot Precision Bucket 
  
  Plot a stacked bar graph of the precision measured by the buckets and
  save it as "{PATH}plots/precision.png".
  
  Arguments:
    results: mapping of model name to a struct whose 'PRE' entry lists one
      precision value per bucket
  """
  
  path = f"{PATH}plots/precision"
  models = list(results.keys())
  n_models = len(results)
  x_positions = np.arange(n_models)    # the x locations for the groups
  bar_width = 0.35                     # the width of the bars
  n_buckets = len(results[models[0]]['PRE'])

  # heights[b][m] is the precision of model m in bucket b.
  heights = [
    [entry["PRE"][b] for entry in results.values()]
    for b in range(n_buckets)
  ]

  # Each layer starts where the accumulated previous layers ended.
  bottoms = []
  running = [0] * n_models
  for layer in heights:
    bottoms.append(running)
    running = [base + height for base, height in zip(running, layer)]
  
  legend_handles, legend_labels = [], []
  
  for b, (layer, base) in enumerate(zip(heights, bottoms)):
    bars = plt.bar(x_positions, tuple(layer), bar_width, bottom=tuple(base))
    legend_handles.append(bars[0])
    legend_labels.append(f"Bucket of {2**b}")

  plt.ylabel('Scores')
  plt.title('Precision by model and bucket')
  plt.xticks(x_positions, models, rotation=90)
  plt.yticks(np.arange(0, 1.05, 0.05))
  plt.legend(tuple(legend_handles), tuple(legend_labels))
  
  plt.savefig(path + ".png", bbox_inches='tight')
  # plt.savefig(path + ".pdf")

  plt.close('all')

In [0]:
def plot_performance(results, metric, y_label, title):
  """ Plot Performance
  
  Draw one bar per model for a single metric and save the figure as
  "{PATH}plots/performance/{title} Performance Bar.png".
  
  Arguments:
    results: mapping of model name to a struct holding the metric values
    metric: the name of the property of the metric
    y_label: the name of the label of the metric
    title: the title of the plot (also part of the file name)
  """
  
  path = f"{PATH}plots/performance/{title} Performance Bar"
  
  model_names = tuple(results.keys())
  bar_heights = [entry[metric] for entry in results.values()]
  bar_positions = np.arange(len(model_names))

  # Reset the rc parameters before drawing anything.
  plt.rcdefaults()
  plt.bar(bar_positions, bar_heights, align='center', alpha=0.5)
  plt.xticks(bar_positions, model_names, rotation=90)
  plt.title(title)
  plt.ylabel(y_label)

  plt.savefig(path + ".png", bbox_inches='tight')
  # plt.savefig(path + ".pdf")
    
  plt.close('all')

In [0]:
def plot_performance_improved(results, metric, y_label, title):
  """ Plot Performance Improved
  
  Draw one horizontal box per model for a single metric and save the
  figure as "{PATH}plots/performance/{title} Performance Boxes.png".
  
  Arguments:
    results: the struct that contain the results of the models; the
      samples are read from results[model]['raw'][metric]
    metric: the name of the property of the metric
    y_label: the name of the label of the metric (shown on the x axis,
      because the boxes are horizontal)
    title: the title of the plot (also part of the file name)
  """
  
  path = f"{PATH}plots/performance/{title} Performance Boxes"
  
  _, axes = plt.subplots()
  axes.set(title=title, xlabel=y_label, ylabel='Model')
  
  samples_per_model = [entry['raw'][metric] for entry in results.values()]
  axes.boxplot(samples_per_model, vert=False)

  # Tick labels are assigned after the boxes exist, one per model.
  axes.set_yticklabels(list(results.keys()))
  
  plt.savefig(path + ".png", bbox_inches='tight')
  # plt.savefig(path + ".pdf")
    
  plt.close('all')

In [0]:
def plot_results_comparison(name, xlabel, xticks, metric, data=None):
  """ Plot Results Comparison
  
  Plot one line per model showing how a metric evolves across the
  snapshots of a comparison and save the figure under
  "{PATH}plots/comparison/".
  
  Arguments:
    name: the title of the plot; lower-cased and with spaces replaced by
      underscores it also forms the file name
    xlabel: the label of the x axis
    xticks: the tick labels of the x axis
    metric: the name of the property of the metric; it is also the y axis
      label and, lower-cased, the suffix of the file name
    data: list of snapshots, each holding a 'results' mapping of model
      name to metrics. When omitted, the notebook-level variable
      `comparison_data` is used, which keeps existing calls working.
  """
  
  if data is None:
    # Legacy behaviour: read the comparison loaded by the calling cell.
    data = comparison_data

  path = f"{PATH}plots/comparison/{name.lower().replace(' ', '_')}_{metric.lower()}"

  # The models of the first snapshot define the lines that are drawn.
  models = list(data[0]['results'])
  
  for model in models:
    datapoints = [snapshot['results'][model][metric] for snapshot in data]
    plt.plot(datapoints) 

  plt.title(name)
  plt.ylabel(metric)
  plt.xlabel(xlabel)
  plt.xticks(np.arange(len(xticks)), xticks)
  plt.legend(models, loc='upper left')
  plt.rcdefaults()

  plt.savefig(path + ".png", bbox_inches='tight')
  # plt.savefig(path + ".pdf")
    
  plt.close('all')

In [0]:
def plot_snapshot(results):
  """ Plot Snapshot
  
  Render every per-snapshot plot: one box plot per metric, then the
  prediction plots (and the loss history, when present) of each model.
  
  Arguments:
    results: mapping of model name to a struct with a 'raw' entry holding
      'expected', 'observed' and, optionally, 'history'
  """

  # (metric, axis label, title) of each box plot. The bar-chart variants
  # (plot_performance), the NRMSE metric and plot_precision_bucket are
  # currently disabled.
  # NOTE(review): 'Max Absolute Error' may be meant as 'Mean Absolute Error';
  # left as is because the title is also part of the output file name.
  box_plots = (
    ('TIME', 'Seconds', 'Training Time Comparison'),
    ('RMSE', 'RMSE', 'Root Mean Square Error Comparison'),
    ('MAE', 'MAE', 'Max Absolute Error Comparison'),
    ('HR', 'Percentage', 'Hit Ratio Comparison'),
  )

  for metric, label, title in box_plots:
    plot_performance_improved(results, metric, label, title)

  for name, entry in results.items():
    raw = entry['raw']

    plot_prediction(raw['expected'], raw['observed'], name)
    
    if 'history' in raw:
      plot_history(raw['history'], name)

In [0]:
def print_json (obj):
  """ Print JSON
  
  Print an object as JSON with sorted keys and 4 spaces of indentation.
  
  Arguments:
    obj: any JSON-serializable object
  """

  pretty = json.dumps(obj, indent=4, sort_keys=True)
  print(pretty)

In [0]:
# Flow interval experiment: per-snapshot plots followed by the comparison
# of each metric across the tested intervals.
name_time = 1573342241  # numeric suffix of the results file to load

with open(f"{PATH}results/comparison/flow_interval_comparison_{name_time}.json", 'r') as json_file:
  comparison_data = json.load(json_file)

# The file is fully parsed at this point, so plotting happens after it is closed.
# NOTE: the plot file names do not include the snapshot, so every iteration
# overwrites the files written by the previous one.
for snapshot in comparison_data:
  plot_snapshot(snapshot['results'])

plot_name = 'Flow Interval for Training Comparison'
x_label = 'Flow Size in Seconds'
values = [snapshot['meta']['FLOW_INTERVAL'] for snapshot in comparison_data]

# NRMSE is currently left out of the comparison.
for metric_name in ('RMSE', 'MAE', 'HR', 'TIME'):
  plot_results_comparison(plot_name, x_label, values, metric_name)

In [0]:
# Number of splits experiment: per-snapshot plots followed by the comparison
# of each metric across the tested split counts.
name_time = 1573342172  # numeric suffix of the results file to load

with open(f"{PATH}results/comparison/n_split_comparison_{name_time}.json", 'r') as json_file:
  comparison_data = json.load(json_file)

# The file is fully parsed at this point, so plotting happens after it is closed.
# NOTE: the plot file names do not include the snapshot, so every iteration
# overwrites the files written by the previous one.
for snapshot in comparison_data:
  plot_snapshot(snapshot['results'])

plot_name = 'Number of Splits for Training Comparison'
x_label = 'Number of Splits'
values = [snapshot['meta']['N_SPLITS'] for snapshot in comparison_data]

# NRMSE is currently left out of the comparison.
for metric_name in ('RMSE', 'MAE', 'HR', 'TIME'):
  plot_results_comparison(plot_name, x_label, values, metric_name)

In [0]:
# Predict-in-future experiment: per-snapshot plots followed by the comparison
# of each metric across the tested prediction horizons.
name_time = 1573342238  # numeric suffix of the results file to load

with open(f"{PATH}results/comparison/predict_future_comparison_{name_time}.json", 'r') as json_file:
  comparison_data = json.load(json_file)

# The file is fully parsed at this point, so plotting happens after it is closed.
# NOTE: the plot file names do not include the snapshot, so every iteration
# overwrites the files written by the previous one.
for snapshot in comparison_data:
  plot_snapshot(snapshot['results'])

plot_name = 'Predict Future for Training Comparison'
x_label = 'Time in the Future in Minutes'
values = [snapshot['meta']['PREDICT_IN_FUTURE'] for snapshot in comparison_data]

# NRMSE is currently left out of the comparison.
for metric_name in ('RMSE', 'MAE', 'HR', 'TIME'):
  plot_results_comparison(plot_name, x_label, values, metric_name)

In [0]:
# Seeable past experiment: per-snapshot plots followed by the comparison
# of each metric across the tested history lengths.
name_time = 1573342180  # numeric suffix of the results file to load

with open(f"{PATH}results/comparison/seeable_past_comparison_{name_time}.json", 'r') as json_file:
  comparison_data = json.load(json_file)

# The file is fully parsed at this point, so plotting happens after it is closed.
# NOTE: the plot file names do not include the snapshot, so every iteration
# overwrites the files written by the previous one.
for snapshot in comparison_data:
  plot_snapshot(snapshot['results'])

plot_name = 'Seeable Past for Training Comparison'
x_label = 'Seeable Past in Seconds'
values = [snapshot['meta']['SEEABLE_PAST'] for snapshot in comparison_data]

# NRMSE is currently left out of the comparison.
for metric_name in ('RMSE', 'MAE', 'HR', 'TIME'):
  plot_results_comparison(plot_name, x_label, values, metric_name)