# Deephyper analytics - multi study
**path to data file**: {{path_to_data_file}}

For customization, please see: https://matplotlib.org/api/matplotlib_configuration_api.html

## Setup & Data loading

In [None]:
# Template placeholders: presumably substituted by the notebook generator
# before this cell is executed (TODO confirm against the rendering code).
# path_to_data_file is iterated later as a collection of JSON file paths.
path_to_data_file = {{path_to_data_file}}
# labels is zipped with the loaded studies, i.e. one display label per file.
labels = {{labels}}

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import json
from pprint import pprint
from datetime import datetime 

# Default figure geometry: the height is derived from the width using a
# 1.618 aspect ratio.
width = 21
height = width/1.618

# Global matplotlib defaults shared by every figure in this notebook.
matplotlib.rcParams.update({
    # figure
    'figure.figsize': (width, height),
    'figure.facecolor': 'white',
    'figure.edgecolor': 'white',
    'figure.subplot.bottom': 0.125,
    'savefig.dpi': 72,
    # text
    'font.size': 21,
    'xtick.labelsize': 21,
    'ytick.labelsize': 21,
})

def to_sec(ts):
    """Convert a timestamp string to seconds since the epoch.

    Two layouts are accepted: ``'%Y-%m-%d %H:%M:%S'`` and the same layout
    followed by fractional seconds (``'.%f'``). The parsed datetime is naive
    (no timezone information is read from the string).

    Args:
        ts: timestamp string in one of the two layouts above.

    Returns:
        The value of ``datetime.timestamp()`` for the parsed datetime (float).

    Raises:
        ValueError: if ``ts`` matches neither layout.
    """
    try:
        parsed = datetime.strptime(ts, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Only a format mismatch should trigger the fallback; anything else
        # (KeyboardInterrupt, wrong argument type, ...) must propagate.
        parsed = datetime.strptime(ts, '%Y-%m-%d %H:%M:%S.%f')
    return parsed.timestamp()

def load_json(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 so that decoding does not depend
    on the platform's default locale encoding.

    Args:
        path: location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Load every study file, keeping the same order as path_to_data_file.
data_list = list(map(load_json, path_to_data_file))

# Show which top-level sections each study file provides.
for label, data in zip(labels, data_list):
    print(f'file: {label} has keys: {list(data.keys())}')


# Collect the first 'start_infos' record of every study; None marks a study
# for which that record cannot be read.
start_infos_list = []
for data in data_list:
    try:
        start_infos = data['start_infos'][0]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty list, or a value that cannot be indexed: treat
        # all of them as "no start_infos". Other errors are not swallowed.
        start_infos = None
    start_infos_list.append(start_infos)
for label, start_infos in zip(labels, start_infos_list):
    print(f' - file:{label}')
    pprint(start_infos)

if None in start_infos_list:
    print("Some of the experiments doesn't have 'start_infos'.")

## Search trajectory
### Raw rewards

In [None]:
def moving_average(data_list, window_size=100):
    """Return the simple moving average of ``data_list``.

    Every full window of ``window_size`` consecutive values contributes one
    output value, including the window that ends on the last element, so the
    result has ``len(data_list) - window_size + 1`` entries. With
    ``window_size=1`` the output has the same length as the input.

    Args:
        data_list: sliceable sequence of numbers.
        window_size: number of consecutive values averaged together (>= 1).

    Returns:
        List of window means; empty when the input is shorter than the window.

    Raises:
        ValueError: if ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f'window_size must be >= 1, got {window_size}')
    # "+ 1" keeps the final window; without it the last sample is dropped.
    n_windows = len(data_list) - window_size + 1
    return [sum(data_list[i:i + window_size]) / window_size
            for i in range(n_windows)]

# Smoothing window (in evaluations) applied to the raw rewards.
window_size = 100

# One smoothed reward curve per study; the x axis is the evaluation index.
for label, data in zip(labels, data_list):
    plt.plot(
        moving_average(data['raw_rewards'], window_size=window_size),
        label=label,
    )

plt.xlabel('Evaluations')
plt.ylabel('Reward')
plt.xlim(left=0)
plt.grid()
plt.legend()
plt.show()

### Average reward per batch

In [None]:

# Mean reward of each 'env_stats' entry, plotted against the time elapsed
# since the first workload sample of the same study.
for label, data in zip(labels, data_list):
    try:
        avr_reward_mbatches = [np.mean(stats['rewards']) for stats in data['env_stats']]
        start_time = to_sec(data['workload']['times'][0])
        times = [to_sec(stats['timestamp'])-start_time for stats in data['env_stats']]

        plt.plot(times, avr_reward_mbatches, label=label)
    except Exception:
        # Best effort: a study lacking the required fields is skipped.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # still stop the notebook.
        print(f'file:{label} skipped.')

plt.ylabel('Reward')
plt.xlabel('Time (s.)')
plt.xlim(left=0)
plt.grid()
plt.legend()
plt.show()

## Workload
### Profiles

In [None]:
# Step curve of the workload 'num_running' series (minus one) per study.
for label, data in zip(labels, data_list):
    # Times are expressed in seconds relative to the first workload sample.
    start_time = to_sec(data['workload']['times'][0])
    times = list(map(lambda t: to_sec(t)-start_time, data['workload']['times']))
    # NOTE(review): the "- 1" presumably discounts one process that is not
    # an evaluation; confirm against the code that writes the workload.
    num_running = np.subtract(data['workload']['num_running'], 1)

    plt.step(times, num_running, label=label, where='post')

plt.ylabel('Node utilization')
plt.xlabel('Time (s)')
plt.ylim(bottom=0)
plt.xlim(left=0)
plt.grid()
plt.legend()
plt.show()

### Pie charts

In [None]:
def integrate_square(x, y, offset=0):
    """Integrate a piecewise-constant curve with left-hand rectangles.

    Each interval ``[x[i], x[i+1]]`` contributes its width multiplied by
    ``y[i]``, the value at the left edge of the interval.

    Args:
        x: sequence of abscissae.
        y: sequence of ordinates, indexed in step with ``x``.
        offset: accepted for compatibility; not used by the computation.

    Returns:
        The accumulated area; 0 when ``x`` has fewer than two points.
    """
    area = 0
    for right in range(1, len(x)):
        left = right - 1
        area += (x[right] - x[left]) * y[left]
    return area

# One pie chart per study: share of the available worker time that was used
# versus left unused, based on the workload 'num_running' series.
for label, data, start_infos in zip(labels, data_list, start_infos_list):
    if start_infos is None:
        print(f'file:{label} skipped because start_infos is not present.')
        continue

    start_time = to_sec(data['workload']['times'][0])
    times = [to_sec(t)-start_time for t in data['workload']['times']]
    num_running = np.array(data['workload']['num_running']) - 1
    nworkers = start_infos['nworkers'] - start_infos['nagents']
    # Available capacity: number of workers times the elapsed duration.
    total_available = nworkers * times[-1]
    if total_available <= 0:
        # A single workload sample (zero duration) or no worker at all would
        # make the percentages below divide by zero.
        print(f'file:{label} skipped because no worker time is available.')
        continue

    used_time = integrate_square(times, num_running)
    unused_time = total_available - used_time
    perc_used_time = int(used_time / total_available * 100)
    perc_unused_time = int(unused_time / total_available * 100)
    label_used_time = f'{perc_used_time}% used'
    label_unused_time = f'{perc_unused_time}% unused'
    plt.title(label)
    lpie = plt.pie([used_time, unused_time],
                   labels=[label_used_time, label_unused_time],
                   colors=['green', 'red'])
    plt.show()

## Minibatches
### Timing of minibatches evaluation

In [None]:
# Smoothing window (in batches); 1 means no smoothing.
window_size = 1

# 'batch_computation' of each 'env_stats' entry against the time elapsed
# since the first workload sample of the same study.
for label, data in zip(labels, data_list):
    try:
        start_time = to_sec(data['workload']['times'][0])
        # Only the fields actually plotted are read, so a study is not
        # skipped merely because an unrelated field is absent.
        nca_list = [stats['batch_computation'] for stats in data['env_stats']]
        times = [to_sec(stats['timestamp']) - start_time for stats in data['env_stats']]

        nca_list = moving_average(nca_list, window_size)
        times = moving_average(times, window_size)
        plt.plot(times, nca_list, label=label)
    except Exception:
        # Best effort: a study lacking the required fields is skipped.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # still stop the notebook.
        print(f'file:{label} skipped.')

plt.ylabel('Batch Computation Time')
plt.xlabel('Time (s.)')
plt.xlim(left=0)
plt.ylim(bottom=0)
plt.grid()
plt.legend()
plt.show()

## Cache
### Number of cached evaluations
It shows the number of cache accesses used for the computation of a given batch at time T.

In [None]:
# Smoothing window (in batches) applied to both axes.
window_size = 21

# Per-batch increase of 'num_cache_used', tracked separately for each rank,
# against the time elapsed since the first workload sample of the study.
for label, data in zip(labels, data_list):
    try:
        start_time = to_sec(data['workload']['times'][0])
        nca_list = []
        times = []
        # Last 'num_cache_used' value seen for each rank.
        # NOTE(review): the differencing below presumes the counter is
        # cumulative per rank; confirm against the code that writes it.
        last_nca_rank = {}
        for stats in data['env_stats']:
            rank = stats['rank']
            nca = stats['num_cache_used']
            elapsed = to_sec(stats['timestamp']) - start_time
            # The first entry of a rank is compared against 0.
            nca_list.append(nca - last_nca_rank.get(rank, 0))
            times.append(elapsed)
            last_nca_rank[rank] = nca

        nca_list = moving_average(nca_list, window_size)
        times = moving_average(times, window_size)
        plt.plot(times, nca_list, label=label)
    except Exception:
        # Best effort: a study lacking the required fields is skipped.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # still stop the notebook.
        print(f'file:{label} skipped.')

plt.ylabel('Cache accesses')
plt.xlabel('Time (s.)')
plt.xlim(left=0)
plt.ylim(bottom=0)
plt.grid()
plt.legend()
plt.show()