# Visualizing Marconi100's `mem_free` job profiles

## Read Data

Here I visualize only the single-node jobs, listed in `/plugin=job_table/metric=job_info_marconi100/metric=mem_free_filter123_singlenode.csv`. To visualize multi-node jobs, the plotting script needs to be modified (the job list is at the same path, but with a `multinode` suffix instead of `singlenode`).

IMPORTANT: To make the plots easier to understand, I'm converting `mem_free` to the corresponding consumed-memory values (i.e., 256GB - `mem_free`). This makes it easier to spot measurement errors (i.e., negative values).
Some jobs have "nice" curves but with negative values.

In [1]:
import pandas as pd

# Multiplier that turns a value in kilobytes into gigabytes (1 / 1024**2).
GIGABYTES_ONE_KILOBYTE = 1.0 / (1024 ** 2)

# Reference total (in GB) that free memory is subtracted from to obtain
# consumed memory.
M100_MAX_MEM_GB = 256

# Metric samples; `node` and `value` are coerced to numeric dtypes.
df_metric = pd.read_parquet("../example_data/plugin=ganglia_pub/metric=mem_free/a_0.parquet")
df_metric['value'] = pd.to_numeric(df_metric['value'])
df_metric['node'] = pd.to_numeric(df_metric['node'])

# Table of the (single-node) jobs to visualize.
df_jobs_viz = pd.read_csv("../example_data/plugin=job_table/metric=job_info_marconi100/metric=mem_free_filter123_singlenode.csv")

# Convert the samples from KB to GB.
df_metric['value'] = df_metric['value'].mul(GIGABYTES_ONE_KILOBYTE)

# Comment out the line below to visualize mem_free itself instead of the
# consumed memory.
df_metric['value'] = M100_MAX_MEM_GB - df_metric['value']
df_metric['value'].describe()

count    5.338966e+07
mean     3.730951e+01
std      3.801913e+01
min     -5.257635e+01
25%      1.047766e+01
50%      3.227509e+01
75%      6.160571e+01
max      2.556728e+02
Name: value, dtype: float64

## Plotting the `mem_free` profile (as consumed memory) for each job (interactive)

In [11]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import seaborn as sns
import matplotlib.pyplot as plt
import ast

#gfig = None

def call_workflow(index):
    """Entry point wired to the job slider: plot the job at row `index`.

    `index` is forwarded unchanged to `get_job_energy_profile`.
    """
    get_job_energy_profile(index)

def get_job_energy_profile(index):
    """Plot the per-node metric profile of the job at row `index` of `df_jobs_viz`.

    The job's `nodes` cell is parsed as a Python literal. For every node in
    it, the rows of `df_metric` for that node whose `timestamp` falls between
    the job's `start_time` and `end_time` are selected; if any exist, their
    summary statistics are printed and they are handed to
    `plot_energy_profile`. Nodes without matching rows are only printed.

    Side effect: the job id is stored, as a string, in the global
    `current_job_id` (read by the plot title and the save handler).
    """
    # `current_hostname` is declared global but is not assigned in this function.
    global current_job_id, current_hostname

    job_row = df_jobs_viz.iloc[index]
    job_nodes = ast.literal_eval(job_row['nodes'])
    window_start = job_row["start_time"]
    window_end = job_row["end_time"]

    current_job_id = str(job_row['job_id'])

    for node in job_nodes:
        print(node)
        # Narrow to this node first, then to the job's time window.
        node_rows = df_metric.loc[df_metric['node'] == node]
        in_window = node_rows['timestamp'].between(window_start, window_end)
        samples = node_rows.loc[in_window]
        if len(samples) == 0:
            continue
        print(samples.describe(), end="\r")
        plot_energy_profile(samples, '')

    print(job_nodes)


def plot_energy_profile(profile, annot_str):
    """Draw `profile` as a line plot of `value` over `timestamp` on a new figure.

    Parameters:
        profile: tabular data with `timestamp` and `value` columns, passed
            as `data=` to `seaborn.lineplot`.
        annot_str: accepted for interface compatibility; it is currently not
            drawn on the plot.

    Side effects:
        - updates matplotlib's global rc font-size settings;
        - creates a new figure and stores it in the global `gfig`;
        - reads the global `current_job_id` for the plot title.

    The figure is created without calling `plt.clf()` first: that call would
    clear whichever figure is current (the previous node's plot when this
    function runs once per node) or create a stray empty figure when none
    exists.
    """
    global gfig

    TINY_SIZE = 2
    SMALL_SIZE = 5
    MEDIUM_SIZE = 25
    FIG_WIDTH = 40
    FIG_HEIGHT = 10

    plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
    plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)     # fontsize of the x and y labels
    plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
    plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
    plt.rc('figure', titlesize=MEDIUM_SIZE)   # fontsize of the figure title
    lineplot_kwargs = {'linewidth': 1}

    # Draw on an axes that belongs to this figure explicitly, rather than on
    # whatever pyplot considers the current axes.
    fig = plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT))
    ax = fig.add_subplot(1, 1, 1)
    sns.lineplot(data=profile, x='timestamp', y='value', ax=ax, **lineplot_kwargs)

    # Border (spine) width and tick-mark size/width.
    for spine in ax.spines.values():
        spine.set_linewidth(TINY_SIZE)
    for tick in ax.yaxis.get_ticklines():
        tick.set_markersize(TINY_SIZE)
        tick.set_markeredgewidth(TINY_SIZE)
    for tick in ax.xaxis.get_ticklines():
        tick.set_markersize(SMALL_SIZE)
        tick.set_markeredgewidth(TINY_SIZE)

    ax.set_ylabel('Node used memory (GB)')
    ax.set_xlabel('Timestamp')
    ax.set_title('Job ID: '+current_job_id)
    gfig = fig

# "Save to PDF" button, plus an Output widget that the click handler prints into.
button = widgets.Button(description="Save to PDF")
output = widgets.Output()

# Show both widgets. `display` is not imported in this cell; presumably it is
# the one injected by the IPython/Jupyter session.
display(button, output)

## TODO: Pass on b the data (job_id, hostname, etc)
def on_button_clicked(b):
    """Click handler for the "Save to PDF" button.

    Writes the figure held in the global `gfig` to a PDF under `../figures/`
    whose name embeds the global `current_job_id`, then prints the path.
    The work happens inside the `output` widget context, so the printed
    message appears in that widget.

    `b` is the argument supplied by the button's click callback; it is unused.
    """
    with output:
        pdf_path = ''.join([
            '../figures/marconi100_interact_plot_mem_free_profile_',
            current_job_id,
            '.pdf',
        ])
        gfig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
        print('Plot saved as '+pdf_path)

# Run on_button_clicked whenever the button is clicked.
button.on_click(on_button_clicked)

# Integer slider over the row positions of df_jobs_viz (0 .. len-1); the
# selected value is passed to call_workflow as `index`.
# NOTE(review): the trailing semicolon presumably suppresses the notebook's
# echo of the returned object.
interact(call_workflow, index=widgets.IntSlider(min=0, max=len(df_jobs_viz)-1, step=1, value=0));

Button(description='Save to PDF', style=ButtonStyle())

Output()

interactive(children=(IntSlider(value=0, description='index', max=350), Output()), _dom_classes=('widget-inter…