In [1]:
import glob
import os
import zlib

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio

In [None]:
'''
The data used here has already been preprocessed, so this notebook can focus on what these visualizations can do.
It only includes the code that generates the visualizations from the processed data.
'''

In [2]:
# Load the lollipop input table; the first spreadsheet column becomes the index.
df_lolli = pd.read_excel(
    "lollipop_test_data.xlsx",
    index_col=0,
)

In [3]:
# Make 'browser' the default Plotly renderer, so pio.show() calls that do not
# name a renderer use it.
pio.renderers.default = 'browser'

## Creating Lollipop Viz
def _lollipop_marker_sizes(values):
    """Return one marker diameter per entry of ``values``, bucketed by magnitude.

    ``abs(value)`` is binned in steps of 20: ``<= 20 -> 10``, ``<= 40 -> 15``,
    ``<= 60 -> 20``, ``<= 80 -> 25`` and anything larger ``-> 30``.
    Magnitudes above 100 are clamped to the largest size and NaN is given the
    smallest size, so the result always has exactly ``len(values)`` entries
    and stays aligned with the plotted rows.
    """
    magnitudes = np.abs(np.asarray(values, dtype=float))
    magnitudes = np.where(np.isnan(magnitudes), 0.0, magnitudes)
    # right=True reproduces the inclusive upper bounds (e.g. exactly 20 -> 10).
    buckets = np.digitize(magnitudes, [20, 40, 60, 80], right=True)
    return (10 + 5 * buckets).tolist()


def _lollipop_color(func_name):
    """Return an ``hsl(...)`` color string derived deterministically from ``func_name``.

    A CRC of the text form is used instead of the built-in ``hash()`` because
    ``hash()`` of a string is salted per Python process, which would give the
    same function a different color after every kernel restart.
    """
    hue = zlib.crc32(str(func_name).encode("utf-8")) % 360
    return 'hsl(' + str(hue) + ',50%' + ',50%)'


def create_lollipop(df_lolli):
    """Build a horizontal lollipop chart from ``df_lolli`` and show it with ``pio.show``.

    Each row becomes one marker at ``(call_stack_count, func_call_stack_id)``.
    Marker size is bucketed from ``normalized_runtimes`` and marker color is
    derived from ``hashed_func_names_list``. Black stem lines connect markers
    to the vertical baseline at ``x = 0``.

    Parameters
    ----------
    df_lolli : pandas.DataFrame
        Must contain the columns ``call_stack_count``, ``func_call_stack_id``,
        ``normalized_runtimes``, ``hashed_func_names_list``,
        ``hashed_call_stack_list``, ``file_number`` and
        ``exclusive_runtimes_list``. Rows are read positionally, so any index
        (not only ``0..n-1``) is supported.

    Returns
    -------
    None
        The figure is displayed as a side effect.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    counts = df_lolli['call_stack_count']
    stack_ids = df_lolli['func_call_stack_id']
    func_names = df_lolli['hashed_func_names_list']

    marker_sizes = _lollipop_marker_sizes(df_lolli['normalized_runtimes'])
    marker_colors = func_names.map(_lollipop_color).tolist()

    hover_text = (
        'Hashed Func Names: ' + func_names.astype(str) + '<br>' +
        'Hashed Call Stack: ' + df_lolli['hashed_call_stack_list'].astype(str) + '<br>' +
        'File #: ' + df_lolli['file_number'].astype(str) + '<br>' +
        'Call Stack Count: ' + counts.astype(str) + '<br>' +
        'Function Call Stack ID: ' + stack_ids.astype(str) + '<br>' +
        'Runtime: ' + df_lolli['exclusive_runtimes_list'].astype(str) + '<br>' +
        'Normalized Runtime: ' + df_lolli['normalized_runtimes'].astype(str)
    )

    # https://plotly.com/python/line-and-scatter/
    trace = go.Scatter(
        x=counts,
        y=stack_ids,
        mode='markers',
        marker=dict(
            color=marker_colors,
            size=marker_sizes,
            sizemode='diameter',
        ),
        text=hover_text,
        hovertemplate='%{text}',
    )

    # Positional arrays: label-based ``series[i]`` breaks as soon as the index
    # is not exactly 0..n-1 (the data is loaded with ``index_col=0``).
    x_vals = counts.to_numpy(dtype=float)
    y_vals = stack_ids.to_numpy(dtype=float)

    # A row gets a stem when its count is non-positive or when its id lies
    # further than ``stem_threshold`` from zero. Each qualifying row gets
    # exactly one line, even when both conditions hold.
    # NOTE(review): a row with a positive count and |id| <= 0.5 gets no stem;
    # this mirrors the previous logic -- confirm that it is intended.
    stem_threshold = 0.5
    with np.errstate(invalid='ignore'):
        has_stem = (x_vals <= 0) | (np.abs(y_vals) > stem_threshold)

    shapes = [
        dict(
            type='line',
            xref='x',
            yref='y',
            x0=0,      # stem starts on the baseline
            y0=y_pos,
            x1=x_end,  # and ends at the marker
            y1=y_pos,
            line=dict(color='black', width=2),
        )
        for x_end, y_pos in zip(x_vals[has_stem].tolist(), y_vals[has_stem].tolist())
    ]

    plot_size = 1000
    layout = go.Layout(
        shapes=shapes,
        width=plot_size,
        height=plot_size,
        xaxis=dict(title=dict(text='Call Stack Count')),
        yaxis=dict(title=dict(text='Function Call Stack ID')),
    )

    fig = go.Figure(data=[trace], layout=layout)

    # Vertical baseline at x = 0, sized from the data instead of a fixed height
    # so it always spans every plotted id (and still starts at 0).
    finite_ids = y_vals[np.isfinite(y_vals)]
    y_top = max(0.0, float(finite_ids.max())) if finite_ids.size else 0.0
    y_bottom = min(0.0, float(finite_ids.min())) if finite_ids.size else 0.0
    fig.add_shape(
        type='line',
        x0=0,
        y0=y_bottom,
        x1=0,
        y1=y_top,
        line=dict(color='black', width=2))

    pio.show(fig)

In [4]:
# Build the lollipop chart from df_lolli; create_lollipop displays it via pio.show.
create_lollipop(df_lolli)

In [5]:
# Load the parallel-coordinates input table; the first spreadsheet column becomes the index.
df_pars = pd.read_excel(
    "par_coors_test_data.xlsx",
    index_col=0,
)

In [6]:
# Set the default Plotly renderer to 'browser'. This repeats the assignment made
# in the lollipop cell, so it is redundant when the notebook runs top to bottom.
pio.renderers.default = 'browser'

def create_parcoords_plot(df, list_of_param, updated_labels,
                          color_by='exclusive_runtimes_list'):
    """Build a parallel-coordinates plot of ``df`` and show it with ``pio.show``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; must contain every column in ``list_of_param`` as well
        as ``color_by``.
    list_of_param : sequence of str
        Column names, one per axis, in display order.
    updated_labels : sequence of str
        Axis labels, matched to ``list_of_param`` by position. Must provide at
        least one label per parameter; surplus labels are ignored.
    color_by : str, optional
        Column whose values color the lines; its min/max set the color range.
        Defaults to ``'exclusive_runtimes_list'``.

    Returns
    -------
    None
        The figure is displayed as a side effect.

    Raises
    ------
    ValueError
        If fewer labels than parameters are supplied.
    KeyError
        If a requested column is not present in ``df``.
    """
    list_of_param = list(list_of_param)
    updated_labels = list(updated_labels)

    # Validate up front so a mismatch fails with a clear message instead of a
    # bare IndexError in the middle of building the figure.
    if len(updated_labels) < len(list_of_param):
        raise ValueError(
            'updated_labels has {} entries but list_of_param has {}; '
            'one label per parameter is required'.format(
                len(updated_labels), len(list_of_param)))

    missing = [col for col in list_of_param + [color_by] if col not in df.columns]
    if missing:
        raise KeyError('Columns not found in df: {}'.format(missing))

    dimensions = [
        dict(label=label, values=df[par])
        for par, label in zip(list_of_param, updated_labels)
    ]

    color_values = df[color_by]

    fig = go.Figure(data=go.Parcoords(
        line=dict(color=color_values,
                  colorscale='Bluered',
                  showscale=True,
                  cmax=color_values.max(),
                  cmin=color_values.min()),
        dimensions=dimensions
    ))

    # Same font size for the general figure text and the axis tick labels.
    font_size = 32
    fig.update_layout(font=dict(size=font_size), height=7*100)
    fig.update_traces(tickfont_size=font_size, selector=dict(type='parcoords'))

    pio.show(fig)

# Columns to plot and the axis label for each, written as (column, label) pairs
# so a column and its label stay side by side; unzipped into the two lists
# that create_parcoords_plot takes.
list_of_param, updated_labels = (
    list(group)
    for group in zip(
        ("entry_time_list", "Entry Time"),
        ("resident set size (pages)", "Resident Set Size"),
        ("program size (pages)", "Program Size"),
        ("meminfo:MemFree (MB)", "Free Memory (MB)"),
        ("exclusive_runtimes_list", "Runtime"),
    )
)

In [7]:
# Build the parallel-coordinates plot from df_pars; create_parcoords_plot displays it via pio.show.
create_parcoords_plot(df_pars, list_of_param, updated_labels)