In [1]:
import glob
import os
import zlib

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio

# Send every figure displayed through plotly.io to the 'browser' renderer
# (plotly opens the figure in a web-browser tab instead of rendering it inline).
pio.renderers.default = 'browser'

In [2]:
'''
This data is already preprocessed, to show what these visualizations can do. 
This will just include the parts of code generating the visualizations with processed data.
'''

'\nThis data is already preprocessed, to show what these visualizations can do. \nThis will just include the parts of code generating the visualizations with processed data.\n'

## LollipopChart class ##

In [3]:
class LollipopChart:
    """Horizontal lollipop chart drawn with plotly from a pandas DataFrame.

    Every row of ``df_lolli`` becomes one marker at ``(x_col, y_col)`` plus a
    black horizontal stem from ``x = 0`` to the marker. Marker colour is
    derived from ``color_col`` and marker diameter from ``size_col``.

    The hover text reads these columns, which must therefore exist in the
    frame: ``hashed_func_name``, ``hashed_call_stack``, ``file_number``,
    ``call_stack_count``, ``func_call_stack_id``, ``exclusive_runtime`` and
    ``normalized_runtime``.
    """

    # (inclusive upper bound on |value|, marker diameter), in ascending order.
    _SIZE_BUCKETS = ((20, 10), (40, 15), (60, 20), (80, 25))
    # Diameter for every magnitude above the last bucket bound.
    _MAX_MARKER_SIZE = 30
    # Diameter for rows whose size value is missing (NaN / None).
    _MISSING_MARKER_SIZE = 10

    def __init__(self, df_lolli):
        """Store the frame to plot.

        Args:
            df_lolli: pandas DataFrame holding one row per lollipop.
        """
        self.df_lolli = df_lolli
        # Offset the line length by the marker size to avoid overlapping
        self._marker_offset = 0.04

    def offset_signal(self, signal):
        """Shrink ``signal`` towards zero by the marker offset.

        Args:
            signal: numeric value to shorten.

        Returns:
            0 when ``abs(signal)`` does not exceed the offset, otherwise
            ``signal`` moved towards zero by the offset (sign preserved).
        """
        if abs(signal) <= self._marker_offset:
            return 0
        return signal - self._marker_offset if signal > 0 else signal + self._marker_offset

    def calculate_sizes(self, col):
        """Map each value of ``col`` to a marker diameter.

        Buckets on the absolute value: ``<= 20 -> 10``, ``<= 40 -> 15``,
        ``<= 60 -> 20``, ``<= 80 -> 25``, anything larger ``-> 30``.
        Missing values get the smallest diameter.

        Exactly one diameter is produced per row. Magnitudes above 100 and
        missing values used to produce no entry at all, which left the list
        shorter than the frame and shifted every following marker size.

        Args:
            col: name of the column that drives the marker size.

        Returns:
            list of int diameters, same length and order as the frame rows.
        """
        sizes = []
        for count in self.df_lolli[col]:
            if pd.isna(count):
                sizes.append(self._MISSING_MARKER_SIZE)
                continue
            magnitude = abs(count)
            for upper_bound, diameter in self._SIZE_BUCKETS:
                if magnitude <= upper_bound:
                    sizes.append(diameter)
                    break
            else:
                # Above the last bound: clamp to the largest marker.
                sizes.append(self._MAX_MARKER_SIZE)
        return sizes

    def create_color_mapping(self, color_col):
        """Assign one HSL colour to each distinct value of ``color_col``.

        The hue comes from a CRC-32 of the value's string form rather than the
        built-in ``hash()``: string hashes are salted per interpreter process,
        so ``hash()`` gave different colours after every kernel restart.

        Args:
            color_col: name of the column whose distinct values get a colour.

        Returns:
            dict mapping each distinct value to an ``'hsl(H, 50%, 50%)'`` string.
        """
        mapping = {}
        for value in self.df_lolli[color_col].unique():
            hue = zlib.crc32(str(value).encode('utf-8')) % 360
            mapping[value] = 'hsl(' + str(hue) + ', 50%, 50%)'
        return mapping

    def create_data(self, x_col, y_col, color_col, size_col):
        """Build the marker trace (the lollipop heads).

        Args:
            x_col: column plotted on the x axis.
            y_col: column plotted on the y axis.
            color_col: column that selects the marker colour.
            size_col: column that selects the marker diameter.

        Returns:
            A one-element list containing the ``go.Scatter`` trace.
        """
        # reference: https://plotly.com/python/line-and-scatter/
        sizes = self.calculate_sizes(size_col)
        func_color_mapping = self.create_color_mapping(color_col)

        return [
            go.Scatter(
                x=self.df_lolli[x_col],
                y=self.df_lolli[y_col],
                mode='markers',
                marker=dict(
                    color=[func_color_mapping[func] for func in self.df_lolli[color_col]],
                    size=sizes,
                    sizemode='diameter',
                ),
                text=self.create_hover_text(),
                hovertemplate='%{text}',
            )
        ]

    def create_shapes(self, adjusted_y, x_col, y_col):
        """Build one horizontal stem per row, from ``x = 0`` to the row's x value.

        Rows are read by position, not by index label, so frames whose index
        is not ``0..n-1`` (for example after ``read_excel(index_col=0)`` or a
        filter) no longer raise ``KeyError`` or pair the wrong rows.

        Args:
            adjusted_y: accepted for interface compatibility; not used when
                drawing the stems.
            x_col: column giving the x coordinate where each stem ends.
            y_col: column giving the y coordinate of each stem.

        Returns:
            list of plotly line-shape dicts, one per row in frame order.
        """
        shapes = []
        for x_value, y_value in zip(self.df_lolli[x_col], self.df_lolli[y_col]):
            shapes.append(
                dict(
                    type='line',
                    xref='x',
                    yref='y',
                    x0=0,
                    y0=y_value,
                    x1=x_value,
                    y1=y_value,
                    line=dict(color='black', width=2)
                )
            )
        return shapes

    def create_hover_text(self):
        """Build the per-row hover label.

        Returns:
            pandas Series of HTML strings (fields separated by ``<br>``),
            aligned with the rows of the frame.
        """
        return (
            'Hashed Func Name: ' + self.df_lolli['hashed_func_name'].astype(str) + '<br>' +
            'Hashed Call Stack: ' + self.df_lolli['hashed_call_stack'].astype(str) + '<br>' +
            'File #: ' + self.df_lolli['file_number'].astype(str) + '<br>' +
            'Call Stack Count: ' + self.df_lolli['call_stack_count'].astype(str) + '<br>' +
            'Function Call Stack ID: ' + self.df_lolli['func_call_stack_id'].astype(str) + '<br>' +
            'Runtime: ' + self.df_lolli['exclusive_runtime'].astype(str) + '<br>' +
            'Normalized Runtime: ' + self.df_lolli['normalized_runtime'].astype(str)
        )

    def create_lollipop(self, x_col, y_col, color_col, size_col, baseline_top=18):
        """Assemble the complete figure: markers, stems and the x = 0 baseline.

        Args:
            x_col: column plotted on the x axis (stem length).
            y_col: column plotted on the y axis (one stem per row).
            color_col: column that selects the marker colour.
            size_col: column that selects the marker diameter.
            baseline_top: y coordinate where the vertical baseline at
                ``x = 0`` ends. Defaults to 18, the value that used to be
                hard-coded; pass the largest y value of your data otherwise.

        Returns:
            The assembled 1000x1000 ``go.Figure``.
        """
        data = self.create_data(x_col, y_col, color_col, size_col)

        # Offset-adjusted copy of the y_col values: magnitudes of 0.5 or less
        # collapse to 0, the rest are shortened by the marker offset.
        # NOTE(review): create_shapes receives this list but does not draw
        # with it, so the stems currently end at the unadjusted x value.
        threshold = 0.5
        adjusted_y = [self.offset_signal(x) if abs(x) > threshold else 0 for x in self.df_lolli[y_col]]

        shapes = self.create_shapes(adjusted_y, x_col, y_col)

        layout = go.Layout(
            shapes=shapes,
            width=1000,
            height=1000,
        )

        fig = go.Figure(data, layout)

        # Vertical baseline at x = 0 that the stems start from.
        fig.add_shape(
            type='line',
            x0=0,
            y0=0,
            x1=0,
            y1=baseline_top,
            line=dict(color='black', width=2)
        )

        return fig

    def show(self, x_col, y_col, color_col, size_col, baseline_top=18):
        """Build the lollipop figure and display it through ``plotly.io``.

        Args:
            x_col: column plotted on the x axis.
            y_col: column plotted on the y axis.
            color_col: column that selects the marker colour.
            size_col: column that selects the marker diameter.
            baseline_top: y coordinate where the vertical baseline ends
                (default 18).
        """
        fig = self.create_lollipop(x_col, y_col, color_col, size_col, baseline_top)
        pio.show(fig)

## Parallel Coordinates Chart Class ##

In [4]:
class ParcoordsChart:
    """Parallel-coordinates plot over selected columns of a DataFrame."""

    def __init__(self, df_pars):
        """Keep a reference to ``df_pars``, the frame every plot reads from."""
        self.df_pars = df_pars

    def create_parcoords_plot(self, list_of_param, updated_labels, color, max_min):
        """Build the parallel-coordinates figure.

        Args:
            list_of_param: column names, one per axis, in axis order.
            updated_labels: axis labels; entry ``i`` labels ``list_of_param[i]``.
            color: column whose values colour the lines.
            max_min: column whose max / min set the colour-scale bounds.

        Returns:
            A ``go.Figure`` holding a single ``go.Parcoords`` trace.
        """
        frame = self.df_pars

        # One axis per requested column, labelled by position.
        axes = [
            dict(label=updated_labels[position], values=frame[column])
            for position, column in enumerate(list_of_param)
        ]

        line_style = dict(
            color=frame[color],
            colorscale='Bluered',
            showscale=True,
            cmax=frame[max_min].max(),
            cmin=frame[max_min].min(),
        )

        return go.Figure(data=go.Parcoords(line=line_style, dimensions=axes))

    def show(self, list_of_param, updated_labels, color, max_min, font_size=25, tick_font_size=18, height=700):
        """Build the figure, apply font sizes and height, then display it.

        Args:
            list_of_param: column names, one per axis, in axis order.
            updated_labels: axis labels matching ``list_of_param`` by position.
            color: column whose values colour the lines.
            max_min: column whose max / min set the colour-scale bounds.
            font_size: layout font size.
            tick_font_size: tick font size of the parcoords trace.
            height: figure height.
        """
        figure = self.create_parcoords_plot(list_of_param, updated_labels, color, max_min)

        # Layout-wide font first, then the tick font on the parcoords trace.
        figure.update_layout(font={'size': font_size})
        figure.update_traces(tickfont_size=tick_font_size, selector={'type': 'parcoords'})
        figure.update_layout(height=height)

        pio.show(figure)

## Example usage ##

In [7]:
# Lollipop demo: load the preprocessed sample workbook (first column becomes
# the index) and display it as a lollipop chart.
df_lolli = pd.read_excel("lollipop_test_data.xlsx", index_col=0)
lollipop_chart = LollipopChart(df_lolli)
lollipop_chart.show(
    x_col='call_stack_count',
    y_col='func_call_stack_id',
    color_col='hashed_func_name',
    size_col='normalized_runtime',
)

In [8]:
# Parallel-coordinates demo: load the preprocessed sample workbook (first
# column becomes the index).
df_pars = pd.read_excel("par_coors_test_data.xlsx", index_col=0)

# Column name -> axis label, in the order the axes are passed to the chart.
axis_labels = {
    "entry_time_list": "Entry Time",
    "resident set size (pages)": "Resident Set Size",
    "program size (pages)": "Program Size",
    "meminfo:MemFree (MB)": "Free Memory (MB)",
    "exclusive_runtimes_list": "Runtime",
}
list_of_param = list(axis_labels)
updated_labels = list(axis_labels.values())

# Lines are coloured by runtime, and the colour scale spans that same column.
parc_chart = ParcoordsChart(df_pars)
parc_chart.show(
    list_of_param,
    updated_labels,
    color='exclusive_runtimes_list',
    max_min='exclusive_runtimes_list',
)