In [1]:
import numpy as np
import pandas as pd
import pathlib
import sys

In [2]:
def df_from_csv(csv_file, num_measures=5):
    """Aggregate a colon-separated counter dump into one row per event.

    The file is read without a header as the columns ``CPU``, ``Value``,
    ``Unit`` and ``Event Name``. Its rows are split, in file order, into
    ``num_measures`` consecutive chunks, and inside every chunk the ``Value``
    of each event is summed over all rows of that chunk.

    Parameters
    ----------
    csv_file : str, path object or file-like object
        Anything accepted by ``pandas.read_csv``.
    num_measures : int, default 5
        Number of consecutive chunks (measures) the rows are split into.

    Returns
    -------
    pandas.DataFrame
        One row per distinct event, in order of first appearance, with the
        columns ``Event Name``, ``Measure_1`` .. ``Measure_<num_measures>``
        (per-chunk sums) and ``Avg`` (row-wise mean of the measure columns).
    """
    raw = pd.read_csv(csv_file, header=None, sep=":",
                      names=["CPU", "Value", "Unit", "Event Name"])

    # Distinct events, in order of first appearance in the file
    events = raw["Event Name"].unique()

    # Split the row *positions* rather than the frame itself: the chunk sizes
    # are the same, but np.array_split on a DataFrame is deprecated.
    chunks = np.array_split(np.arange(len(raw)), num_measures)

    measures = {}
    for number, positions in enumerate(chunks, start=1):
        totals = raw.iloc[positions].groupby("Event Name")["Value"].sum()
        # Align on the event name instead of relying on the first rows of the
        # chunk being in the same order as `events`.
        measures["Measure_" + str(number)] = totals.reindex(events).to_numpy()

    result = pd.DataFrame(measures)
    result.insert(0, "Event Name", events)

    # Average only the measure columns: including the string column
    # "Event Name" makes DataFrame.mean raise on pandas >= 2.0.
    result["Avg"] = result[list(measures)].mean(axis=1)
    return result

In [3]:
# Read the csv and aggregate it with df_from_csv (one row per event, one
# column per measure). No Excel file is written by this cell.
# NOTE(review): absolute, machine-specific path — the cell only runs where
# this file exists.
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/mnist_train_each_batch_CUIDADO.csv"
df = df_from_csv(csv_file)

In [4]:
# Display the aggregated table returned by df_from_csv
df

Unnamed: 0,Event Name,Measure_1,Measure_2,Measure_3,Measure_4,Measure_5,Avg
0,PERF_COUNT_HW_INSTRUCTIONS,196028646105,194718272452,194861488225,194880997876,194796944735,195057300000.0
1,PERF_COUNT_HW_BRANCH_INSTRUCTIONS,16899903233,16628859364,16655877660,16664408861,16648518729,16699510000.0
2,L1-DCACHE-LOADS,67046626626,66662987066,66719640026,66715521981,66703393050,66769630000.0
3,L1-DCACHE-STORES,14487138800,14271988592,14320188067,14307957565,14314735170,14340400000.0
4,fp_arith_inst_retired.scalar_double,247821163,247611302,247809540,247815120,247793334,247770100.0
5,fp_arith_inst_retired.scalar_single,155507746,155504693,155507693,155504693,155504693,155505900.0
6,fp_arith_inst_retired.128b_packed_single,15135996,15135704,15135996,15135704,15135704,15135820.0
7,fp_arith_inst_retired.256b_packed_single,3421158,3421092,3421158,3421092,3421092,3421118.0
8,fp_arith_inst_retired.512b_packed_single,123342950370,123340570870,123342950370,123340570870,123340570870,123341500000.0


In [3]:
# NOTE(review): this cell repeats execution count 3, and its saved output
# shows the raw CPU / Value / Unit / Event Name columns rather than the
# Measure_* columns df_from_csv returns — presumably left over from an earlier
# definition of `df`; confirm, then re-run or remove.
df

Unnamed: 0,CPU,Value,Unit,Event Name
0,2,1121200835,,PERF_COUNT_HW_INSTRUCTIONS
1,2,231256906,,PERF_COUNT_HW_BRANCH_INSTRUCTIONS
2,2,323762220,,L1-DCACHE-LOADS
3,2,173377936,,L1-DCACHE-STORES
4,2,11887,,fp_arith_inst_retired.scalar_double
...,...,...,...,...
69987235,31,0,,fp_arith_inst_retired.scalar_double
69987236,31,0,,fp_arith_inst_retired.scalar_single
69987237,31,0,,fp_arith_inst_retired.128b_packed_single
69987238,31,0,,fp_arith_inst_retired.256b_packed_single


In [5]:
# NOTE(review): the saved output again shows the raw CPU / Value / Unit /
# Event Name columns, with a different number of rows than the previous
# display (last index 69619498 vs 69987238) — presumably `df` came from a
# different read of the csv; confirm, then re-run or remove.
df

Unnamed: 0,CPU,Value,Unit,Event Name
0,2,1121200835,,PERF_COUNT_HW_INSTRUCTIONS
1,2,231256906,,PERF_COUNT_HW_BRANCH_INSTRUCTIONS
2,2,323762220,,L1-DCACHE-LOADS
3,2,173377936,,L1-DCACHE-STORES
4,2,11887,,fp_arith_inst_retired.scalar_double
...,...,...,...,...
69619495,31,0,,fp_arith_inst_retired.scalar_double
69619496,31,0,,fp_arith_inst_retired.scalar_single
69619497,31,0,,fp_arith_inst_retired.128b_packed_single
69619498,31,0,,fp_arith_inst_retired.256b_packed_single


In [None]:
# Locate the my_papi folder starting from the current working directory
# (pathlib.Path() is ".", i.e. wherever the kernel runs, not the notebook file
# itself): climb four levels up to the root of the workspace
# ([prev. path]/TFG) and enter my_papi/ from there.
MY_PAPI_DIR = (
    pathlib.Path().absolute().parent.parent.parent.parent.absolute() / "my_papi"
)
# Sub-folders of my_papi holding the configuration files, the library and
# the source codes, in that order
CFG_DIR, LIB_DIR, SRC_DIR = (
    MY_PAPI_DIR / folder for folder in ("conf", "lib", "src")
)

# Put the source folder first on the import path, then import the library
sys.path.insert(0, str(SRC_DIR))
from MyPapi import *

In [None]:
def plot_each_iter(csv_file):
    """Plot, per measure, the mean over CPUs of every column of a counter dump.

    The csv is read without a header as ``CPU``, ``Value``, ``Unit`` and
    ``Event Name`` (colon separated). Rows are numbered in consecutive groups
    of ``len(events) * len(cpus)`` rows, counted from the end of the file;
    rows at the start that do not fill a whole group keep measure number 0.
    The table is pivoted to one row per (measure, CPU) and passed through
    ``MyPapi.get_rates_from_df``, then averaged per measure.

    Two plotly figures are built:

    * figure 0 gets one trace for every column that is not a key of
      ``events_dict``;
    * figure 1 (two subplots) gets the columns that are keys of
      ``events_dict``: names containing "rate" go to the left subplot scaled
      by 100, the others to the right subplot.

    Only figure 0 is written, to ``<csv_file>_1_.html``.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the csv to read; also used in the plot title and as the
        prefix of the html file name.

    Returns
    -------
    None
    """
    # plotly is only needed by this function, so it stays a local import
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    # Read csv with the following name of columns
    df = pd.read_csv(csv_file, header=None, sep=":",
                     names=["CPU", "Value", "Unit", "Event Name"])

    # Get the events and cpus measured
    events = df["Event Name"].unique()
    cpus = df["CPU"].unique()

    # Number of complete measures (batch, epoch, etc.) stored in the file
    rows_per_measure = len(events) * len(cpus)
    num_measures = len(df.index) // rows_per_measure

    # Number the measures 1..num_measures in a single vectorised pass. The
    # groups are aligned to the END of the file, so any leftover rows at the
    # start get 0 — the same labels the original tail-slice loop produced.
    leftover = len(df.index) - num_measures * rows_per_measure
    df.insert(0, "# Measure",
              (np.arange(len(df.index)) - leftover) // rows_per_measure + 1)

    # "Rotate" the table: one row per (measure, CPU), one column per event
    df = df.pivot_table(index=["# Measure", "CPU"], columns=["Event Name"],
                        values=["Value"]).fillna(0)

    # Drop the first level ("Value") of the column multiindex
    df.columns = df.columns.droplevel()

    # Add columns with rates (IPC, acc., etc.)
    df = MyPapi.get_rates_from_df(df)

    # Remove name of columns and turn the index levels back into columns
    df.columns.name = None
    df = df.reset_index()

    # No need of CPU column
    df = df.drop(["CPU"], axis=1)

    # Mean of every column per measure, indexed by "# Measure". A single
    # groupby replaces one boolean-mask scan per (measure, column) pair;
    # sort=False keeps the measures in order of appearance.
    df = df.groupby("# Measure", sort=False).mean()
    num_points = len(df.index)
    x_values = df.index.tolist()

    # Array with the figures/html files to create
    figs = [go.Figure(),
            make_subplots(
                rows=1, cols=2,
                specs=[[{"secondary_y": False}, {"secondary_y": False}]],
                subplot_titles=("Rates", "Derived metrics"))
            ]

    # Add the value to the graphs
    for col in df.columns:
        if col not in events_dict:
            figs[0].add_trace(go.Scatter(x=x_values, y=df[col], name=col))
        elif "rate" in col:
            # Rates are shown as percentages
            figs[1].add_trace(
                go.Scatter(x=x_values, y=df[col] * 100, name=col),
                row=1, col=1, secondary_y=False
            )
        else:
            figs[1].add_trace(
                go.Scatter(x=x_values, y=df[col], name=col),
                row=1, col=2, secondary_y=False
            )

    # Change the name of the y axis
    figs[0].update_yaxes(title_text="Value")
    figs[1].update_yaxes(title_text="Miss rate (%)", row=1, col=1)
    figs[1].update_yaxes(title_text="Value", row=1, col=2)

    # Set options common to all traces with fig.update_traces
    for fig in figs:
        fig.update_traces(mode='lines+markers',
                          marker_line_width=2, marker_size=8)
        fig.update_layout(
            # str() so that pathlib.Path arguments work as well
            title='MyPaPi measure by iterations: ' + str(csv_file),
            hovermode="x unified",
            legend=dict(
                traceorder="normal",
                font=dict(family="sans-serif",
                          size=12,
                          color="black"),
                bgcolor="white",
                bordercolor="Black",
                borderwidth=2
            )
        )
        fig.update_xaxes(range=[0, num_points + 1],
                         title_text="Number of measure")

    # Save the html file of the first figure.
    # NOTE(review): figs[1] is built and styled above but never written or
    # shown — presumably a "_2_.html" file was intended; confirm before adding.
    name_html = str(csv_file) + "_1_.html"
    figs[0].write_html(name_html)

In [None]:
from jupyter_dash import JupyterDash

import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
from dash.dependencies import Input, Output
import plotly.express as px


# Stylesheet loaded by the Dash app
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Dash application object used by the layout and callback below
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
# server = app.server

# df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv')
# available_indicators = df['Indicator Name'].unique()


# NOTE(review): this rebinds the notebook-global `df` (assigned from
# df_from_csv in an earlier cell) to plotly's iris sample data.
df = px.data.iris()
# Columns offered in the dropdown as scatter-matrix dimensions
all_dims = ['sepal_length', 'sepal_width', 
            'petal_length', 'petal_width']

# app = dash.Dash(__name__)

# Page layout: a multi-select dropdown listing `all_dims` (the first two are
# preselected) followed by the graph with id "splom".
app.layout = html.Div([
    dcc.Dropdown(
        id="dropdown",
        options=[{"label": x, "value": x} 
                 for x in all_dims],
        value=all_dims[:2],
        multi=True
    ),
    dcc.Graph(id="splom"),
])

@app.callback(Output("splom", "figure"), [Input("dropdown", "value")])
def update_bar_chart(dims):
    """Build the figure for the "splom" graph from the dropdown selection.

    Registered as a Dash callback: the value of the "dropdown" component is
    received as `dims` and the returned figure is assigned to the "figure"
    property of "splom". Despite the name, the figure is a scatter matrix of
    the global `df` over `dims`, coloured by the "species" column.
    """
    return px.scatter_matrix(df, dimensions=dims, color="species")


# Start the Dash app from the notebook. mode="jupyterlab" is forwarded to
# JupyterDash (presumably it shows the app inside JupyterLab — confirm against
# the jupyter-dash documentation).
app.run_server(mode="jupyterlab")

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

# Load the per-epoch measurements to plot.
# NOTE(review): df_from_csv_for_plot is not defined in the visible part of this
# notebook, and the path is absolute and machine-specific — confirm the helper
# exists before a fresh "Run All".
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_each_epoch.csv"
df = df_from_csv_for_plot(csv_file)

# Style declaration: applied through pyplot's global state, so it must run
# before the plot below is drawn
plt.style.use("bmh")
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Tahoma'
plt.rcParams['xtick.color']='#A8A8A9'
plt.rcParams['ytick.color']='#A8A8A9'
plt.rcParams['figure.figsize'] = [10, 5]


# x axis: the '# Iter.' column; y axis: the values of one event column
x_values = df['# Iter.']
y_values = df['fp_arith_inst_retired.512b_packed_single'].tolist()

plt.plot(x_values, y_values, marker='o') # color='#a12424'

# Title, axis labels and legend (loc=3 is the lower-left corner)
plt.title('Evol. event per epoch')
plt.xlabel('Epoch')
plt.ylabel('Event')
plt.legend(['fp_arith_inst_retired.512b_packed_single'], loc=3)
# Start the y axis at zero
plt.ylim(ymin=0)

plt.show()

# fig.savefig('test.jpg')