In [1]:
import numpy as np
import pandas as pd
import pathlib
import sys

# Locate the my_papi folder relative to the notebook's working directory.
# pathlib.Path().absolute() is the current working directory (not the path
# of this file); the workspace root (expected to be .../TFG) sits four
# levels above it.
MY_PAPI_DIR = (
    pathlib.Path().absolute().parent.parent.parent.parent.absolute() / "my_papi"
)

# Sub-folders of my_papi: configuration files, compiled library, sources
CFG_DIR, LIB_DIR, SRC_DIR = (
    MY_PAPI_DIR / sub_dir for sub_dir in ("conf", "lib", "src")
)

# Put the source folder first on the import path, then load the library
sys.path.insert(0, str(SRC_DIR))
from MyPapi import *

In [76]:
# Measure M1 (PAPI run): load the counters and attach, to every row, the
# mean value of its event.

# Name of csv file
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/mnist_train_papi.csv"

# The file has no header row and uses ':' as separator; name the columns here
df1 = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

# Keep only the rows whose counter value is non-zero. The explicit copy makes
# the filtered frame independent of the one returned by read_csv, so adding
# the 'Avg' column below does not trigger pandas' SettingWithCopyWarning.
df1 = df1[df1.Value != 0].copy()

# Mean of 'Value' over all rows sharing the same event name, broadcast back
# to every row of that event
df1['Avg'] = df1.groupby('Event Name')['Value'].transform('mean')
df1

Unnamed: 0,CPU,Value,Unit,Event Name,Avg
0,2,193532612763,,PERF_COUNT_HW_INSTRUCTIONS,192683100000.0
1,2,16637978284,,PERF_COUNT_HW_BRANCH_INSTRUCTIONS,16523790000.0
2,2,67197413612,,L1-DCACHE-LOADS,66953470000.0
3,2,15638234031,,L1-DCACHE-STORES,15452560000.0
4,2,247316414,,fp_arith_inst_retired.scalar_double,247234600.0
5,2,154688753,,fp_arith_inst_retired.scalar_single,154688800.0
6,2,15056280,,fp_arith_inst_retired.128b_packed_single,15056280.0
7,2,3403140,,fp_arith_inst_retired.256b_packed_single,3403140.0
8,2,122693346870,,fp_arith_inst_retired.512b_packed_single,122693300000.0
270,2,191722933694,,PERF_COUNT_HW_INSTRUCTIONS,192683100000.0


In [77]:
# Measure M1 (callback run): load the counters and attach, to every row, the
# mean value of its event.

# Name of csv file
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/mnist_train_callback.csv"

# The file has no header row and uses ':' as separator; name the columns here
df2 = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

# Keep only the rows whose counter value is non-zero. The explicit copy makes
# the filtered frame independent of the one returned by read_csv, so adding
# the 'Avg' column below does not trigger pandas' SettingWithCopyWarning.
df2 = df2[df2.Value != 0].copy()

# Mean of 'Value' over all rows sharing the same event name, broadcast back
# to every row of that event
df2['Avg'] = df2.groupby('Event Name')['Value'].transform('mean')
df2

Unnamed: 0,CPU,Value,Unit,Event Name,Avg
0,2,193348191211,,PERF_COUNT_HW_INSTRUCTIONS,192924800000.0
1,2,16622251581,,PERF_COUNT_HW_BRANCH_INSTRUCTIONS,16563400000.0
2,2,67139940133,,L1-DCACHE-LOADS,67024250000.0
3,2,15615410143,,L1-DCACHE-STORES,15535690000.0
4,2,247318651,,fp_arith_inst_retired.scalar_double,247281200.0
5,2,154688753,,fp_arith_inst_retired.scalar_single,154688800.0
6,2,15056280,,fp_arith_inst_retired.128b_packed_single,15056280.0
7,2,3403140,,fp_arith_inst_retired.256b_packed_single,3403140.0
8,2,122693346870,,fp_arith_inst_retired.512b_packed_single,122693300000.0
270,2,191548125053,,PERF_COUNT_HW_INSTRUCTIONS,192924800000.0


In [75]:
# Measure M2 (per-epoch file): split the rows into `num_measures` consecutive
# chunks, average every event inside each chunk, then average the chunks.

# Name of csv file
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/mnist_train_each_epoch.csv"

# The file has no header row and uses ':' as separator; name the columns here
df3 = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

# Keep only the rows whose counter value is non-zero
df3 = df3[df3.Value != 0]

# Number of epochs and number of measures
# (num_epochs is not used by the computation in this cell)
num_epochs = 30
num_measures = 5

# Events measured, in order of first appearance
events = df3["Event Name"].unique()

# Number of rows left after filtering
size = len(df3.index)

# Split the frame into num_measures chunks. The row positions are split
# instead of the DataFrame itself: passing a DataFrame to np.array_split is
# reported as deprecated usage by recent pandas releases, while splitting
# positions yields the same chunk boundaries.
arrs = [df3.iloc[rows] for rows in np.array_split(np.arange(size), num_measures)]

# One column of per-event means for every chunk
data = []
headers = []
for i, arr in enumerate(arrs, start=1):
    headers.append("Measure_" + str(i))
    # Aggregate by event and align explicitly on `events`, so each mean is
    # labelled with its own event even when a chunk does not start on an
    # event-cycle boundary (an event absent from a chunk becomes NaN).
    per_event = arr.groupby('Event Name')['Value'].mean().reindex(events)
    data.append(per_event.reset_index(drop=True))

# Creates a new df from the avg of the iters
df3 = pd.concat(data, axis=1, keys=headers)

# Add the events names
df3.insert(0, 'Event Name', events)

# Average across the measures only: including the string 'Event Name' column
# in the row-wise mean raises TypeError on pandas >= 2.0.
df3['Avg'] = df3[headers].mean(axis=1)

df3

Unnamed: 0,Event Name,Measure_1,Measure_2,Measure_3,Measure_4,Measure_5,Avg
0,PERF_COUNT_HW_INSTRUCTIONS,6433955000.0,6431052000.0,6424974000.0,6429421000.0,6385306000.0,6420942000.0
1,PERF_COUNT_HW_BRANCH_INSTRUCTIONS,552338100.0,551215700.0,550134500.0,551028500.0,547268700.0,550397100.0
2,L1-DCACHE-LOADS,2235538000.0,2235970000.0,2233437000.0,2235186000.0,2220496000.0,2232125000.0
3,L1-DCACHE-STORES,519353200.0,519794000.0,518685200.0,519772400.0,505654200.0,516651800.0
4,fp_arith_inst_retired.scalar_double,8244126.0,8243732.0,8237161.0,8243882.0,8236990.0,8241178.0
5,fp_arith_inst_retired.scalar_single,5156290.0,5156290.0,5156290.0,5156290.0,5156290.0,5156290.0
6,fp_arith_inst_retired.128b_packed_single,501876.0,501876.0,501876.0,501876.0,501876.0,501876.0
7,fp_arith_inst_retired.256b_packed_single,113438.0,113438.0,113438.0,113438.0,113438.0,113438.0
8,fp_arith_inst_retired.512b_packed_single,4089778000.0,4089778000.0,4089778000.0,4089778000.0,4089778000.0,4089778000.0


In [29]:
# Put the 'Avg' columns of df1 and df2 side by side (rows are aligned on the
# index) and add the absolute difference between them
headers = ["df1", "df2"]
data = [frame["Avg"] for frame in (df1, df2)]
df3 = pd.concat(data, axis=1, keys=headers).assign(
    Value_diff=lambda joined: (joined['df1'] - joined['df2']).abs()
)

In [30]:
# Export the frames to a single workbook, one sheet per frame.
# NOTE(review): df3 is assigned both by the per-epoch cell and by the
# df1/df2 comparison cell; the sheet name suggests the comparison is the
# one intended here -- confirm the execution order before exporting.
sheets = {
    'mnist_train_papi': df1,
    'mnist_train_callback': df2,
    'mnit_M1_comparation': df3,
}
with pd.ExcelWriter('output.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)

In [None]:
# Per-batch file: split the rows into `num_measures` consecutive chunks,
# average every event inside each chunk, then average the chunks.

# Name of csv file
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/mnist_train_each_batch.csv"

# The file has no header row and uses ':' as separator; name the columns here
df4 = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

# Keep only the rows whose counter value is non-zero
df4 = df4[df4.Value != 0]

# Number of epochs and number of measures
# (num_epochs is not used by the computation in this cell)
num_epochs = 30
num_measures = 5

# Events measured, in order of first appearance
events = df4["Event Name"].unique()

# Number of rows left after filtering
size = len(df4.index)

# Split the frame into num_measures chunks. The row positions are split
# instead of the DataFrame itself: passing a DataFrame to np.array_split is
# reported as deprecated usage by recent pandas releases, while splitting
# positions yields the same chunk boundaries.
arrs = [df4.iloc[rows] for rows in np.array_split(np.arange(size), num_measures)]

# One column of per-event means for every chunk
data = []
headers = []
for i, arr in enumerate(arrs, start=1):
    headers.append("Measure_" + str(i))
    # Aggregate by event and align explicitly on `events`, so each mean is
    # labelled with its own event even when a chunk does not start on an
    # event-cycle boundary (an event absent from a chunk becomes NaN).
    per_event = arr.groupby('Event Name')['Value'].mean().reindex(events)
    data.append(per_event.reset_index(drop=True))

# Creates a new df from the avg of the iters
df5 = pd.concat(data, axis=1, keys=headers)

# Add the events names
df5.insert(0, 'Event Name', events)

# Average across the measures only: including the string 'Event Name' column
# in the row-wise mean raises TypeError on pandas >= 2.0.
df5['Avg'] = df5[headers].mean(axis=1)

df5