In [7]:
import os
import argparse
import sys
import time
import pandas as pd
import re
from pathlib import Path

# Print DataFrames in full: no row truncation when a frame is displayed.
pd.set_option('display.max_rows', None)

# Directory that holds the static-run timing CSVs.
# NOTE: an earlier location ('/usr/WS2/bolet1/apolloDataCollection') used to be
# assigned to this name first and was immediately overwritten, so it never took
# effect. Only the effective path is kept; swap it in here if the data moves.
APOLLO_DATA_COLLECTION_DIR = '/g/g15/bolet1/workspace/apolloDataCollection/finalData/quartz/static_runs_no_traces'


# Open the PA and VA csv files
VAdf = pd.read_csv(os.path.join(APOLLO_DATA_COLLECTION_DIR, 'static-ETE-XTimeData_VA.csv'))
PAdf = pd.read_csv(os.path.join(APOLLO_DATA_COLLECTION_DIR, 'static-ETE-XTimeData_PA.csv'))

# Fail early, with a readable message, if either file lacks a column that the
# rest of this cell groups, merges or aggregates on.
required_columns = ['progname', 'probSize', 'policy', 'trialnum', 'eteXtime']
for dataset_label, dataset in (('VA', VAdf), ('PA', PAdf)):
    missing_columns = [col for col in required_columns if col not in dataset.columns]
    if missing_columns:
        raise KeyError('%s dataset is missing expected columns: %s'
                       % (dataset_label, missing_columns))

print('Datasets loaded!')

# Tag every row with the dataset it came from so the two frames can be
# stacked and still told apart afterwards.
for source_frame, source_label in ((VAdf, 'VA'), (PAdf, 'PA')):
    source_frame['type'] = source_label

# Single combined frame from here on (row labels from both inputs are kept).
rawdf = pd.concat([VAdf, PAdf])

# Optional narrowing to a single policy, currently disabled:
#rawdf = rawdf.loc[rawdf['policy'] == "Static,policy=0"]

# Programs that are excluded from the analysis.
excluded_prognames = ["rodinia_backprop", "rodinia_nn", "rodinia_nw"]
rawdf = rawdf.loc[~rawdf['progname'].isin(excluded_prognames)]

# Keep only the (progname, probSize, policy) cases for which the VA and PA
# datasets contain the same number of samples.
grouped = rawdf.groupby(['progname', 'probSize', 'policy', 'type'])

# After count(), 'trialnum' and 'eteXtime' hold the number of non-null entries
# per group rather than the original values.
#print(grouped['type'].count())
counts = grouped.count().reset_index()
print(counts)

# One row per case for each run type.
vas = counts[counts['type'].eq('VA')]
pas = counts[counts['type'].eq('PA')]

# Joining on the count columns as well as the case keys means a case survives
# only when VA and PA report identical counts for it.
filtered = vas.merge(
    pas,
    how='inner',
    on=['progname', 'probSize', 'policy', 'trialnum', 'eteXtime'],
)

targets = filtered.loc[:, ['progname', 'probSize', 'policy']]

# Restrict the raw samples to the surviving cases and order them for reading.
df = (
    targets
    .merge(rawdf, how='inner', on=['progname', 'probSize', 'policy'])
    .sort_values(by=['progname', 'probSize', 'policy', 'trialnum', 'type'])
)
print(df.head())

# Optional: discard the first few trials of every case (currently disabled).
#df = df.loc[df['trialnum'] > 3]

# Summarise the cleaned samples: one row per (progname, probSize, policy, type)
# with the mean and standard deviation of eteXtime.
grouped = df.groupby(['progname', 'probSize', 'policy', 'type'])
xtime_per_case = grouped['eteXtime']

means = xtime_per_case.mean().reset_index()
stds = xtime_per_case.std().reset_index()

# Start from the means (renamed), then attach the matching std column; both
# frames come from the same grouping, so their rows line up by index.
summdf = means.rename(columns={'eteXtime': 'mean_etextime'})
summdf['std_etextime'] = stds['eteXtime']

print(summdf)

# Compare PA against VA mean execution times, case by case.
#
# The summary is split by run type and both halves are sorted by the same case
# keys so that row i of `padf` and row i of `vadf` describe the same case.
vadf = summdf.loc[summdf['type'] == 'VA'].sort_values(by=['progname', 'probSize', 'policy'])
padf = summdf.loc[summdf['type'] == 'PA'].sort_values(by=['progname', 'probSize', 'policy'])

# The arithmetic below pairs rows purely by position. Verify that pairing
# explicitly: with differing case sets NumPy would either raise an opaque shape
# error or, if one side had a single row, silently broadcast it across the other.
pa_case_keys = padf[['progname', 'probSize', 'policy']].reset_index(drop=True)
va_case_keys = vadf[['progname', 'probSize', 'policy']].reset_index(drop=True)
if not pa_case_keys.equals(va_case_keys):
    raise ValueError('PA and VA summaries do not cover the same '
                     '(progname, probSize, policy) cases; rows cannot be paired')

va_mean_xtimes = vadf['mean_etextime'].to_numpy()

# Now let's take the diffs between PA and VA mean xtimes
timeDiffs = padf['mean_etextime'].to_numpy() - va_mean_xtimes

# If any values are greater than 0, then PA was slower
padf['pa_minus_va'] = timeDiffs
padf['va_mean_ete_xtime'] = va_mean_xtimes

# Difference expressed as a percentage of the VA mean
padf['perc_xtime_diff'] = (timeDiffs * 100) / va_mean_xtimes

# Most negative (PA fastest relative to VA) first
padf = padf.sort_values(by=['perc_xtime_diff'])






Datasets loaded!
               progname   probSize           policy type  trialnum  eteXtime
0                  comd  largeprob  Static,policy=0   PA        10        10
1                  comd  largeprob  Static,policy=0   VA        10        10
2                  comd  largeprob  Static,policy=1   PA        10        10
3                  comd  largeprob  Static,policy=1   VA        10        10
4                  comd  largeprob  Static,policy=2   PA         4         4
5                  comd  largeprob  Static,policy=2   VA         4         4
6                  comd    medprob  Static,policy=0   PA        10        10
7                  comd    medprob  Static,policy=0   VA        10        10
8                  comd    medprob  Static,policy=1   PA        10        10
9                  comd    medprob  Static,policy=1   VA        10        10
10                 comd    medprob  Static,policy=2   PA         4         4
11                 comd    medprob  Static,policy=2   VA   

In [15]:
# Other views that can be switched on when needed:
#print(padf)
# now let's look at the lulesh runs
#print(padf.loc[padf['progname'] == 'lulesh'])

# Outlier cases: PA mean differs from the VA mean by 4 percent or more,
# in either direction.
is_outlier = padf['perc_xtime_diff'].abs() >= 4
print(padf.loc[is_outlier])

print('num cases sampled', len(padf))

               progname   probSize           policy type  mean_etextime  \
262         rodinia_lud    medprob  Static,policy=2   PA     462.105886   
268         rodinia_lud  smallprob  Static,policy=2   PA       5.432863   
290             xsbench  largeprob  Static,policy=1   PA       2.571700   
52               minife  smallprob  Static,policy=2   PA      26.340671   
30               lulesh  smallprob  Static,policy=0   PA     123.218228   
32               lulesh  smallprob  Static,policy=1   PA      70.973525   
288             xsbench  largeprob  Static,policy=0   PA       1.784100   
160              nas_mg  smallprob  Static,policy=2   PA       5.692500   
274  rodinia_pathfinder  largeprob  Static,policy=2   PA       1.605400   
24               lulesh    medprob  Static,policy=0   PA     154.404973   
20               lulesh  largeprob  Static,policy=1   PA     165.690923   
84               nas_cg  smallprob  Static,policy=0   PA       4.805000   
264         rodinia_lud  

In [13]:
# Drill into the individual samples of one rodinia_lud case; rodinia_lud was
# singled out as having the highest xtime difference across VA and PA.
name = 'rodinia_lud'
policy = 'Static,policy=0'
probsize = 'smallprob'

# Rows belonging to the selected case, regardless of run type.
case_rows = (
    df['progname'].eq(name)
    & df['policy'].eq(policy)
    & df['probSize'].eq(probsize)
)

# Show the PA samples first, then the VA samples.
for run_type in ('PA', 'VA'):
    print(df.loc[case_rows & df['type'].eq(run_type)])

         progname   probSize           policy  trialnum  eteXtime type
2315  rodinia_lud  smallprob  Static,policy=0         0  0.321383   PA
2319  rodinia_lud  smallprob  Static,policy=0         1  0.303625   PA
2317  rodinia_lud  smallprob  Static,policy=0         2  0.300191   PA
2316  rodinia_lud  smallprob  Static,policy=0         3  0.279494   PA
2312  rodinia_lud  smallprob  Static,policy=0         4  0.275550   PA
2313  rodinia_lud  smallprob  Static,policy=0         5  0.283626   PA
2314  rodinia_lud  smallprob  Static,policy=0         6  0.312161   PA
2318  rodinia_lud  smallprob  Static,policy=0         7  0.301520   PA
2321  rodinia_lud  smallprob  Static,policy=0         8  0.282820   PA
2320  rodinia_lud  smallprob  Static,policy=0         9  0.311739   PA
         progname   probSize           policy  trialnum  eteXtime type
2309  rodinia_lud  smallprob  Static,policy=0         0  0.287601   VA
2310  rodinia_lud  smallprob  Static,policy=0         1  0.279699   VA
2311  

In [10]:
# Per-sample view of the quicksilver runs.
is_quicksilver = df['progname'] == 'quicksilver'
is_pa = df['type'] == 'PA'
is_va = df['type'] == 'VA'

# One specific case: policy 0 on the large problem size.
is_selected_case = (
    is_quicksilver
    & (df['policy'] == 'Static,policy=0')
    & (df['probSize'] == 'largeprob')
)

# PA samples, then VA samples, for the selected case.
print(df.loc[is_selected_case & is_pa])
print(df.loc[is_selected_case & is_va])

# Every VA sample for quicksilver, across all policies and problem sizes.
print(df.loc[is_quicksilver & is_va])


         progname   probSize           policy  trialnum    eteXtime type
1640  quicksilver  largeprob  Static,policy=0         0  114.653583   PA
1641  quicksilver  largeprob  Static,policy=0         1  116.636351   PA
1642  quicksilver  largeprob  Static,policy=0         2  118.252588   PA
1643  quicksilver  largeprob  Static,policy=0         3  121.551012   PA
1644  quicksilver  largeprob  Static,policy=0         4  117.921782   PA
1645  quicksilver  largeprob  Static,policy=0         5  117.019064   PA
1647  quicksilver  largeprob  Static,policy=0         6  117.324534   PA
1646  quicksilver  largeprob  Static,policy=0         7  116.032967   PA
1648  quicksilver  largeprob  Static,policy=0         8  121.267572   PA
1649  quicksilver  largeprob  Static,policy=0         9  121.502470   PA
         progname   probSize           policy  trialnum    eteXtime type
1630  quicksilver  largeprob  Static,policy=0         0  115.766770   VA
1632  quicksilver  largeprob  Static,policy=0      

In [11]:
# 