In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import argparse
import time
import sys
import imageio
from IPython import display

In [2]:
# For each problem size, we want to go through the Static data that was
# collected for each of the policies. 
# This static data will allow us to create an oracle that knows the best
# policy for each region+feature+idx combination.

# Set some plotting params
plt.rcParams['figure.figsize'] = [12, 5]
plt.rcParams['figure.dpi'] = 100

In [3]:
# Let's first read in all our desired files.

# Go to the rundata dir
run_data_dir='/g/g15/bolet1/workspace/lulesh-region-fix-correct/LULESH/runData/'
os.chdir(run_data_dir)
!pwd

# Next, let's gather up our targets
# It's all the TRACE_CSV data from each of the static runs
# There was only one invocation of each static run, so in the future
# we might want to make an oracle that takes an average of all the repeat
# executions
csvs = list(glob.glob("VA_RegionMod_explrStatic,*/trace-lulesh-VA_RegionMod_explrStatic*/*.csv"))
csvs.sort()
print(len(csvs))
print(csvs)

/usr/WS2/bolet1/lulesh-region-fix-correct/LULESH/runData
252
['VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1128-rank-0.csv', 'VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1211-rank-0.csv', 'VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1253-rank-0.csv', 'VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1292-rank-0.csv', 'VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1317-rank-0.csv', 'VA

In [4]:
# Because each of the files has the policy within the 'training' column
# and the region name in the 'region' column, we only need to add the 'runsize'
# column to tell the run sizes apart
def load_my_csv(csvfile):
    # Load the SingleMod Dataset
    print('Loading file:', csvfile)
    filename = csvfile[:-4]
    t = time.perf_counter()
    df = pd.read_csv(csvfile, sep=' ')
    elapsed_time = time.perf_counter() - t
    print('Read in CSV file in', int(elapsed_time), 'second(s)')
    print('Data shape: ', df.shape)
    
    # Drop rankid column, rankid is constant
    df = df.drop(['rankid'], 1)
    
    # Extract the runsize from the file path string
    target = 'trainSize'
    targetLoc = csvfile.find(target) + len(target)
    
    df['runsize'] = int(csvfile[targetLoc:targetLoc+2])
    
    return df

In [5]:
df = pd.DataFrame()
t = time.perf_counter()

# Read all the CSV files
for idx, csvfile in enumerate(csvs):
    print('On file', idx+1, 'of', len(csvs))
    csvdf = load_my_csv(csvfile)
    df = df.append(csvdf)
    
elapsed_time = time.perf_counter() - t
print('Read in ALL CSV files in', int(elapsed_time), 'second(s)')

print('Done loading all CSVs')
print(df.shape)
print(df.size)  

On file 1 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1128-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (932, 7)
On file 2 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1211-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (932, 7)
On file 3 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l1253-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (932, 7)
On file 4 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.re

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Read in CSV file in 0 second(s)
Data shape:  (92268, 7)
On file 12 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l2286-rank-0.csv
Read in CSV file in 3 second(s)
Data shape:  (2530380, 7)
On file 13 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l2296-rank-0.csv
Read in CSV file in 3 second(s)
Data shape:  (2530380, 7)
On file 14 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-Static,0-region-lulesh.cc.apollo.region.l2338-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (843460, 7)
On file 15 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic

On file 42 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-Static,0-region-lulesh.cc.apollo.region.l2338-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 43 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-Static,0-region-lulesh.cc.apollo.region.l2360-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 44 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-Static,0-region-lulesh.cc.apollo.region.l2388-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 45 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize55/trace-Static,0-region-lulesh

Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 73 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-Static,0-region-lulesh.cc.apollo.region.l2413-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 74 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-Static,0-region-lulesh.cc.apollo.region.l2459-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (905000, 7)
On file 75 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-Static,0-region-lulesh.cc.apollo.region.l2503-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (100000, 7)
On file 76 of 252
Loading file: VA_RegionMod_explrStatic,0_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,

On file 103 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-Static,1-region-lulesh.cc.apollo.region.l2503-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (93200, 7)
On file 104 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-Static,1-region-lulesh.cc.apollo.region.l2565-rank-0.csv
Read in CSV file in 1 second(s)
Data shape:  (843460, 11)
On file 105 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-Static,1-region-lulesh.cc.apollo.region.l2623-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (93200, 7)
On file 106 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize30/trace-Static,1-region-lul

Read in CSV file in 0 second(s)
Data shape:  (100000, 7)
On file 134 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-Static,1-region-lulesh.cc.apollo.region.l2675-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 10)
On file 135 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-Static,1-region-lulesh.cc.apollo.region.l2758-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 136 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-Static,1-region-lulesh.cc.apollo.region.l2817-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (100000, 7)
On file 137 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic

On file 164 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-Static,1-region-lulesh.cc.apollo.region.l2817-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (100000, 7)
On file 165 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-Static,1-region-lulesh.cc.apollo.region.l2905-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (100000, 7)
On file 166 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-Static,1-region-lulesh.cc.apollo.region.l304-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 167 of 252
Loading file: VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,1_c8_pol3_depth4_trainSize80/trace-Static,1-region-lules

On file 195 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize30/trace-Static,2-region-lulesh.cc.apollo.region.l570-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (932, 8)
On file 196 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize30/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize30/trace-Static,2-region-lulesh.cc.apollo.region.l871-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (932, 8)
On file 197 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize55/trace-Static,2-region-lulesh.cc.apollo.region.l1128-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 198 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize55/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize55/trace-Static,2-region-lulesh.cc.ap

On file 226 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-Static,2-region-lulesh.cc.apollo.region.l1211-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 227 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-Static,2-region-lulesh.cc.apollo.region.l1253-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 228 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-Static,2-region-lulesh.cc.apollo.region.l1292-rank-0.csv
Read in CSV file in 0 second(s)
Data shape:  (1000, 7)
On file 229 of 252
Loading file: VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-lulesh-VA_RegionMod_explrStatic,2_c8_pol3_depth4_trainSize80/trace-Static,2-region-lulesh.c

In [6]:
df.head()

# Some features will be missing, fill them in with a -1
df = df.fillna(-1)

In [7]:
# Now that we have all our data together, let's make the oracle
# for each of the runSizes in {30, 55, 80}

t = time.perf_counter()

oracle30 = df.loc[df['runsize'] == 30]
oracle55 = df.loc[df['runsize'] == 55]
oracle80 = df.loc[df['runsize'] == 80]

elapsed_time = time.perf_counter() - t
print('Subset oracles in', int(elapsed_time), 'second(s)')

Subset oracles in 11 second(s)


In [8]:
# For each region+idx combination, we want to grab the minimum time.
# This will then correspond to an optimal policy for the given execution.
# The region+idx combo is equivalent to the region+feature_vector+idx combo

t = time.perf_counter()

# Here we sort by xtime, drop any duplicate region/idx combinations
# then reset the index to be by region/idx combos
oracle30 = oracle30.sort_values(by=['xtime'])\
                   .drop_duplicates(['region', 'idx'], keep='first')\
                   .set_index(['region', 'idx'])\
                   .sort_index()

oracle55 = oracle55.sort_values(by=['xtime'])\
                   .drop_duplicates(['region', 'idx'], keep='first')\
                   .set_index(['region', 'idx'])\
                   .sort_index()

oracle80 = oracle80.sort_values(by=['xtime'])\
                   .drop_duplicates(['region', 'idx'], keep='first')\
                   .set_index(['region', 'idx'])\
                   .sort_index()

elapsed_time = time.perf_counter() - t
print('Made complete oracles in', int(elapsed_time), 'second(s)')

Made complete oracles in 158 second(s)


In [12]:
# Now that we have our oracle datasets, let's save them
t = time.perf_counter()

outputcsv_dir='/g/g15/bolet1/workspace/lulesh-region-fix-correct/LULESH/preprocData/'

oracle30.to_csv(outputcsv_dir+'oracle30.csv', index=True, sep=' ')
print(oracle30.shape)

oracle55.to_csv(outputcsv_dir+'oracle55.csv', index=True, sep=' ')
print(oracle55.shape)

oracle80.to_csv(outputcsv_dir+'oracle80.csv', index=True, sep=' ')
print(oracle80.shape)

elapsed_time = time.perf_counter() - t
print('Saved oracles in', int(elapsed_time), 'second(s)')

(10600568, 9)
(11375000, 9)
(11375000, 9)
Saved oracles in 8257 second(s)


In [10]:
# Returns the amount of memory in MB that the df uses
def dfMemUsage(df):
    return df.memory_usage(index=True).sum()/(1024*1024)

In [11]:
print('Oracle 30 uses', dfMemUsage(oracle30), 'MB of space')
print('Oracle 55 uses', dfMemUsage(oracle55), 'MB of space')
print('Oracle 80 uses', dfMemUsage(oracle80), 'MB of space')

Oracle 30 uses 797.7362699508667 MB of space
Oracle 55 uses 856.0136137008667 MB of space
Oracle 80 uses 856.0136137008667 MB of space
