## Merging of Per-Core Logs
### Can we assume the following about per-core behavior: 
#### If all cores of a running experiment do similar work, then their individual behaviors - as exposed by their per-core logs - are similar to each other, and hence, the behavior of all cores can be merged into an overall behavior of the full experimental run.

In [6]:
import os
import time
import pandas as pd
import numpy as np

In [7]:
import eigen_analysis

cols = eigen_analysis.LINUX_COLS
time_unit = eigen_analysis.TIME_CONVERSION_khz
joules_unit = eigen_analysis.JOULE_CONVERSION

In [54]:
# get all itrs explored for some (dvfs, qps) pair
def list_itrs(rdtsc_dirname):
    itrs = []
    for file in os.listdir(rdtsc_dirname):
        tags = file.split('_')
        itr = tags[1]
        itrs.append(itr)
    itrs = list(set(itrs))
    return itrs

In [58]:
global itrs

In [59]:
def print_err_log(dvfs, qps, itr, rapl='135', run='0', err_dir = 'err_logs/'):
    err_filename = 'err_log_' + run + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps[:-1] + '000'
    if os.path.exists(err_dir + err_filename):
        err_file = open(err_dir + err_filename, 'r')
        print(err_file.read())
    else:
        print('EMPTY ERROR LOG')

In [99]:
def handle_neg_diffs(df_diffs, df, core, err_file):
    
    tmp = df_diffs.copy()

    # isolating rows with negative diffs
    tmp_neg = tmp[(tmp['joules_diff'] < 0) | (tmp['instructions_diff'] < 0) | (tmp['cycles_diff'] < 0) \
                   | (tmp['ref_cycles_diff'] < 0) | (tmp['llc_miss_diff'] < 0) | (tmp['timestamp_diff'] < 0)]

    # re-computing diffs if possible; else dropping rows
    for i,j in tmp_neg.iterrows():
        prev = df.shift(1).loc[i]
        cur = df.loc[i]
        # TODO note that we are only handling case of RAPL-energy-status register overflow
        if (tmp.loc[i]['joules_diff'] < 0) & (tmp.loc[i]['timestamp_diff'] >= 0.001):
            err_file.write('CORE ' + str(core) + '  ---  JOULES COUNTER OVERFLOW AT LOG ENTRY DIFF #' + str(i) + '\n')
            tmp.loc[i, ['joules_diff']] = (2**32 - 1) * joules_unit - prev['joules'] + cur['joules'] 
        else:
            err_file.write('CORE ' + str(core) + '  ---  UNEXPLAINED NEGATIVE VALS AT LOG ENTRY DIFF # ' + str(i) + '\n')
            cols = ''
            prevs = ''
            currs = ''
            for col in list(df.columns):
                cols += col + '  '
                prevs += str(prev[col]) + '  '
                currs += str(cur[col]) + '  '
            err_file.write('          ' + cols + '\n')
            err_file.write('         log[' + str(i-1) + ']: ' + prevs + '\n')
            err_file.write('         log[' + str(i) + ']: ' + currs + '\n')
            tmp = tmp.drop(i, axis=0)

    return tmp

In [61]:
# given (dvfs, qps, itr), concatenate all per-core logs into one big dataframe
def concat_core_logs(dvfs, qps, itr, rapl='135', run='0'):
    print('Concatenating all per-core logs with ITR-DELAY = ', itr)
    
    # here are all the log files for this dvfs & qps
    logs_dir = qps + '_qps/linux_mcd_dmesg_' + run + '_' + dvfs + '_' + rapl + '_' + qps + '/'
    # here are all the time-management files for this dvfs & qps
    rdtsc_dir = qps + '_qps/linux_mcd_rdtsc_' + run + '_' + dvfs + '_' + rapl + '_' + qps +'/'
    rdtsc_file = rdtsc_dir + 'linux.mcd.rdtsc.' + run + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps[:-1] + '000'
    start, end = eigen_analysis.get_rdtsc(rdtsc_file)
        
    # initializing error log file
    err_dir = 'err_logs/'
    err_filename = 'err_log_' + run + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps[:-1] + '000'
    err_file = open(err_dir + err_filename, 'w')

    # here will be stored counter-based log data from all cores
    counters_full_df = pd.DataFrame()
    # here will be stored non-counter-based log data from all cores
    non_counters_full_df = pd.DataFrame()
    
    # TODO remove fixed core-id range
    for c in range(0,16):
        file = logs_dir + 'linux.mcd.dmesg.' + run + '_' + str(c) + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps[:-1] + '000'
        df = pd.read_csv(file, sep = ' ', names = cols, index_col='i')
        df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]
        df['timestamp'] = df['timestamp'] - df['timestamp'].min()
        df['timestamp'] = df['timestamp'] * time_unit
        df['joules'] = df['joules'] * joules_unit

        # CONCATENATING MILLISECOND-LEVEL PER-CORE DFS
        ##############################################
        # removing zero-filled log-entries
        # -> these represent interrupt occurrences at a frequency greater than per-1ms
        counters_df = df[['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss', 'timestamp']].copy()
        counters_df = counters_df[(counters_df['joules'] > 0) & (counters_df['instructions'] > 0) \
                                            & (counters_df['cycles'] > 0) & (counters_df['ref_cycles'] > 0) \
                                            & (counters_df['llc_miss'] > 0)]
        # computing diffs of counter readings
        df_diffs = counters_df.diff().dropna().copy()
        df_diffs.columns = [f'{c}_diff' for c in df_diffs.columns]
        df_diffs = handle_neg_diffs(df_diffs, counters_df, c, err_file)

        # SCHEME 1: merge per-core logs on index i where
        #           i = order of occurrence of a log-entry in a per-core log
        df_diffs = df_diffs.drop(['timestamp_diff'], axis=1).reset_index()
        if counters_full_df.shape[0] == 0:
            counters_full_df = df_diffs.copy()
        else:
            counters_full_df = counters_full_df.merge(df_diffs, left_index=True, right_index=True, \
                                                      how='outer', sort=True, suffixes=('', '_0')).fillna(0)

        # CONCATENATING MICROSECOND-LEVEL PER-CORE DFS
        ##############################################
        non_counters_df = df[['rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc']].copy()

        # SCHEME 1
        if non_counters_full_df.shape[0] == 0:
            non_counters_full_df = non_counters_df.copy()
        else:
            non_counters_full_df = non_counters_full_df.merge(non_counters_df, left_index=True, right_index=True, \
                                                              how='outer', sort=True, suffixes=('', '_0')).fillna(0)
        print('CORE: ', str(c))
        print('         NON COUNTERS:  full =', non_counters_df.shape[0], \
              '  expected:', int(20 * 10**6 / int(itr)))        
        print('         COUNTERS:      full =', counters_df.shape[0], \
              '  after computing diffs =', df_diffs.shape[0])   

    # delete error log if empty
    err_file.close()
    if (os.path.getsize(err_dir + err_filename) == 0):
        os.remove(err_dir + err_filename)
        
    print()
    print('-------------------------------------------------- PARSED 16 LOGS -------------------------')
    print()
    return counters_full_df, non_counters_full_df

In [62]:
def merge_concat_logs(counters_full_df, non_counters_full_df):
    
    # creating dfs of average per-core log readings
    df_merged_counter = pd.DataFrame(columns=['instructions_diff', 'cycles_diff', 'ref_cycles_diff', \
                                     'llc_miss_diff', 'joules_diff'])
    df_merged_non_counter = pd.DataFrame(columns=['rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc'])

    for col in df_merged_counter.columns:
        df_merged_counter[col] = (counters_full_df[[col, col+'_0']].sum(axis=1))/16
    for col in df_merged_non_counter.columns:
        df_merged_non_counter[col] = (non_counters_full_df[[col, col+'_0']].sum(axis=1))/16

    return df_merged_counter, df_merged_non_counter

In [63]:
def save_merged_logs(merged_counters_df, merged_non_counters_df, dvfs, qps, itr, rapl='135', run='0'):
    
    counters_df_outdir = qps + '_' + dvfs + '_counters_merged/'
    non_counters_df_outdir = qps + '_' + dvfs + '_non_counters_merged/'
    !mkdir -p $counters_df_outdir
    !mkdir -p $non_counters_df_outdir
    counters_outfile = counters_df_outdir + dvfs + '_' + qps + '_' + itr + '_counters_merged'
    non_counters_outfile = non_counters_df_outdir + dvfs + '_' + qps + '_' + itr + '_non_counters_merged'
    merged_counters_df.to_csv(counters_outfile)
    merged_non_counters_df.to_csv(non_counters_outfile)

    print('COUNTERS DIR: ')
    !ls $counters_df_outdir
    print('NON_COUNTERS DIR: ')
    !ls $non_counters_df_outdir

In [101]:
dvfs = '0xd00'
qps = '400k'
run = '0'
rapl = '135'
itrs = list_itrs(qps + '_qps/linux_mcd_rdtsc_' + run + '_' + dvfs + '_' + rapl + '_' + qps +'/')
for itr in itrs:
    counters_full_df = pd.DataFrame()
    non_counters_full_df = pd.DataFrame()
    counters_full_df, non_counters_full_df = concat_core_logs(dvfs, qps, itr)
    print_err_log(dvfs, qps, itr)
    merged_counters_df, merged_non_counters_df = merge_concat_logs(counters_full_df, non_counters_full_df)
    save_merged_logs(merged_counters_df, merged_non_counters_df, dvfs, qps, itr)
    print('----------------------------------------')

Concatenating all per-core logs with ITR-DELAY =  2
CORE:  0
         NON COUNTERS:  full = 1288726   expected: 10000000
         COUNTERS:      full = 19697   after computing diffs = 19696
CORE:  1
         NON COUNTERS:  full = 1284877   expected: 10000000
         COUNTERS:      full = 19706   after computing diffs = 19705
CORE:  2
         NON COUNTERS:  full = 1283319   expected: 10000000
         COUNTERS:      full = 19695   after computing diffs = 19694
CORE:  3
         NON COUNTERS:  full = 1302024   expected: 10000000
         COUNTERS:      full = 19711   after computing diffs = 19710
CORE:  4
         NON COUNTERS:  full = 1279800   expected: 10000000
         COUNTERS:      full = 19703   after computing diffs = 19702
CORE:  5
         NON COUNTERS:  full = 1285829   expected: 10000000
         COUNTERS:      full = 19707   after computing diffs = 19706
CORE:  6
         NON COUNTERS:  full = 1278770   expected: 10000000
         COUNTERS:      full = 19693   after comput

0xd00_400k_100_non_counters_merged  0xd00_400k_2_non_counters_merged
0xd00_400k_200_non_counters_merged  0xd00_400k_300_non_counters_merged
0xd00_400k_20_non_counters_merged   0xd00_400k_50_non_counters_merged
----------------------------------------
Concatenating all per-core logs with ITR-DELAY =  300
CORE:  0
         NON COUNTERS:  full = 65117   expected: 66666
         COUNTERS:      full = 16280   after computing diffs = 16279
CORE:  1
         NON COUNTERS:  full = 65113   expected: 66666
         COUNTERS:      full = 16279   after computing diffs = 16278
CORE:  2
         NON COUNTERS:  full = 65117   expected: 66666
         COUNTERS:      full = 16280   after computing diffs = 16279
CORE:  3
         NON COUNTERS:  full = 65115   expected: 66666
         COUNTERS:      full = 16280   after computing diffs = 16279
CORE:  4
         NON COUNTERS:  full = 65117   expected: 66666
         COUNTERS:      full = 16279   after computing diffs = 16278
CORE:  5
         NON COUNTERS

CORE:  13
         NON COUNTERS:  full = 48838   expected: 50000
         COUNTERS:      full = 16280   after computing diffs = 16279
CORE:  14
         NON COUNTERS:  full = 48839   expected: 50000
         COUNTERS:      full = 16279   after computing diffs = 16278
CORE:  15
         NON COUNTERS:  full = 48838   expected: 50000
         COUNTERS:      full = 16280   after computing diffs = 16279

-------------------------------------------------- PARSED 16 LOGS -------------------------

EMPTY ERROR LOG
COUNTERS DIR: 
0xd00_400k_100_counters_merged	0xd00_400k_300_counters_merged
0xd00_400k_200_counters_merged	0xd00_400k_400_counters_merged
0xd00_400k_20_counters_merged	0xd00_400k_40_counters_merged
0xd00_400k_2_counters_merged	0xd00_400k_50_counters_merged
NON_COUNTERS DIR: 
0xd00_400k_100_non_counters_merged  0xd00_400k_300_non_counters_merged
0xd00_400k_200_non_counters_merged  0xd00_400k_400_non_counters_merged
0xd00_400k_20_non_counters_merged   0xd00_400k_40_non_counters_merged

CORE:  9
         NON COUNTERS:  full = 378295   expected: 400000
         COUNTERS:      full = 19450   after computing diffs = 19449
CORE:  10
         NON COUNTERS:  full = 378029   expected: 400000
         COUNTERS:      full = 19448   after computing diffs = 19447
CORE:  11
         NON COUNTERS:  full = 377715   expected: 400000
         COUNTERS:      full = 19448   after computing diffs = 19447
CORE:  12
         NON COUNTERS:  full = 378088   expected: 400000
         COUNTERS:      full = 19444   after computing diffs = 19443
CORE:  13
         NON COUNTERS:  full = 377240   expected: 400000
         COUNTERS:      full = 19445   after computing diffs = 19444
CORE:  14
         NON COUNTERS:  full = 379055   expected: 400000
         COUNTERS:      full = 19450   after computing diffs = 19449
CORE:  15
         NON COUNTERS:  full = 375937   expected: 400000
         COUNTERS:      full = 19441   after computing diffs = 19440

------------------------------------------------

NON_COUNTERS DIR: 
0xd00_400k_100_non_counters_merged  0xd00_400k_300_non_counters_merged
0xd00_400k_10_non_counters_merged   0xd00_400k_350_non_counters_merged
0xd00_400k_200_non_counters_merged  0xd00_400k_400_non_counters_merged
0xd00_400k_20_non_counters_merged   0xd00_400k_40_non_counters_merged
0xd00_400k_250_non_counters_merged  0xd00_400k_50_non_counters_merged
0xd00_400k_2_non_counters_merged
----------------------------------------
Concatenating all per-core logs with ITR-DELAY =  30
CORE:  0
         NON COUNTERS:  full = 576499   expected: 666666
         COUNTERS:      full = 19615   after computing diffs = 19614
CORE:  1
         NON COUNTERS:  full = 583271   expected: 666666
         COUNTERS:      full = 19624   after computing diffs = 19623
CORE:  2
         NON COUNTERS:  full = 576480   expected: 666666
         COUNTERS:      full = 19614   after computing diffs = 19613
CORE:  3
         NON COUNTERS:  full = 582025   expected: 666666
         COUNTERS:      full =

In [104]:
counters_full_df

Unnamed: 0,i,joules_diff,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,i_0,joules_diff_0,instructions_diff_0,cycles_diff_0,...,instructions_diff_0.1,cycles_diff_0.1,ref_cycles_diff_0,llc_miss_diff_0,i_0.1,joules_diff_0.1,instructions_diff_0.2,cycles_diff_0.2,ref_cycles_diff_0.1,llc_miss_diff_0.1
0,229.0,0.124074,683405.0,1319743.0,2944051.0,3762.0,197,0.091134,642851.0,1328563.0,...,689036.0,1306852.0,2915312.0,3790.0,150.0,0.091134,419225.0,821762.0,1834598.0,1624.0
1,261.0,0.136884,733214.0,1309428.0,2921025.0,4298.0,230,0.128954,699880.0,1320692.0,...,780722.0,1339134.0,2987290.0,3816.0,182.0,0.128954,481131.0,951926.0,2123612.0,2666.0
2,294.0,0.135725,694060.0,1334245.0,2976415.0,3909.0,262,0.131821,682177.0,1277731.0,...,611050.0,1278174.0,2851309.0,3428.0,214.0,0.131821,593633.0,1200660.0,2678469.0,3426.0
3,325.0,0.135603,751164.0,1324616.0,2954897.0,3739.0,295,0.134993,645228.0,1213300.0,...,544709.0,1163770.0,2596138.0,2899.0,247.0,0.134993,712683.0,1321082.0,2947009.0,3822.0
4,357.0,0.133468,685368.0,1318884.0,2942108.0,3574.0,328,0.132187,537266.0,1051257.0,...,633846.0,1236825.0,2759060.0,3206.0,280.0,0.132187,718758.0,1291835.0,2881817.0,3442.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19618,0.0,0.000000,0.0,0.0,0.0,0.0,583323,0.124684,480797.0,1013067.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
19619,0.0,0.000000,0.0,0.0,0.0,0.0,583335,0.111569,162558.0,393979.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
19620,0.0,0.000000,0.0,0.0,0.0,0.0,583365,0.116388,597311.0,1311616.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
19621,0.0,0.000000,0.0,0.0,0.0,0.0,583390,0.131028,624714.0,1303273.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [103]:
non_counters_full_df

Unnamed: 0_level_0,rx_bytes,rx_desc,tx_bytes,tx_desc,rx_bytes_0,rx_desc_0,tx_bytes_0,tx_desc_0,rx_bytes_0,rx_desc_0,...,tx_bytes_0,tx_desc_0,rx_bytes_0,rx_desc_0,tx_bytes_0,tx_desc_0,rx_bytes_0,rx_desc_0,tx_bytes_0,tx_desc_0
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,66,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
584804,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,66,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
584805,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
584806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,66,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
584807,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
merged_counters_df

Unnamed: 0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff
0,676844.2500,1.278246e+06,2.855681e+06,3953.6875,0.104779
1,695104.3750,1.266143e+06,2.824477e+06,3805.1250,0.134848
2,716595.0000,1.298005e+06,2.895565e+06,3732.4375,0.133331
3,708744.3125,1.256791e+06,2.803615e+06,3290.5625,0.133567
4,717205.3750,1.263793e+06,2.819248e+06,3375.3125,0.132824
...,...,...,...,...,...
19447,159059.2500,3.476983e+05,7.756376e+05,850.7500,0.031930
19448,138686.7500,2.898031e+05,6.464916e+05,690.3750,0.039132
19449,43147.0625,9.407319e+04,2.098458e+05,178.5000,0.014709
19450,38942.1875,7.963438e+04,1.776486e+05,190.1875,0.008212


In [102]:
merged_non_counters_df

Unnamed: 0_level_0,rx_bytes,rx_desc,tx_bytes,tx_desc
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
107,8.2500,0.1250,0.000,0.000
108,7.3750,0.0625,0.000,0.000
109,20.7500,0.1250,4.125,0.125
110,0.0000,0.0000,8.250,0.250
111,15.1875,0.1250,0.000,0.000
...,...,...,...,...
584804,0.0000,0.0000,4.125,0.125
584805,0.0000,0.0000,8.250,0.250
584806,0.0000,0.0000,4.125,0.125
584807,4.1250,0.0625,8.250,0.250


## BELOW IS REDUNDANT

In [117]:
#global itrs

In [116]:
# # {DVFS --> {QPS --> {ITR-DELAY --> df of concatenated core data}}}
# per_dvfs_counters_df_dict = {}
# per_dvfs_non_counters_df_dict = {}
# for dvfs in ['0xd00', '0x1d00']:

#     # {QPS --> {ITR-DELAY --> df of concatenated core data}}
#     per_qps_counters_df_dict = {}
#     per_qps_non_counters_df_dict = {}
#     for qps in ['200k', '400k', '600k']:
#         rdtsc_dir = qps + '_qps/linux_mcd_rdtsc_0_' + dvfs + '_135_' + qps +'/'
#         logs_dir = qps + '_qps/linux_mcd_dmesg_0_' + dvfs + '_135_' + qps + '/'
#         itrs = list_itrs(rdtsc_dir)
#         # e.g. 200k --> 200000
#         qps = qps[:-1] + '000'
        
#         print('ITR_DELAYS: ', itrs)
#         # {ITR-DELAY --> dfs of concatenated core data}
#         per_itr_counters_df_dict = {}
#         per_itr_non_counters_df_dict = {}
#         for itr in itrs:
#             print('Concatenating all per-core logs where ITR-DELAY = ', str(itr))
#             rdtsc_file = rdtsc_dir + 'linux.mcd.rdtsc.0_' + str(itr) + '_' + dvfs + '_135_' + qps
#             start, end = eigen_analysis.get_rdtsc(rdtsc_file)
            
#             ############################
#             # CONCATENATING PER-CORE DFS
#             ############################
#             counters_full_df = pd.DataFrame()
#             non_counters_full_df = pd.DataFrame()
#             for c in range(0,16):
#                 file = logs_dir + 'linux.mcd.dmesg.0_' + str(c) + '_' + str(itr) + '_' + dvfs + '_135_' + qps
#                 df = pd.read_csv(file, sep = ' ', names = cols, index_col='i')
#                 df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]
#                 df['timestamp'] = df['timestamp'] - df['timestamp'].min()
#                 df['timestamp'] = df['timestamp'] * time_unit
#                 df['joules'] = df['joules'] * joules_unit

#                 ##############################################
#                 # CONCATENATING MILLISECOND-LEVEL PER-CORE DFS
#                 ##############################################
#                 # removing empty/zero log-entries
#                 # -> these represent interrupt occurrences at a frequency greater than per-1ms
#                 counters_df = df[['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss', 'timestamp']].copy()
#                 counters_df = counters_df[(counters_df['joules'] > 0) & (counters_df['instructions'] > 0) \
#                                                     & (counters_df['cycles'] > 0) & (counters_df['ref_cycles'] > 0) \
#                                                     & (counters_df['llc_miss'] > 0)]
#                 # computing diffs of counter values
#                 df_diffs = counters_df.diff().dropna().copy()
#                 df_diffs.columns = [f'{c}_diff' for c in df_diffs.columns]
#                 tmp = df_diffs.copy()
#                 # isolating log entries with negative diffs due to register overflow
#                 tmp_neg = tmp[(tmp['joules_diff'] < 0) | (tmp['instructions_diff'] < 0) | (tmp['cycles_diff'] < 0) \
#                                | (tmp['ref_cycles_diff'] < 0) | (tmp['llc_miss_diff'] < 0) | (tmp['timestamp_diff'] < 0)]
#                 # re-computing diffs that expose register overflow
#                 for i,j in tmp_neg.iterrows():
#                     prev = counters_df.shift(1).loc[i]
#                     cur = counters_df.loc[i]
#                     # handle case of joules counter overflow at (2**32 - 1)
#                     if (tmp.loc[i]['joules_diff'] < 0) & (tmp.loc[i]['timestamp_diff'] >= 0.001):
#                         print('COUNTER OVERFLOW AT LOG ENTRY #', i)
#                         tmp.loc[i, ['joules_diff']] = (2**32 - 1) * joules_unit - prev['joules'] + cur['joules'] 
#                     else:
#                         print('CORE: ', str(c), 'UNEXPLAINED NEGATIVE DIFFS ... DROPPING ROW ', i)
#                         #print(tmp_neg.loc[i])
#                         #print('prev: ', list(prev))
#                         #print('curr: ', list(cur))
#                         df_diffs = df_diffs.drop(i, axis=0)
#                 # concatenate per-core dataframes
#                 # SCHEME 1: merge on index i where
#                 #           i = order of occurrence of log-entry in a per-core log
#                 df_diffs = df_diffs.drop(['timestamp_diff'], axis=1).reset_index()
#                 if counters_full_df.shape[0] == 0:
#                     counters_full_df = df_diffs.copy()
#                 else:
#                     counters_full_df = counters_full_df.merge(df_diffs, left_index=True, right_index=True, \
#                                                               how='outer', sort=True, suffixes=('', '_0')).fillna(0)

#                 ##############################################
#                 # CONCATENATING MICROSECOND-LEVEL PER-CORE DFS
#                 ##############################################
#                 non_counters_df = df[['rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc']]

#                 # merge per-core dataframes
#                 # SCHEME 1
#                 if non_counters_full_df.shape[0] == 0:
#                     non_counters_full_df = non_counters_df.copy()
#                 else:
#                     non_counters_full_df = non_counters_full_df.merge(non_counters_df, left_index=True, right_index=True, \
#                                                                       how='outer', sort=True, suffixes=('', '_0')).fillna(0)
# #                print('CORE: ', str(c))
# #                print('       NON COUNTERS  full =', non_counters_df.shape[0], \
# #                      '      expected length:', int(20 * 10**6 / itr))        
# #                print('       COUNTERS      after dropping 0s =', counters_df.shape[0], \
# #                      '   after applying diff =', tmp.shape[0], '   after dropping -ve diffs =', df_diffs.shape[0])   

#             print()
#             print('---------------------------------------- PARSED 16 LOGS ----------------')
#             print()
#             # append concatenated dataframe to per-itr dict
#             per_itr_counters_df_dict[str(itr)] = counters_full_df
#             per_itr_non_counters_df_dict[str(itr)] = non_counters_full_df
#             time.sleep(2)
#         print()
#         print('---------------------------------------- PARSED 12 ITRS ----------------------------')
#         print()
#         # append per-itr dict to per-qps dict
#         per_qps_counters_df_dict[qps] = per_itr_counters_df_dict
#         per_qps_non_counters_df_dict[qps] = per_itr_non_counters_df_dict
#         time.sleep(3)
#     print()
#     print('---------------------------------------- PARSED QPSS ----------------------------------------')
#     print()
#     per_dvfs_counters_df_dict[dvfs] = per_qps_counters_df_dict
#     per_dvfs_non_counters_df_dict[dvfs] = per_qps_non_counters_df_dict
#     time.sleep(4)
# print()
# print('---------------------------------------- PARSED DVFSS ---------------------------------------------')
# print()


In [115]:
#df_qps_itr_counter_list['400000']['100']

In [114]:
#df_qps_itr_counter_list['200000']['100']

In [113]:
#df_qps_itr_counter_list['600000']['100']

In [112]:
#df_qps_itr_non_counter_list['200000']['100'].head(50)

In [111]:
# for dvfs in ['0x1d00']:

#     df_qps_itr_counter_merged_list = {}
#     df_qps_itr_non_counter_merged_list = {}
#     for qps in ['200000', '400000', '600000']:

#         df_itr_counter_merged_list = {}
#         df_itr_non_counter_merged_list = {}
#         for itr in df_itr_counter_list.keys():
#             # creating a larger dataframe of average readings across log files of different cores
#             df_merged_counter = pd.DataFrame(columns=['instructions_diff', 'cycles_diff', 'ref_cycles_diff', \
#                                              'llc_miss_diff', 'joules_diff'])
#             df_merged_non_counter = pd.DataFrame(columns=['rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc'])

#             df_itr_counter = df_qps_itr_counter_list[qps][itr]
#             df_itr_non_counter = df_qps_itr_non_counter_list[qps][itr]

#             # averaging..
#             for col in df_merged_counter.columns:
#                 df_merged_counter[col] = (df_itr_counter[[col, col+'_0']].sum(axis=1))/16
#             for col in df_merged_non_counter.columns:
#                 df_merged_non_counter[col] = (df_itr_non_counter[[col, col+'_0']].sum(axis=1))/16
#             print()
#             print('ITR-DELAY: ', itr)
#             print('merged_counter_df.head(): ')
#             print(df_merged_counter.head(2)['joules_diff'])
#             df_itr_counter_merged_list[itr] = df_merged_counter
#             #print('merged_non_counter_df.head(): ')
#             #print(df_merged_non_counter.head(2)['rx_bytes'])
#             df_itr_non_counter_merged_list[itr] = df_merged_non_counter    
#         print()
#         print('---------------------------------------- PARSED 12 ITRS ----------------')
#         print()
#         df_qps_itr_counter_merged_list[qps] = df_itr_counter_merged_list
#         df_qps_itr_non_counter_merged_list[qps] = df_itr_non_counter_merged_list

In [110]:
# for dvfs in ['0x1d00']:
#     for qps in ['200k', '400k', '600k']:
#         counter_df_outdir = qps[:3] + 'k_' + dvfs + '_merged_counter_per_core_dfs/'
#         non_counter_df_outdir = qps[:3] + 'k_' + dvfs + '_merged_non_counter_per_core_dfs/'
#         !mkdir $counter_df_outdir
#         !mkdir $non_counter_df_outdir
#         qps_val = qps[:-1] + '000'
#         for itr in df_qps_itr_counter_merged_list[qps_val].keys():
#             outfile = counter_df_outdir + 'df_itr_' + itr + '_merged_cores_counters'
#             df_qps_itr_counter_merged_list[qps_val][itr].to_csv(outfile)
#         for itr in df_itr_non_counter_merged_list.keys():
#             outfile = non_counter_df_outdir + 'df_itr_' + itr + '_merged_cores_non_counters'
#             df_qps_itr_non_counter_merged_list[qps_val][itr].to_csv(outfile)