## Merging of Per-Core Logs
### Can we assume the following about per-core behavior: 
#### If all cores of a running experiment do similar work, then their individual behaviors - as exposed by their per-core logs - are similar to each other, and hence, the behavior of all cores can be merged into an overall behavior of the full experimental run.

In [2]:
import os
import pandas as pd
import numpy as np

In [3]:
import eigen_analysis

cols = eigen_analysis.LINUX_COLS
time_unit = eigen_analysis.TIME_CONVERSION_khz
joules_unit = eigen_analysis.JOULE_CONVERSION

In [4]:
# get all itrs explored for some (dvfs, qps) pair
def list_itrs(rdtsc_dirname):
    itrs = []
    for file in os.listdir(rdtsc_dirname):
        tags = file.split('_')
        itr = tags[1]
        itrs.append(int(itr))
    itrs = list(set(itrs))
    return itrs

### Quick Test: 
#### qps = 400k  ,  dvfs = 0xd00  ,  itr = X  ,  cores = [0, 1]

In [5]:
# start, end = eigen_analysis.get_rdtsc(rdtsc)
# df1 = df1[(df1['timestamp'] >= start) & (df1['timestamp'] <= end)]
# df2 = df2[(df2['timestamp'] >= start) & (df2['timestamp'] <= end)]
# df1['timestamp'] = df1['timestamp'] - df1['timestamp'].min()
# df1['timestamp'] = df1['timestamp'] * time_unit
# df1['joules'] = df1['joules'] * joules_unit
# df2['timestamp'] = df2['timestamp'] - df2['timestamp'].min()
# df2['timestamp'] = df2['timestamp'] * time_unit
# df2['joules'] = df2['joules'] * joules_unit

# df1_copy = df1[(df1['joules'] > 0) & (df1['instructions'] > 0) & (df1['cycles'] > 0) \
#              & (df1['ref_cycles'] > 0) & (df1['llc_miss'] > 0)].copy()
# df2_copy = df2[(df2['joules'] > 0) & (df2['instructions'] > 0) & (df2['cycles'] > 0) \
#              & (df2['ref_cycles'] > 0) & (df2['llc_miss'] > 0)].copy()

# tmp1 = df1_copy.diff().dropna()
# tmp1.columns = [f'{c}_diff' for c in tmp1.columns]
# tmp2 = df2_copy.diff().dropna()
# tmp2.columns = [f'{c}_diff' for c in tmp2.columns]


# df1_diffs_neg = tmp1[(tmp1['joules_diff'] < 0) | (tmp1['instructions_diff'] < 0) | (tmp1['cycles_diff'] < 0) \
#                    | (tmp1['ref_cycles_diff'] < 0) | (tmp1['llc_miss_diff'] < 0) | (tmp1['timestamp_diff'] < 0)]
# df1_diffs = tmp1[(tmp1['joules_diff'] >= 0) | (tmp1['instructions_diff'] >= 0) | (tmp1['cycles_diff'] >= 0) \
#                    | (tmp1['ref_cycles_diff'] >= 0) | (tmp1['llc_miss_diff'] >= 0) | (tmp1['timestamp_diff'] >= 0)]
# if df1_diffs_neg.shape[0] > 0:
#     print('NEGATIVE DIFFS')
#     print(df1_diffs_neg)

# df2_diffs_neg = tmp2[(tmp2['joules_diff'] < 0) | (tmp2['instructions_diff'] < 0) | (tmp2['cycles_diff'] < 0) \
#                    | (tmp2['ref_cycles_diff'] < 0) | (tmp2['llc_miss_diff'] < 0) | (tmp2['timestamp_diff'] < 0)]
# df2_diffs = tmp2[(tmp2['joules_diff'] >= 0) | (tmp2['instructions_diff'] >= 0) | (tmp2['cycles_diff'] >= 0) \
#                    | (tmp2['ref_cycles_diff'] >= 0) | (tmp2['llc_miss_diff'] >= 0) | (tmp2['timestamp_diff'] >= 0)]
# if df2_diffs_neg.shape[0] > 0:
#     print('NEGATIVE DIFFS')
#     print(df2_diffs_neg)
    
# df1_diffs = df1_diffs.drop(['timestamp_diff'], axis=1).reset_index()
# df2_diffs = df2_diffs.drop(['timestamp_diff'], axis=1).reset_index()
# df11 = df1_diffs.copy()
# df22 = df2_diffs.copy()

#df_full = df11.merge(df22, left_index=True, right_index=True, how='outer', sort=True).fillna(0).copy()

# df_final = pd.DataFrame(columns=['instructions_diff', 'cycles_diff', 'ref_cycles_diff', 'llc_miss_diff', 'joules_diff'])
# for i,j in df_full.iterrows():
#     new_row = {}
#     for col in df_final.columns:
#         sum = 0
#         for s in ['x', 'y']:
#             sum += j[col+'_'+s]
#         new_row[col] = sum / 2
#     df_final = df_final._append(new_row, ignore_index = True)

#df_final.corr()

In [6]:
rdtsc_dir = '200k_qps/linux_mcd_rdtsc_0_0xd00_135_200k/'
logs_dir = '200k_qps/linux_mcd_dmesg_0_0xd00_135_200k/'
itrs = list_itrs(rdtsc_dir)
print(itrs)

[350, 2, 100, 40, 200, 10, 300, 400, 50, 20, 250, 30]


In [17]:
df_itr_list = {}
for itr in itrs:
    print('ITR-DELAY:', str(itr))
    df_full = pd.DataFrame()

    rdtsc_file = rdtsc_dir + 'linux.mcd.rdtsc.0_' + str(itr) + '_0xd00_135_200000'
    start, end = eigen_analysis.get_rdtsc(rdtsc_file)

    for c in range(0,16):
        file = logs_dir + 'linux.mcd.dmesg.0_' + str(c) + '_' + str(itr) + '_0xd00_135_200000'
        df = pd.read_csv(file, sep = ' ', names = cols, index_col='i').drop(['c1', 'c1e', 'c3', 'c6', 'c7', \
                                                                             'rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc'], axis=1)
        df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]
        df['timestamp'] = df['timestamp'] - df['timestamp'].min()
        df['timestamp'] = df['timestamp'] * time_unit
        df['joules'] = df['joules'] * joules_unit

        # removing empty/zero log-entries
        # -> these represent interrupt occurrences at a frequency larger than ~1ms
        df_copy = df[(df['joules'] > 0) & (df['instructions'] > 0) & (df['cycles'] > 0) \
                 & (df['ref_cycles'] > 0) & (df['llc_miss'] > 0)].copy()

        # computing diffs
        df_diffs = df_copy.diff().dropna().copy()
        df_diffs.columns = [f'{c}_diff' for c in df_diffs.columns]
        tmp = df_diffs.copy()

        # isolating log entries with negative diffs due to register overflow
        tmp_neg = tmp[(tmp['joules_diff'] < 0) | (tmp['instructions_diff'] < 0) | (tmp['cycles_diff'] < 0) \
                       | (tmp['ref_cycles_diff'] < 0) | (tmp['llc_miss_diff'] < 0) | (tmp['timestamp_diff'] < 0)]
        # re-computing diffs that expose register overflow
        for i,j in tmp_neg.iterrows():
            prev = df_copy.shift(1).loc[i]
            cur = df_copy.loc[i]
            # handle known case of overflow at (2**32 - 1)
            if (tmp.loc[i]['joules_diff'] < 0) & (tmp.loc[i]['timestamp_diff'] >= 0.001):
                tmp.loc[i, ['joules_diff']] = (2**32 - 1) * joules_unit - prev['joules_diff'] + cur['joules_diff'] 
            else:
                print('UNEXPLAINED NEGATIVE DIFFS ... DROPPING ROW ', i)
                print(tmp_neg.loc[i])
                print('prev: ', list(prev))
                print('curr: ', list(cur))
                df_diffs = df_diffs.drop(i, axis=0)

        # dropping timestamp_diff column - subject to change
        df_diffs = df_diffs.drop(['timestamp_diff'], axis=1).reset_index()

        # merge per-core dataframes
        # SCHEME 1: merge on order of occurrence per-core
        if df_full.shape[0] == 0:
            df_full = df_diffs.copy()
        else:
            df_full = df_full.merge(df_diffs, left_index=True, right_index=True, how='outer', \
                                    sort=True, suffixes=('', '_0')).fillna(0)

        # total entries ~= (20 s * 1000 ms/s * 1000 us/ms )/ ITR us
        print('CORE:', str(c), '      full:', df.shape[0], '   drop 0:', df_copy.shape[0], \
              '   apply diff:', tmp.shape[0], '   drop -ve diff:', df_diffs.shape[0])        
    print()
    print('---------------------------------------- PARSED 16 LOGS ----------------')
    print()
    df_itr_list[str(itr)] = df_full
print()
print('---------------------------------------- PARSED 12 ITRS ----------------')
print()

ITR-DELAY: 350
CORE: 0       full: 55812    drop 0: 18604    apply diff: 18603    drop -ve diff: 18603
CORE: 1       full: 55808    drop 0: 18603    apply diff: 18602    drop -ve diff: 18602
CORE: 2       full: 55813    drop 0: 18604    apply diff: 18603    drop -ve diff: 18603
CORE: 3       full: 55803    drop 0: 18602    apply diff: 18601    drop -ve diff: 18601
CORE: 4       full: 55815    drop 0: 18605    apply diff: 18604    drop -ve diff: 18604
CORE: 5       full: 55808    drop 0: 18603    apply diff: 18602    drop -ve diff: 18602
CORE: 6       full: 55816    drop 0: 18606    apply diff: 18605    drop -ve diff: 18605
UNEXPLAINED NEGATIVE DIFFS ... DROPPING ROW  376
instructions_diff   -9.851471e+07
cycles_diff         -2.511755e+08
ref_cycles_diff     -5.622607e+08
llc_miss_diff       -6.299188e+06
joules_diff         -2.993380e+02
timestamp_diff       3.582446e-04
Name: 376, dtype: float64
prev:  [8172636828947.0, 14246686466256.0, 20958450680056.0, 29528013388.0, 17830.739383, 

CORE: 2       full: 525196    drop 0: 19301    apply diff: 19300    drop -ve diff: 19300
CORE: 3       full: 522889    drop 0: 19298    apply diff: 19297    drop -ve diff: 19297
CORE: 4       full: 518572    drop 0: 19269    apply diff: 19268    drop -ve diff: 19268
CORE: 5       full: 527667    drop 0: 19302    apply diff: 19301    drop -ve diff: 19301
UNEXPLAINED NEGATIVE DIFFS ... DROPPING ROW  57927
instructions_diff   -8.099582e+08
cycles_diff         -1.605617e+09
ref_cycles_diff     -3.582787e+09
llc_miss_diff       -5.015000e+06
joules_diff         -5.897612e+02
timestamp_diff       8.917369e-04
Name: 57927, dtype: float64
prev:  [6108789152433.0, 10321925662713.0, 14387819301537.0, 17933894844.0, 112020.161673, 2.11085106167278]
curr:  [6107979194268.0, 10320320045917.0, 14384236514720.0, 17928879844.0, 111430.400497, 2.1117427985319996]
19292
19291
CORE: 6       full: 523387    drop 0: 19293    apply diff: 19292    drop -ve diff: 19291
CORE: 7       full: 529257    drop 0: 19

CORE: 5       full: 78062    drop 0: 19518    apply diff: 19517    drop -ve diff: 19517
CORE: 6       full: 78106    drop 0: 19527    apply diff: 19526    drop -ve diff: 19526
CORE: 7       full: 78038    drop 0: 19514    apply diff: 19513    drop -ve diff: 19513
CORE: 8       full: 78105    drop 0: 19527    apply diff: 19526    drop -ve diff: 19526
CORE: 9       full: 78074    drop 0: 19521    apply diff: 19520    drop -ve diff: 19520
CORE: 10       full: 78106    drop 0: 19528    apply diff: 19527    drop -ve diff: 19527
CORE: 11       full: 78074    drop 0: 19520    apply diff: 19519    drop -ve diff: 19519
CORE: 12       full: 78102    drop 0: 19527    apply diff: 19526    drop -ve diff: 19526
CORE: 13       full: 78069    drop 0: 19521    apply diff: 19520    drop -ve diff: 19520
CORE: 14       full: 78101    drop 0: 19525    apply diff: 19524    drop -ve diff: 19524
CORE: 15       full: 78059    drop 0: 19519    apply diff: 19518    drop -ve diff: 19518

-------------------------

In [19]:
df_itr_list['350']

Unnamed: 0,i,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,i_0,instructions_diff_0,cycles_diff_0,ref_cycles_diff_0,...,cycles_diff_0.1,ref_cycles_diff_0.1,llc_miss_diff_0,joules_diff_0,i_0.1,instructions_diff_0.1,cycles_diff_0.2,ref_cycles_diff_0.2,llc_miss_diff_0.1,joules_diff_0.1
0,60.0,399537.0,990997.0,2210670.0,2764.0,0.105286,100.0,520473.0,1167784.0,2605070.0,...,1114095.0,2485271.0,3127.0,0.105286,42.0,331939.0,858689.0,1915508.0,2468.0,0.089853
1,63.0,445229.0,987064.0,2201970.0,2381.0,0.124745,103.0,489310.0,1103770.0,2462274.0,...,1020831.0,2277225.0,2295.0,0.243268,45.0,431164.0,1012455.0,2258549.0,2685.0,0.235765
2,66.0,452146.0,1042160.0,2324785.0,2441.0,0.118523,106.0,404601.0,831156.0,1854115.0,...,752779.0,1679825.0,2042.0,0.116022,48.0,428884.0,909838.0,2029623.0,2531.0,0.118767
3,69.0,507916.0,936175.0,2088522.0,2147.0,0.116022,109.0,362481.0,926263.0,2066221.0,...,672659.0,1500663.0,1950.0,0.113521,51.0,357663.0,767242.0,1716713.0,1958.0,0.117913
4,72.0,435888.0,758646.0,1692411.0,1762.0,0.113521,112.0,511304.0,880974.0,1965272.0,...,777841.0,1735244.0,1996.0,0.113704,54.0,376845.0,701222.0,1564347.0,2100.0,0.110715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18600,55860.0,446134.0,880526.0,1964779.0,1840.0,0.108824,55899.0,532637.0,927019.0,2067961.0,...,723098.0,1613096.0,1425.0,0.108824,55838.0,622547.0,1128650.0,2517780.0,2949.0,0.124745
18601,55863.0,683097.0,1225726.0,2734323.0,2798.0,0.127307,55902.0,623175.0,1292325.0,2882890.0,...,608056.0,1356446.0,1162.0,0.127307,55841.0,774626.0,1396600.0,3115499.0,3511.0,0.123830
18602,55866.0,578069.0,1038438.0,2316520.0,2152.0,0.116937,0.0,0.0,0.0,0.0,...,459849.0,1025846.0,850.0,0.116937,0.0,0.0,0.0,0.0,0.0,0.000000
18603,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000


In [20]:
for itr in df_itr_list.keys():
    df_itr_list[itr] = df_itr_list[itr].drop(['i', 'i_0'], axis=1)

In [21]:
df_itr_list['350']

Unnamed: 0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,instructions_diff_0,cycles_diff_0,ref_cycles_diff_0,llc_miss_diff_0,joules_diff_0,...,instructions_diff_0.1,cycles_diff_0.1,ref_cycles_diff_0.1,llc_miss_diff_0.1,joules_diff_0.1,instructions_diff_0.2,cycles_diff_0.2,ref_cycles_diff_0.2,llc_miss_diff_0.2,joules_diff_0.2
0,399537.0,990997.0,2210670.0,2764.0,0.105286,520473.0,1167784.0,2605070.0,3700.0,0.000000,...,529876.0,1114095.0,2485271.0,3127.0,0.105286,331939.0,858689.0,1915508.0,2468.0,0.089853
1,445229.0,987064.0,2201970.0,2381.0,0.124745,489310.0,1103770.0,2462274.0,3118.0,0.235765,...,480342.0,1020831.0,2277225.0,2295.0,0.243268,431164.0,1012455.0,2258549.0,2685.0,0.235765
2,452146.0,1042160.0,2324785.0,2441.0,0.118523,404601.0,831156.0,1854115.0,1917.0,0.118767,...,375781.0,752779.0,1679825.0,2042.0,0.116022,428884.0,909838.0,2029623.0,2531.0,0.118767
3,507916.0,936175.0,2088522.0,2147.0,0.116022,362481.0,926263.0,2066221.0,2080.0,0.117913,...,353457.0,672659.0,1500663.0,1950.0,0.113521,357663.0,767242.0,1716713.0,1958.0,0.117913
4,435888.0,758646.0,1692411.0,1762.0,0.113521,511304.0,880974.0,1965272.0,2652.0,0.110715,...,428203.0,777841.0,1735244.0,1996.0,0.113704,376845.0,701222.0,1564347.0,2100.0,0.110715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18600,446134.0,880526.0,1964779.0,1840.0,0.108824,532637.0,927019.0,2067961.0,2181.0,0.124745,...,435597.0,723098.0,1613096.0,1425.0,0.108824,622547.0,1128650.0,2517780.0,2949.0,0.124745
18601,683097.0,1225726.0,2734323.0,2798.0,0.127307,623175.0,1292325.0,2882890.0,2753.0,0.123830,...,411970.0,608056.0,1356446.0,1162.0,0.127307,774626.0,1396600.0,3115499.0,3511.0,0.123830
18602,578069.0,1038438.0,2316520.0,2152.0,0.116937,0.0,0.0,0.0,0.0,0.000000,...,308809.0,459849.0,1025846.0,850.0,0.116937,0.0,0.0,0.0,0.0,0.000000
18603,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000


In [23]:
df_itr_merged_list = {}
for itr in df_itr_list.keys():
    # creating a larger and more sparse dataframe of average diffs across log files
    df_final = pd.DataFrame(columns=['instructions_diff', 'cycles_diff', 'ref_cycles_diff', \
                                     'llc_miss_diff', 'joules_diff'])
    df_itr = df_itr_list[itr]
    for i,j in df_itr.iterrows():
        new_row = {}
        for col in df_final.columns:
            row = df_itr[[col, col+'_0']].loc[i]
            avg = np.average(row.to_numpy())
            new_row[col] = avg
        df_final = df_final._append(new_row, ignore_index = True)
    print()
    print('ITR-DELAY: ', itr, 'merged_df.head(): ')
    print(df_final.head())
    df_itr_merged_list[itr] = df_final 

ITR-DELAY:  350 merged_df.head(): 
   instructions_diff   cycles_diff  ref_cycles_diff  llc_miss_diff  \
0        389777.3750  9.962972e+05     2.222783e+06      2590.2500   
1        449189.9375  1.005234e+06     2.242472e+06      2615.3750   
2        444722.6250  8.569940e+05     1.912113e+06      2217.8125   
3        391786.3125  7.666286e+05     1.713039e+06      1982.6875   
4        413895.3125  7.385942e+05     1.647678e+06      1982.5000   

   joules_diff  
0     0.091393  
1     0.180575  
2     0.118176  
3     0.123418  
4     0.112164  
ITR-DELAY:  2 merged_df.head(): 
   instructions_diff   cycles_diff  ref_cycles_diff  llc_miss_diff  \
0         469141.250  1.207002e+06     2.694689e+06      3417.8125   
1         479211.625  1.106415e+06     2.468154e+06      3021.5625   
2         470889.125  1.070030e+06     2.387010e+06      2931.5625   
3         481284.375  1.032341e+06     2.302950e+06      2656.6250   
4         441570.625  9.291194e+05     2.072688e+06      21

In [24]:
df_itr_merged_list

{'350':        instructions_diff   cycles_diff  ref_cycles_diff  llc_miss_diff  \
 0            389777.3750  9.962972e+05     2.222783e+06      2590.2500   
 1            449189.9375  1.005234e+06     2.242472e+06      2615.3750   
 2            444722.6250  8.569940e+05     1.912113e+06      2217.8125   
 3            391786.3125  7.666286e+05     1.713039e+06      1982.6875   
 4            413895.3125  7.385942e+05     1.647678e+06      1982.5000   
 ...                  ...           ...              ...            ...   
 18600        446770.5000  8.562139e+05     1.911983e+06      2020.5625   
 18601        447022.5625  8.451451e+05     1.886767e+06      2037.5000   
 18602        285128.0625  5.092758e+05     1.136090e+06      1243.0000   
 18603        117732.4375  2.261037e+05     5.043916e+05       524.7500   
 18604         26356.7500  4.796988e+04     1.072764e+05       100.5000   
 
        joules_diff  
 0         0.091393  
 1         0.180575  
 2         0.118176  
 3 

In [25]:
for itr in df_itr_merged_list.keys():
    outfile = 'df_itr_' + itr + '_merged_cores'
    df_itr_merged_list[itr].to_csv(outfile)

In [26]:
for itr in df_itr_merged_list.keys():
    print(df_itr_merged_list[itr].corr())

                   instructions_diff  cycles_diff  ref_cycles_diff  \
instructions_diff           1.000000     0.984449         0.983843   
cycles_diff                 0.984449     1.000000         0.999961   
ref_cycles_diff             0.983843     0.999961         1.000000   
llc_miss_diff               0.926167     0.966525         0.967003   
joules_diff                 0.919070     0.960285         0.960799   

                   llc_miss_diff  joules_diff  
instructions_diff       0.926167     0.919070  
cycles_diff             0.966525     0.960285  
ref_cycles_diff         0.967003     0.960799  
llc_miss_diff           1.000000     0.997721  
joules_diff             0.997721     1.000000  
                   instructions_diff  cycles_diff  ref_cycles_diff  \
instructions_diff           1.000000     0.888553         0.888531   
cycles_diff                 0.888553     1.000000         1.000000   
ref_cycles_diff             0.888531     1.000000         1.000000   
llc_miss_di