## Checking whether the merged-logs correlation matrix is similar to the per-log correlation matrices
#### Merge scheme 1: merge on index i

In [1]:
import os
import time
import pandas as pd
import numpy as np

In [2]:
import eigen_analysis

# Column names used when reading the per-core log files (passed as `names=` to pd.read_csv).
cols = eigen_analysis.LINUX_COLS
# Multiplier applied to raw timestamps in prep_counters_df.
# NOTE(review): presumably converts raw ticks to seconds -- confirm in eigen_analysis.
time_unit = eigen_analysis.TIME_CONVERSION_khz
# Multiplier applied to raw 'joules' readings in prep_counters_df and in the
# overflow correction of handle_neg_diffs.
# NOTE(review): presumably converts raw energy-counter units to joules -- confirm in eigen_analysis.
joules_unit = eigen_analysis.JOULE_CONVERSION

In [5]:
def handle_neg_diffs(df_diffs, df, core):
    """Repair or drop rows of ``df_diffs`` that contain a negative diff.

    A row is considered suspicious when any of its ``*_diff`` counter columns
    (joules, instructions, cycles, ref_cycles, llc_miss, timestamp) is
    negative.  For each such row:

    * if ``joules_diff`` is negative and ``timestamp_diff`` is at least
      0.001, the row is treated as a wrap-around of a 32-bit energy counter
      and ``joules_diff`` is recomputed from the raw readings in ``df``;
    * otherwise the previous and current raw readings are printed for
      inspection and the row is dropped.

    Parameters
    ----------
    df_diffs : pandas.DataFrame
        Row-to-row diffs with ``<name>_diff`` columns, indexed like ``df``.
    df : pandas.DataFrame
        The raw readings the diffs were computed from; must contain a
        ``joules`` column and every index label present in ``df_diffs``.
    core : object
        Core identifier; only used in the diagnostic messages.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``df_diffs``; the input frame is not modified.

    Notes
    -----
    The overflow correction reads the module-level ``joules_unit``.
    """
    diff_cols = ['joules_diff', 'instructions_diff', 'cycles_diff',
                 'ref_cycles_diff', 'llc_miss_diff', 'timestamp_diff']

    tmp = df_diffs.copy()

    # isolating rows with negative diffs
    neg_labels = tmp.index[(tmp[diff_cols] < 0).any(axis=1)]
    if neg_labels.empty:
        return tmp

    # Previous raw reading for every row.  Shifting the whole frame is
    # loop-invariant, so do it once instead of once per negative row.
    prev_rows = df.shift(1)

    # re-computing diffs if possible; else dropping rows
    for i in neg_labels:
        prev = prev_rows.loc[i]
        cur = df.loc[i]
        # TODO note that we are only handling case of RAPL-energy-status register overflow
        if tmp.loc[i, 'joules_diff'] < 0 and tmp.loc[i, 'timestamp_diff'] >= 0.001:
            print('CORE ' + str(core) + '  ---  JOULES COUNTER OVERFLOW AT LOG ENTRY DIFF #' + str(i) + '\n')
            # Distance from the previous reading up to the 32-bit maximum,
            # plus the distance from zero to the current reading.
            tmp.loc[i, 'joules_diff'] = (2**32 - 1) * joules_unit - prev['joules'] + cur['joules']
        else:
            print('CORE ' + str(core) + '  ---  UNEXPLAINED NEGATIVE VALS AT LOG ENTRY DIFF # ' + str(i) + '\n')
            header = ''.join(col + '  ' for col in df.columns)
            prevs = ''.join(str(prev[col]) + '  ' for col in df.columns)
            currs = ''.join(str(cur[col]) + '  ' for col in df.columns)
            print('          ' + header + '\n')
            # NOTE: the label i-1 is nominal; the values shown are those of
            # the row physically preceding i in df.
            print('         log[' + str(i-1) + ']: ' + prevs + '\n')
            print('         log[' + str(i) + ']: ' + currs + '\n')
            tmp = tmp.drop(i, axis=0)

    return tmp

In [6]:
def prep_counters_df(df, start, end, core=None):
    """Build the cleaned counter readings and their row-to-row diffs for one log.

    Steps:

    1. keep rows whose ``timestamp`` lies in ``[start, end]``;
    2. rebase ``timestamp`` to start at zero and scale it by the module-level
       ``time_unit``; scale ``joules`` by the module-level ``joules_unit``;
    3. drop log entries in which any counter (joules, instructions, cycles,
       ref_cycles, llc_miss) is not strictly positive;
    4. diff consecutive rows, rename the columns to ``<name>_diff`` and pass
       the result through ``handle_neg_diffs``;
    5. attach the (rebased, scaled) ``timestamp`` of each row to the diffs.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw log with at least the columns ``joules``, ``instructions``,
        ``cycles``, ``ref_cycles``, ``llc_miss`` and ``timestamp``.
        It is not modified.
    start, end : number
        Inclusive bounds on the raw ``timestamp`` values to keep.
    core : object, optional
        Core identifier forwarded to ``handle_neg_diffs`` for its diagnostic
        messages.  When omitted, the notebook-level loop variable ``c`` is
        used if it exists (this is what the function implicitly relied on
        before ``core`` was a parameter), else ``None``.

    Returns
    -------
    (counters_df, df_diffs) : tuple of pandas.DataFrame
        ``counters_df`` holds the filtered readings; ``df_diffs`` holds the
        ``*_diff`` columns (including ``timestamp_diff``) plus ``timestamp``.
    """
    if core is None:
        # Backward compatibility: existing callers do not pass ``core`` and
        # expect the global loop variable ``c`` to label the diagnostics.
        core = globals().get('c')

    # Work on an explicit copy so the column assignments below neither touch
    # the caller's frame nor trigger pandas' SettingWithCopy warning.
    in_window = (df['timestamp'] >= start) & (df['timestamp'] <= end)
    df = df[in_window].copy()
    df['timestamp'] = (df['timestamp'] - df['timestamp'].min()) * time_unit
    df['joules'] = df['joules'] * joules_unit

    # removing zero-filled log-entries
    # -> these represent interrupt occurrences at a frequency greater than per-1ms
    counters_df = df[['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss', 'timestamp']]
    all_positive = (counters_df[['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss']] > 0).all(axis=1)
    counters_df = counters_df[all_positive].copy()

    # computing diffs of counter readings
    df_diffs = counters_df.diff().dropna()
    df_diffs.columns = [f'{name}_diff' for name in df_diffs.columns]
    df_diffs = handle_neg_diffs(df_diffs, counters_df, core)

    # 'timestamp_diff' is deliberately kept: the previous code called
    # DataFrame.drop without using its result, so the column was never
    # removed, and later cells drop it explicitly before computing
    # correlations.
    # Assignment aligns on the index, so each diff row gets the timestamp of
    # the reading it ends at.
    df_diffs['timestamp'] = counters_df['timestamp']
    return counters_df, df_diffs

In [23]:
# Run configuration; every path below is derived from these values.
qps = '400k'
run = '0'
dvfs = '0xd00'
rapl = '135'
itr = '100'

# File names spell the QPS out in full: the trailing 'k' becomes '000'.
qps_full = qps[:-1] + '000'

# Merged log for this configuration.
merged_logs_dir = qps + '_' + dvfs + '_counters_merged/'
merged_logs_file = merged_logs_dir + dvfs + '_' + qps + '_' + itr + '_counters_merged'
df_merged = pd.read_csv(merged_logs_file, sep = ',', index_col=0)

# Per-core logs and the rdtsc file from which the start/end bounds are read.
logs_dir = qps + '_qps/linux_mcd_dmesg_' + run + '_' + dvfs + '_' + rapl + '_' + qps + '/'
rdtsc_dir = qps + '_qps/linux_mcd_rdtsc_' + run + '_' + dvfs + '_' + rapl + '_' + qps +'/'
rdtsc_file = rdtsc_dir + 'linux.mcd.rdtsc.' + run + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps_full
start, end = eigen_analysis.get_rdtsc(rdtsc_file)

# Only core 0 is loaded here.  The loop variable must stay named `c`:
# prep_counters_df uses the global `c` to label its diagnostics.
for c in range(0, 1):
    core = str(c)
    # Built from the configuration above instead of hard-coded literals, so
    # the path stays consistent with logs_dir / rdtsc_file when a value changes.
    c_log_file = logs_dir + 'linux.mcd.dmesg.' + run + '_' + core + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps_full
    df = pd.read_csv(c_log_file, sep = ' ', names=cols, index_col='i')
    df_counters, df_diffs = prep_counters_df(df, start, end)

In [24]:
df_counters

Unnamed: 0_level_0,joules,instructions,cycles,ref_cycles,llc_miss,timestamp
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
76,58388.623794,7577654649792,13246500137108,18960923160037,25794571774,0.000279
86,58388.758665,7577655437171,13246501468105,18960926129202,25794576124,0.001303
96,58388.892804,7577656153859,13246502769280,18960929031841,25794579615,0.002344
106,58389.025113,7577656928284,13246504097361,18960931994481,25794582967,0.003368
116,58389.157361,7577657631061,13246505385610,18960934868265,25794586103,0.004392
...,...,...,...,...,...,...
194781,60839.603916,7587826079456,13265920254375,19004246046829,25831035073,19.999348
194790,60839.716095,7587826316709,13265920829577,19004247330079,25831036367,20.000586
194800,60839.831812,7587826984545,13265922171728,19004250335668,25831040127,20.001682
194810,60839.963816,7587827597238,13265923344407,19004252951671,25831043171,20.002765


In [25]:
df_diffs

Unnamed: 0_level_0,joules_diff,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,timestamp_diff,timestamp
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
86,0.134871,787379.0,1330997.0,2969165.0,4350.0,0.001024,0.001303
96,0.134139,716688.0,1301175.0,2902639.0,3491.0,0.001041,0.002344
106,0.132309,774425.0,1328081.0,2962640.0,3352.0,0.001024,0.003368
116,0.132248,702777.0,1288249.0,2873784.0,3136.0,0.001024,0.004392
126,0.128771,807976.0,1331119.0,2969426.0,3948.0,0.001024,0.005416
...,...,...,...,...,...,...,...
194781,0.121390,526798.0,994001.0,2217485.0,1880.0,0.001024,19.999348
194790,0.112179,237253.0,575202.0,1283250.0,1294.0,0.001238,20.000586
194800,0.115717,667836.0,1342151.0,3005589.0,3760.0,0.001096,20.001682
194810,0.132004,612693.0,1172679.0,2616003.0,3044.0,0.001084,20.002765


In [26]:
df_merged

Unnamed: 0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff
0,659180.5625,1.292605e+06,2.890472e+06,3949.6250,0.100783
1,725621.8750,1.278691e+06,2.852474e+06,3832.7500,0.133967
2,646577.3750,1.204876e+06,2.687822e+06,3305.6875,0.132854
3,658467.1875,1.142643e+06,2.548986e+06,2915.5625,0.131306
4,662251.9375,1.127502e+06,2.515230e+06,2899.4375,0.127425
...,...,...,...,...,...
19497,61039.3125,1.316624e+05,2.937174e+05,344.6250,0.023302
19498,37979.8750,9.005506e+04,2.023366e+05,232.8125,0.014022
19499,74369.7500,1.550226e+05,3.458232e+05,510.0625,0.014465
19500,83953.0000,1.604496e+05,3.579289e+05,486.3125,0.024629


In [28]:
# Pairwise correlation matrix of the per-core counter diffs; both timestamp
# columns are excluded so only the counter columns are compared.
df_diffs.drop(['timestamp', 'timestamp_diff'], axis = 1).corr()

Unnamed: 0,joules_diff,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff
joules_diff,1.0,0.037138,0.037152,0.037148,0.037775
instructions_diff,0.037138,1.0,0.837637,0.837618,0.562625
cycles_diff,0.037152,0.837637,1.0,1.0,0.723921
ref_cycles_diff,0.037148,0.837618,1.0,1.0,0.723918
llc_miss_diff,0.037775,0.562625,0.723921,0.723918,1.0


In [29]:
# Pairwise correlation matrix of the merged-log diffs, for comparison with
# the per-core matrix.
df_merged.corr()

Unnamed: 0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff
instructions_diff,1.0,0.864393,0.864376,0.408567,0.20656
cycles_diff,0.864393,1.0,1.0,0.698259,0.244254
ref_cycles_diff,0.864376,1.0,1.0,0.698281,0.244228
llc_miss_diff,0.408567,0.698259,0.698281,1.0,0.150526
joules_diff,0.20656,0.244254,0.244228,0.150526,1.0
