## Looking at correlations between the same metrics across different core logs:

In [2]:
import os
import time
import pandas as pd
import numpy as np

In [3]:
import eigen_analysis

cols = eigen_analysis.LINUX_COLS
time_unit = eigen_analysis.TIME_CONVERSION_khz
joules_unit = eigen_analysis.JOULE_CONVERSION

In [102]:
def handle_neg_diffs(df_diffs, df, core):
    
    tmp = df_diffs.copy()

    # isolating rows with negative diffs
    tmp_neg = tmp[(tmp['joules_diff'] < 0) | (tmp['instructions_diff'] < 0) | (tmp['cycles_diff'] < 0) \
                   | (tmp['ref_cycles_diff'] < 0) | (tmp['llc_miss_diff'] < 0) | (tmp['timestamp_diff'] < 0)]

    # re-computing diffs if possible; else dropping rows
    for i,j in tmp_neg.iterrows():
        prev = df.shift(1).loc[i]
        cur = df.loc[i]
        # TODO note that we are only handling case of RAPL-energy-status register overflow
        if (tmp.loc[i]['joules_diff'] < 0) & (tmp.loc[i]['timestamp_diff'] >= 0.001):
            print('CORE ' + str(core) + '  ---  JOULES COUNTER OVERFLOW AT LOG ENTRY DIFF #' + str(i) + '\n')
            tmp.loc[i, ['joules_diff']] = (2**32 - 1) * joules_unit - prev['joules'] + cur['joules'] 
        else:
            print('CORE ' + str(core) + '  ---  UNEXPLAINED NEGATIVE VALS AT LOG ENTRY DIFF # ' + str(i) + '\n')
            cols = ''
            prevs = ''
            currs = ''
            for col in list(df.columns):
                cols += col + '  '
                prevs += str(prev[col]) + '  '
                currs += str(cur[col]) + '  '
            print('          ' + cols + '\n')
            print('         log[' + str(i-1) + ']: ' + prevs + '\n')
            print('         log[' + str(i) + ']: ' + currs + '\n')
            tmp = tmp.drop(i, axis=0)

    return tmp

In [134]:
def prep_counters_df(df, start, end):
    df = df[(df['timestamp'] >= start) & (df['timestamp'] <= end)]
    df.loc[:, 'timestamp'] = df['timestamp'] - df['timestamp'].min()
    df.loc[:, 'timestamp'] = df['timestamp'] * time_unit
    df.loc[:, 'joules'] = df['joules'] * joules_unit

    # removing zero-filled log-entries
    # -> these represent interrupt occurrences at a frequency greater than per-1ms
    counters_df = df[['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss', 'timestamp']].copy()
    counters_df = counters_df[(counters_df['joules'] > 0) & (counters_df['instructions'] > 0) \
                                        & (counters_df['cycles'] > 0) & (counters_df['ref_cycles'] > 0) \
                                        & (counters_df['llc_miss'] > 0)]

    # computing diffs of counter readings
    tmp = counters_df['timestamp']
    df_diffs = counters_df.diff().dropna().copy()
    df_diffs.columns = [f'{c}_diff' for c in df_diffs.columns]
    df_diffs = handle_neg_diffs(df_diffs, counters_df, c)
    df_diffs.drop(['timestamp_diff'], axis=1)
    df_diffs['timestamp'] = tmp
    return counters_df, df_diffs

In [135]:
qps = '200k'
run = '0'
dvfs = '0xd00'
rapl = '135'
itr = '100'

logs_dir = qps + '_qps/linux_mcd_dmesg_' + run + '_' + dvfs + '_' + rapl + '_' + qps + '/'
rdtsc_dir = qps + '_qps/linux_mcd_rdtsc_' + run + '_' + dvfs + '_' + rapl + '_' + qps +'/'
rdtsc_file = rdtsc_dir + 'linux.mcd.rdtsc.' + run + '_' + itr + '_' + dvfs + '_' + rapl + '_' + qps[:-1] + '000'
start, end = eigen_analysis.get_rdtsc(rdtsc_file)

df_inst = pd.DataFrame()
df_inst_diff = pd.DataFrame()
for c in range(0, 16):
    core = str(c)
    c_log_file = logs_dir + 'linux.mcd.dmesg.0_' + core + '_100_0xd00_135_200000'
    df = pd.read_csv(c_log_file, sep = ' ', names=cols, index_col='i')
    df_counters, df_diffs = prep_counters_df(df, start, end)
    
    df_inst_c = df_counters[['instructions', 'timestamp']].copy()
    df_inst_diff_c = df_diffs[['instructions_diff', 'timestamp']]
    
    if df_inst.shape[0] == 0:
        df_inst = df_inst_c
        df_inst_diff = df_inst_diff_c
    else:
        df_inst = df_inst.merge(df_inst_c, on='timestamp', \
                         how='outer', sort=True, suffixes=('', '_0')).fillna(0)
        df_inst_diff = df_inst_diff.merge(df_inst_diff_c, on='timestamp', \
                         how='outer', sort=True, suffixes=('', '_0')).fillna(0)

In [136]:
df_inst

Unnamed: 0,instructions,timestamp,instructions_0,instructions_0.1,instructions_0.2,instructions_0.3,instructions_0.4,instructions_0.5,instructions_0.6,instructions_0.7,instructions_0.8,instructions_0.9,instructions_0.10,instructions_0.11,instructions_0.12,instructions_0.13,instructions_0.14
0,0.000000e+00,0.000000,7.686818e+12,0.000000e+00,7.676870e+12,7.522764e+12,0.000000e+00,7.438144e+12,0.000000e+00,7.500489e+12,7.682446e+12,7.523976e+12,0.000000e+00,7.513715e+12,7.687445e+12,7.550595e+12,7.703373e+12
1,0.000000e+00,0.000267,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.663011e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
2,0.000000e+00,0.000300,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.688966e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
3,0.000000e+00,0.000304,0.000000e+00,7.512387e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
4,7.548044e+12,0.000319,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307824,0.000000e+00,20.003593,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.688587e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
307825,0.000000e+00,20.003672,7.692975e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
307826,0.000000e+00,20.003673,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.530044e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
307827,0.000000e+00,20.003709,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.685489e+12,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00


In [137]:
df_inst_diff

Unnamed: 0,instructions_diff,timestamp,instructions_diff_0,instructions_diff_0.1,instructions_diff_0.2,instructions_diff_0.3,instructions_diff_0.4,instructions_diff_0.5,instructions_diff_0.6,instructions_diff_0.7,instructions_diff_0.8,instructions_diff_0.9,instructions_diff_0.10,instructions_diff_0.11,instructions_diff_0.12,instructions_diff_0.13,instructions_diff_0.14
0,0.0,0.001005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,474967.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.001013,0.0,0.0,0.0,482490.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.001014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,469175.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.001021,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,462789.0,0.0
4,0.0,0.001022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,492354.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307818,0.0,20.003593,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,541560.0,0.0,0.0,0.0,0.0,0.0,0.0
307819,0.0,20.003672,574673.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
307820,0.0,20.003673,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,670913.0,0.0,0.0,0.0,0.0,0.0
307821,0.0,20.003709,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,671581.0,0.0,0.0,0.0,0.0


In [138]:
df_inst.drop(['timestamp'], axis = 1).corr()

Unnamed: 0,instructions,instructions_0,instructions_0.1,instructions_0.2,instructions_0.3,instructions_0.4,instructions_0.5,instructions_0.6,instructions_0.7,instructions_0.8,instructions_0.9,instructions_0.10,instructions_0.11,instructions_0.12,instructions_0.13,instructions_0.14
instructions,1.0,-0.066678,-0.066714,-0.06668,-0.06669,-0.066636,-0.066717,-0.06666,-0.066717,-0.066673,-0.066686,-0.066669,-0.066678,-0.066682,-0.06669,-0.066673
instructions_0,-0.066678,1.0,-0.066686,-0.066597,-0.066606,-0.066664,-0.066634,-0.066632,-0.066634,-0.06659,-0.066603,-0.066641,-0.066595,-0.066599,-0.066606,-0.06659
instructions_0,-0.066714,-0.066686,1.0,-0.066688,-0.066697,-0.066699,-0.066725,-0.066667,-0.066725,-0.06668,-0.066693,-0.066677,-0.066686,-0.06669,-0.066697,-0.06668
instructions_0,-0.06668,-0.066597,-0.066688,1.0,-0.066608,-0.066666,-0.066636,-0.066634,-0.066636,-0.066592,-0.066605,-0.066643,-0.066597,-0.066601,-0.066608,-0.066592
instructions_0,-0.06669,-0.066606,-0.066697,-0.066608,1.0,-0.066675,-0.066645,-0.066643,-0.066645,-0.066601,-0.066614,-0.066653,-0.066606,-0.06661,-0.066618,-0.066601
instructions_0,-0.066636,-0.066664,-0.066699,-0.066666,-0.066675,1.0,-0.066702,-0.066645,-0.066702,-0.066658,-0.066671,-0.066654,-0.066664,-0.066667,-0.066675,-0.066658
instructions_0,-0.066717,-0.066634,-0.066725,-0.066636,-0.066645,-0.066702,1.0,-0.066671,-0.066673,-0.066629,-0.066642,-0.06668,-0.066634,-0.066638,-0.066645,-0.066629
instructions_0,-0.06666,-0.066632,-0.066667,-0.066634,-0.066643,-0.066645,-0.066671,1.0,-0.066671,-0.066627,-0.06664,-0.066623,-0.066632,-0.066636,-0.066643,-0.066627
instructions_0,-0.066717,-0.066634,-0.066725,-0.066636,-0.066645,-0.066702,-0.066673,-0.066671,1.0,-0.066629,-0.066642,-0.06668,-0.066634,-0.066638,-0.066645,-0.066629
instructions_0,-0.066673,-0.06659,-0.06668,-0.066592,-0.066601,-0.066658,-0.066629,-0.066627,-0.066629,1.0,-0.066597,-0.066636,-0.06659,-0.066593,-0.066601,-0.066584


In [139]:
df_inst_diff.drop(['timestamp'], axis = 1).corr()

Unnamed: 0,instructions_diff,instructions_diff_0,instructions_diff_0.1,instructions_diff_0.2,instructions_diff_0.3,instructions_diff_0.4,instructions_diff_0.5,instructions_diff_0.6,instructions_diff_0.7,instructions_diff_0.8,instructions_diff_0.9,instructions_diff_0.10,instructions_diff_0.11,instructions_diff_0.12,instructions_diff_0.13,instructions_diff_0.14
instructions_diff,1.0,-0.062636,-0.062922,-0.06269,-0.062967,-0.062638,-0.062969,-0.062597,-0.062943,-0.062624,-0.062938,-0.062627,-0.062903,-0.062624,-0.06287,-0.062613
instructions_diff_0,-0.062636,1.0,-0.062626,-0.062394,-0.06267,-0.062374,-0.062672,-0.062301,-0.062646,-0.062329,-0.062641,-0.062332,-0.062607,-0.062329,-0.062574,-0.062318
instructions_diff_0,-0.062922,-0.062626,1.0,-0.06268,-0.062956,-0.062659,-0.062959,-0.062586,-0.062933,-0.062614,-0.062927,-0.062617,-0.062893,-0.062614,-0.06286,-0.062602
instructions_diff_0,-0.06269,-0.062394,-0.06268,1.0,-0.062724,-0.062427,-0.062726,-0.062355,-0.062701,-0.062383,-0.062695,-0.062386,-0.062661,-0.062382,-0.062628,-0.062371
instructions_diff_0,-0.062967,-0.06267,-0.062956,-0.062724,1.0,-0.062703,-0.063003,-0.06263,-0.062977,-0.062658,-0.062972,-0.062661,-0.062937,-0.062658,-0.062904,-0.062646
instructions_diff_0,-0.062638,-0.062374,-0.062659,-0.062427,-0.062703,1.0,-0.062706,-0.062334,-0.06268,-0.062362,-0.062674,-0.062365,-0.06264,-0.062362,-0.062607,-0.062351
instructions_diff_0,-0.062969,-0.062672,-0.062959,-0.062726,-0.063003,-0.062706,1.0,-0.062633,-0.06298,-0.062661,-0.062974,-0.062664,-0.06294,-0.06266,-0.062907,-0.062649
instructions_diff_0,-0.062597,-0.062301,-0.062586,-0.062355,-0.06263,-0.062334,-0.062633,1.0,-0.062607,-0.06229,-0.062602,-0.062293,-0.062567,-0.062289,-0.062534,-0.062278
instructions_diff_0,-0.062943,-0.062646,-0.062933,-0.062701,-0.062977,-0.06268,-0.06298,-0.062607,1.0,-0.062635,-0.062948,-0.062638,-0.062914,-0.062635,-0.062881,-0.062623
instructions_diff_0,-0.062624,-0.062329,-0.062614,-0.062383,-0.062658,-0.062362,-0.062661,-0.06229,-0.062635,1.0,-0.062629,-0.06232,-0.062595,-0.062317,-0.062562,-0.062306


In [140]:
from numpy.linalg import eig, eigvalsh

vals, vecs = eig(df_inst.drop(['timestamp'], axis = 1).corr())
vals_diff, vecs_diff = eig(df_inst_diff.drop(['timestamp'], axis = 1).corr())

In [141]:
vals

array([3.53123762e-04, 1.06686820e+00, 1.06673021e+00, 1.06655533e+00,
       1.06668016e+00, 1.06665690e+00, 1.06667299e+00, 1.06663574e+00,
       1.06662894e+00, 1.06658625e+00, 1.06660710e+00, 1.06660109e+00,
       1.06659742e+00, 1.06659531e+00, 1.06661374e+00, 1.06661751e+00])

In [142]:
vals_diff

array([0.06051019, 1.06300344, 1.06298564, 1.06294979, 1.06293887,
       1.06291975, 1.06288584, 1.06282642, 1.0626367 , 1.06222568,
       1.06243688, 1.06240102, 1.06227215, 1.06236016, 1.06233041,
       1.06231708])

### Test on 2 cores

In [90]:
c0_log_file = logs_dir + 'linux.mcd.dmesg.0_0_100_0xd00_135_200000'
c1_log_file = logs_dir + 'linux.mcd.dmesg.0_1_100_0xd00_135_200000'

df0 = pd.read_csv(c0_log_file, sep = ' ', names=cols, index_col='i')
df1 = pd.read_csv(c1_log_file, sep = ' ', names=cols, index_col='i')

df0_counters = prep_counters_df(df0, start, end)
df1_counters = prep_counters_df(df1, start, end)

df0_inst = df0_counters[['instructions', 'timestamp']].copy()
df1_inst = df1_counters[['instructions', 'timestamp']].copy()

In [91]:
df1_inst

Unnamed: 0_level_0,instructions,timestamp
i,Unnamed: 1_level_1,Unnamed: 2_level_1
133,7686818085571,0.000000
143,7686818632656,0.001035
153,7686819093449,0.002047
163,7686819510702,0.003086
173,7686819808421,0.004156
...,...,...
188485,7692972769333,19.999252
188493,7692972967446,20.000586
188503,7692973576699,20.001609
188513,7692974205848,20.002634


In [92]:
df_inst = pd.DataFrame()
df_inst = df0_inst.merge(df1_inst, on='timestamp', \
                         how='outer', sort=True, suffixes=('', '_0')).fillna(0)

In [93]:
df_inst

Unnamed: 0,instructions,timestamp,instructions_0
0,0.000000e+00,0.000000,7.686818e+12
1,7.548044e+12,0.000319,0.000000e+00
2,0.000000e+00,0.001035,7.686819e+12
3,7.548044e+12,0.001417,0.000000e+00
4,0.000000e+00,0.002047,7.686819e+12
...,...,...,...
38480,0.000000e+00,20.001609,7.692974e+12
38481,7.554094e+12,20.002006,0.000000e+00
38482,0.000000e+00,20.002634,7.692974e+12
38483,7.554095e+12,20.003100,0.000000e+00


In [94]:
df_inst.drop(['timestamp'], axis = 1).corr()

Unnamed: 0,instructions,instructions_0
instructions,1.0,-1.0
instructions_0,-1.0,1.0


In [95]:
from numpy.linalg import eig, eigvalsh

vals, vecs = eig(df_inst.drop(['timestamp'], axis = 1).corr())

In [96]:
vals

array([1.99999989e+00, 1.07592474e-07])