In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local modules are picked up.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [1]:
from eigen_analysis import *
import os

### Raw csv file

#### Read data

In [31]:
# Log file to analyse. The underscore-separated fields at the end of the name
# are parsed below: the last one becomes `qps`, the third-from-last `dvfs`.
fname = 'linux_mcd_dmesg_0_0xd00_135_200k/linux.mcd.dmesg.0_6_10_0xd00_135_200000'
fname_fields = fname.split('_')
qps = f'{int(fname_fields[-1]) // 1000}k'  # e.g. '200000' -> '200k'
dvfs = fname_fields[-3]
print(qps, dvfs)

# Space-separated file: skip the first line, take column names from LINUX_COLS
# and use the first column as the index.
df = pd.read_csv(f'{qps}_qps/{fname}', sep=" ", skiprows=1, index_col=0, names=LINUX_COLS)

200k 0xd00


#### RDTSC file

In [32]:
# Derive the RDTSC file path from the log name: take the text after the last
# '.', split it on '_', drop the field at position 1 and re-join the rest.
tag = fname.rsplit('.', 1)[-1].split('_')
desc = '_'.join(np.delete(tag, [1]))
loc_rdtsc = f'linux_mcd_rdtsc_0_{dvfs}_135_{qps}'
rdtsc_fname = f'{loc_rdtsc}/linux.mcd.rdtsc.{desc}'
rdtsc_path = f'{qps}_qps/{rdtsc_fname}'
# get_rdtsc returns the (start, end) pair used below to bound the timestamps.
START_RDTSC, END_RDTSC = get_rdtsc(rdtsc_path)

#### Impact of time filtering

In [33]:
print(START_RDTSC, END_RDTSC)
print(f'Shape before time filtering: {df.shape}')
# Keep only rows whose timestamp lies inside [START_RDTSC, END_RDTSC] (both ends inclusive).
in_window = df['timestamp'].between(START_RDTSC, END_RDTSC)
df = df[in_window].copy()
print(f'Shape after time filtering: {df.shape}')

71192578637589 71250593056120
Shape before time filtering: (523548, 15)
Shape after time filtering: (523387, 15)


#### Unit conversions for time and energy

In [34]:
# NOTE: this cell rewrites df's columns, so run it once per load -- re-running
# it would rescale values that have already been converted.
# Shift timestamps so the smallest one is zero, then scale by TIME_CONVERSION_khz.
df['timestamp'] = (df['timestamp'] - df['timestamp'].min()) * TIME_CONVERSION_khz
# Scale the raw energy readings by JOULE_CONVERSION.
df['joules'] = JOULE_CONVERSION * df['joules']

#### Drop all sleep state columns

In [35]:
# Sleep-state columns are not used in the analysis below.
sleep_state_cols = ['c6', 'c1', 'c1e', 'c3', 'c7']
df = df.drop(columns=sleep_state_cols).copy()

#### Check for null values (the drop step is commented out)

In [37]:
# Count missing values per column. A `df.dropna(inplace=True)` step used to
# sit here and is left disabled.
df.isna().sum()

rx_desc         0
rx_bytes        0
tx_desc         0
tx_bytes        0
instructions    0
cycles          0
ref_cycles      0
llc_miss        0
joules          0
timestamp       0
dtype: int64

#### No negative raw counters since they are all unsigned ints

In [38]:
# NOTE this should never be the case
# Collect every row where at least one raw counter is negative and report the file if any exist.
raw_counter_cols = ['joules', 'instructions', 'cycles', 'ref_cycles', 'llc_miss']
df_neg = df[(df[raw_counter_cols] < 0).any(axis=1)].copy()
if len(df_neg) > 0:
    print("UNEXPECTED NEGATIVE VAL IN ", fname)

#### Separate the metrics that are collected per-interrupt (as opposed to per-ms) and are not counter-based, i.e. shouldn't be diffed

In [39]:
# non-continuous counter metrics: rx-bytes/desc, tx-bytes/desc
# Take an independent copy of the rx/tx byte and descriptor columns.
per_interrupt_cols = ['rx_bytes', 'rx_desc', 'tx_bytes', 'tx_desc']
df_no_diffs = df.loc[:, per_interrupt_cols].copy()

In [41]:
# Preview the first rows of the per-interrupt metrics.
df_no_diffs.head()

Unnamed: 0_level_0,rx_bytes,rx_desc,tx_bytes,tx_desc
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
158,0,0,0,0
159,668,1,0,0
160,246,2,66,2
161,116,1,132,4
162,131,1,66,2


#### Separate counter-based per-ms metrics that need to be diffed

In [43]:
# Copy the counter columns (plus timestamp) and tag each name with a '_diff'
# suffix. The values are still the raw readings at this point, not differences.
diff_source_cols = ['instructions', 'cycles', 'ref_cycles', 'llc_miss', 'joules', 'timestamp']
df_diffs = df[diff_source_cols].add_suffix('_diff')

print(f'Shape before filtering: {df_diffs.shape}')
# Keep only rows where every counter is strictly positive (timestamp is not checked).
strictly_positive_cols = ['joules_diff', 'instructions_diff', 'cycles_diff',
                          'ref_cycles_diff', 'llc_miss_diff']
all_positive = (df_diffs[strictly_positive_cols] > 0).all(axis=1)
df_diffs = df_diffs[all_positive].copy()
print(f'Shape after filtering: {df_diffs.shape}')

Shape before filtering: (523387, 6)
Shape after filtering: (19293, 6)


##### Why are there non-positive entries?

In [47]:
# Rebuild the unfiltered frame so the non-positive entries can be inspected.
df_diffs = (
    df[['instructions', 'cycles', 'ref_cycles', 'llc_miss', 'joules', 'timestamp']]
    .rename(columns=lambda name: f'{name}_diff')
)

In [68]:
# For each column print: name, #negative entries, #null entries, #zero entries.
# (no negative entries and no null entries)
for c in df_diffs.columns:
    n_negative = (df_diffs[c] < 0).sum()
    n_null = df_diffs[c].isnull().sum()
    n_zero = (df_diffs[c] == 0).sum()
    print(c, n_negative, n_null, n_zero)

instructions_diff 0 0 504094
cycles_diff 0 0 504094
ref_cycles_diff 0 0 504094
llc_miss_diff 0 0 504094
joules_diff 0 0 504094
timestamp_diff 0 0 1


In [70]:
# The non-zero entries should coincide across the columns, i.e. the same set of
# rows should have non-zero entries for all columns.
#
# The subtraction below indicates this is true (total rows minus rows holding a
# zero counter equals the row count left by the positivity filter) - one can
# verify it more explicitly.
#
# Both operands are computed from df_diffs rather than typed in from earlier
# cell output, so the check stays valid when a different file is loaded.
n_rows_total = len(df_diffs)
n_rows_zero = int((df_diffs['instructions_diff'] == 0).sum())
n_rows_total - n_rows_zero

19293

In [82]:
# Map each column to the set of index labels where it is strictly positive.
# (Despite the name, these are the positive rows, not the negative ones.)
neg_idx = {
    col: set(df_diffs.loc[df_diffs[col] > 0].index)
    for col in df_diffs
}

In [87]:
# Walk adjacent column pairs and print how many index labels appear in exactly
# one of the two sets (0 means both columns are positive on the same rows).
keys = list(neg_idx)
for prev_k, curr_k in zip(keys, keys[1:]):
    print(prev_k, curr_k)

    only_in_one = neg_idx[prev_k] ^ neg_idx[curr_k]
    print(len(only_in_one))

instructions_diff cycles_diff
0
cycles_diff ref_cycles_diff
0
ref_cycles_diff llc_miss_diff
0
llc_miss_diff joules_diff
0
joules_diff timestamp_diff
504093


In [88]:
# Rebuild the '_diff'-suffixed frame from df and re-apply the positivity filter.
source_cols = ['instructions', 'cycles', 'ref_cycles', 'llc_miss', 'joules', 'timestamp']
df_diffs = df[source_cols].add_suffix('_diff')

print(f'Shape before filtering: {df_diffs.shape}')
# Every counter column must be strictly positive; timestamp_diff is not checked.
checked_cols = ['joules_diff', 'instructions_diff', 'cycles_diff',
                'ref_cycles_diff', 'llc_miss_diff']
df_diffs = df_diffs[df_diffs[checked_cols].gt(0).all(axis=1)].copy()
print(f'Shape after filtering: {df_diffs.shape}')

Shape before filtering: (523387, 6)
Shape after filtering: (19293, 6)


#### Diffs of per-ms columns

In [89]:
# Row-to-row differences of every column; the first row has no predecessor and becomes NaN.
tmp = df_diffs.diff()

In [90]:
# Preview the first rows of the row-to-row differences.
tmp.head()

Unnamed: 0_level_0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,timestamp_diff
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
168,,,,,,
207,453435.0,1200901.0,2678875.0,3712.0,0.023745,0.001
243,408433.0,893152.0,1992474.0,2357.0,0.032351,0.001019
278,410936.0,837830.0,1869021.0,2061.0,0.030932,0.00101
310,391844.0,831453.0,1854811.0,2003.0,0.031252,0.00106


In [91]:
# Rows of the differenced frame where at least one counter went backwards
# (negative difference); timestamp_diff is not part of the check.
backwards_cols = ['joules_diff', 'instructions_diff', 'cycles_diff',
                  'ref_cycles_diff', 'llc_miss_diff']
df_diffs_neg = tmp[tmp[backwards_cols].lt(0).any(axis=1)]

In [92]:
# Total number of differenced rows, then how many of them contain a negative difference.
print(len(tmp), len(df_diffs_neg))

19293 1


##### Examine negative rows

In [94]:
# Display the rows that contain a negative difference.
df_diffs_neg

Unnamed: 0_level_0,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,timestamp_diff
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
57927,-809958165.0,-1605617000.0,-3582787000.0,-5015000.0,-147.536976,0.000892


In [96]:
# Look up the un-differenced values at the index label of the first negative-difference row.
df_diffs.loc[df_diffs_neg.index[0]]

instructions_diff    6.107979e+12
cycles_diff          1.032032e+13
ref_cycles_diff      1.438424e+13
llc_miss_diff        1.792888e+10
joules_diff          2.787587e+04
timestamp_diff       2.111743e+00
Name: 57927, dtype: float64

##### Reset index to make search easier

In [104]:
#focus on df_diffs (not actual diffs) and tmp (actual diffs) and df_diffs_neg (negative diffs)
# Same three frames as above, but on a 0..n-1 positional index so that
# neighbouring rows can be sliced by position:
#   a = un-differenced values, b = row-to-row differences, c = rows of b with a negative counter diff.
a = df_diffs.reset_index()
b = a.diff()
negative_check_cols = ['joules_diff', 'instructions_diff', 'cycles_diff',
                       'ref_cycles_diff', 'llc_miss_diff']
c = b[(b[negative_check_cols] < 0).any(axis=1)]

In [105]:
# Display the negative-difference rows, now labelled by position.
c

Unnamed: 0,i,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,timestamp_diff
2035,14.0,-809958165.0,-1605617000.0,-3582787000.0,-5015000.0,-147.536976,0.000892


In [109]:
# Show the un-differenced values for the three rows either side of the first
# negative-difference row. The position is read from `c` instead of being
# hard-coded from its displayed output, so the cell follows the data.
first_neg_pos = c.index[0]
a.loc[first_neg_pos - 3:first_neg_pos + 3]

Unnamed: 0,i,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,timestamp_diff
2032,57866,6108788271841,10321924170240,14387815972221,17933893107,28023.34659,2.108805
2033,57897,6108788666437,10321924990990,14387817803194,17933894050,28023.375629,2.109848
2034,57913,6108789152433,10321925662713,14387819301537,17933894844,28023.404379,2.110851
2035,57927,6107979194268,10320320045917,14384236514720,17928879844,27875.867403,2.111743
2036,57930,6108789870350,10321926655615,14387821516412,17933896222,28023.432885,2.111916
2037,57945,6108790425828,10321927434301,14387823253483,17933897440,28023.461772,2.113066
2038,57961,6108791065255,10321928277899,14387825135351,17933898714,28023.518768,2.114482


In [110]:
# Show the row-to-row differences for the three rows either side of the first
# negative-difference row. The position is read from `c` instead of being
# hard-coded from its displayed output, so the cell follows the data.
first_neg_pos = c.index[0]
b.loc[first_neg_pos - 3:first_neg_pos + 3]

Unnamed: 0,i,instructions_diff,cycles_diff,ref_cycles_diff,llc_miss_diff,joules_diff,timestamp_diff
2032,41.0,465913.0,1012776.0,2259390.0,1149.0,0.028979,0.001006
2033,31.0,394596.0,820750.0,1830973.0,943.0,0.02904,0.001043
2034,16.0,485996.0,671723.0,1498343.0,794.0,0.02875,0.001003
2035,14.0,-809958165.0,-1605617000.0,-3582787000.0,-5015000.0,-147.536976,0.000892
2036,3.0,810676082.0,1606610000.0,3585002000.0,5016378.0,147.565482,0.000173
2037,15.0,555478.0,778686.0,1737071.0,1218.0,0.028887,0.00115
2038,16.0,639427.0,843598.0,1881868.0,1274.0,0.056996,0.001416
