In [None]:
import os
import fnmatch
import pandas as pd
import numpy as np
from datetime import datetime
import ipywidgets as widgets
from ipywidgets import interact, interact_manual,Checkbox
from scipy.stats import pearsonr,spearmanr
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
%matplotlib notebook
import plotly.express as px
from sklearn import preprocessing
from IPython.core.display import display, HTML
from IPython.core.debugger import set_trace
display(HTML('<style>.container { width:90% !important; }</style>')) 

!pwd

In [None]:
def find(pattern, path):
    """Recursively collect the files under ``path`` whose name matches ``pattern``.

    Args:
        pattern: shell-style wildcard (``fnmatch`` syntax) tested against each
            file's base name.
        path: directory whose tree is walked.

    Returns:
        List of matching paths (containing directory joined with the file name),
        in the order ``os.walk`` yields them; empty when nothing matches.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
        if fnmatch.fnmatch(filename, pattern)
    ]

In [None]:
# add meta data from score file
# tslog_parser = lambda x: datetime.strptime(x, '%B%d %H:%M:%S')
# sdf = pd.read_excel(score_file,  parse_dates = ['start_time', 'end_time'], date_parser = tslog_parser)

def get_trace_score(tt, score_df=None):
    """Look up the score and trace label that are active at time ``tt``.

    Args:
        tt: timestamp to look up; must be comparable with the ``start_time`` and
            ``end_time`` columns of the score table.
        score_df: table with ``start_time``, ``end_time``, ``score``,
            ``trace_name`` and ``num_of_sec_between`` columns. Defaults to the
            notebook-level ``sdf`` so existing one-argument calls keep working.

    Returns:
        ``(score, trace)`` taken from the first row whose open interval
        ``(start_time, end_time)`` contains ``tt``, where ``trace`` is
        ``'<trace_name>_<num_of_sec_between>'``; ``(0, '_idle')`` when no row
        matches.
    """
    if score_df is None:
        # Fall back to the notebook global; resolved at call time, so the cell
        # that loads ``sdf`` only has to run before the first call.
        score_df = sdf
    active = (score_df['start_time'] < tt) & (score_df['end_time'] > tt)
    rows = score_df.loc[active, ['score', 'trace_name', 'num_of_sec_between']].values
    if len(rows) == 0:
        return 0, '_idle'
    score, name, gap = rows[0]
    return score, name + '_' + str(gap)


# Real Environment

In [None]:
# Root directory under which the experiment folders are looked up.
# NOTE(review): machine-specific absolute path; the commented-out lines below are
# presumably the same data on other machines — confirm before switching.
data_path='/home/gkoren2/share/Data/MLA/DTT/scarlet/experiments'
# data_path='C:\\Users\\gkoren2\\Documents\\PycharmProjects\\work\\DTT\\experiments'
# data_path='D:\\MLA\\Data\\DTT\\Scarlet\\experiments'

In [None]:
# Show the names of the entries directly under data_path, sorted.
sorted(os.listdir(data_path))

In [None]:
# Pick the experiment folder and locate its ESIF log, TAT log and score sheet.
folder_name = os.path.join(data_path, 'psvt_at-9_25_45_64-greedy_1')


def _first_match(pattern, folder):
    """Return the first file under ``folder`` matching ``pattern``, or raise a clear error."""
    matches = find(pattern, folder)
    if not matches:
        raise FileNotFoundError('no file matching {!r} under {}'.format(pattern, folder))
    return matches[0]


# folder_name is already joined with data_path, so it is passed to the search as is.
esif_file = _first_match('*_esif.csv', folder_name)
tat_file = _first_match('*_TAT.csv', folder_name)
score_file = _first_match('*.xlsx', folder_name)
print(esif_file)
print(tat_file)
print(score_file)

In [None]:
# add meta data from score file
# The timestamp format is month name immediately followed by the day, then the time.
# It carries no year, so parsed values get the parser's default year.
tslog_format = '%B%d %H:%M:%S'
tslog_parser = lambda x: datetime.strptime(x, tslog_format)
# Parse the two time columns after reading: the `date_parser` argument of
# pd.read_excel is deprecated in pandas 2.x, while pd.to_datetime(format=...) is
# supported across versions.
sdf = pd.read_excel(score_file)
for _time_col in ('start_time', 'end_time'):
    sdf[_time_col] = pd.to_datetime(sdf[_time_col], format=tslog_format)

## Analyzing TAT

In [None]:
# Load the TAT log and order it by time. reset_index() keeps the pre-sort row
# label as an 'index' column.
tat_df = (
    pd.read_csv(tat_file)
    .assign(timestamp=lambda df: pd.to_datetime(df['timestamp']))
    .sort_values(by='timestamp')
    .reset_index()
)
tat_df = tat_df.assign(
    # extract dominant power levels: element-wise minimum of the MMIO and MSR limits
    PL1=lambda df: np.minimum(
        df['Turbo Parameters-MMIO Power Limit_1 Power(Watts)'],
        df['Turbo Parameters-MSR Power Limit_1 Power(Watts)'],
    ),
    PL2=lambda df: np.minimum(
        df['Turbo Parameters-MMIO Power Limit_2 Power(Watts)'],
        df['Turbo Parameters-MSR Power Limit_2 Power(Watts)'],
    ),
    # calculate turbo budget: exponentially weighted mean of (PL1 - POWER)
    ewma=lambda df: (df['PL1'] - df['POWER']).ewm(com=27.5, adjust=False).mean(),
)
tat_df.columns

In [None]:
# Inspect the TAT timestamp column.
tat_df['timestamp']

In [None]:
# show trace name and score
# get_trace_score returns a (score, trace) tuple per timestamp. Build the two
# columns from those tuples directly: packing them into a single np.array forces
# one dtype and turns every numeric score into a string.
trace_info = pd.DataFrame(
    tat_df['timestamp'].apply(get_trace_score).tolist(),
    columns=['score', 'trace_name'],
    index=tat_df.index,
)
tat_df['score'] = trace_info['score']
tat_df['trace_name'] = trace_info['trace_name']
# Encode each distinct trace label as a multiple of 10.
le=preprocessing.LabelEncoder()
tat_df['trace_code'] = 10* le.fit_transform(tat_df['trace_name'])
# Lookup from trace_code back to the trace label.
tcd={10*c:le.classes_[c] for c in range(len(le.classes_))}
cols_to_draw=['trace_code']
print(tcd)

In [None]:
# show clip reason
# Encode each distinct clip-reason value as a multiple of 10 and print the
# code -> label lookup.
le2 = preprocessing.LabelEncoder()
tat_df['turbo_clip_code'] = le2.fit_transform(tat_df['Turbo Parameters-IA Clip Reason']) * 10
ccd = {10 * position: label for position, label in enumerate(le2.classes_)}
print(ccd)

In [None]:
# cols_to_draw+=['tj', 'POWER', 'PL1','PL2','ewma']
cols_to_draw=['POWER', 'PL1','PL2','ewma','tj','TMEM-temp(Degree C)','TSKN-temp(Degree C)','trace_code']
# Reshape to long format (one row per timestamp/variable pair) once and plot it;
# the extra melt whose result was discarded has been dropped.
px.line(tat_df.melt(id_vars=['timestamp'], value_vars=cols_to_draw),x='timestamp',y='value',color='variable')

## Analyzing ESIF

In [None]:
# Load the ESIF log and order it by time. reset_index() keeps the pre-sort row
# label as an 'index' column.
esif_df = (
    pd.read_csv(esif_file)
    .assign(timestamp=lambda df: pd.to_datetime(df['timestamp']))
    .sort_values(by='timestamp')
    .reset_index()
)
# scale the MMIO PL (divide both limits by 1000)
esif_df = esif_df.assign(
    MMIO_PL1=lambda df: df['MMIO_PL1'] / 1000,
    MMIO_PL2=lambda df: df['MMIO_PL2'] / 1000,
)
# scale the ips: row-wise mean of the cpu0..cpu7 instruction deltas, divided by 1e8
ips_cols = list(map('cpu{}_inst_delta'.format, range(8)))
esif_df['ips'] = esif_df[ips_cols].mean(axis=1) / 1e8
# calc ewma of (MMIO_PL1 - POWER), using the already scaled MMIO_PL1
esif_df['ewma'] = (esif_df['MMIO_PL1'] - esif_df['POWER']).ewm(com=27.5, adjust=False).mean()
print(esif_df.shape)
esif_df.columns

In [None]:
# ESIF clip value indexed by timestamp.
edf = esif_df[['IA Clip', 'timestamp']].set_index('timestamp')
edf.head()

In [None]:
# TAT clip reason indexed by timestamp.
tdf = tat_df[['Turbo Parameters-IA Clip Reason', 'timestamp']].set_index('timestamp')
tdf.head()

In [None]:
# Put the two frames side by side, keeping only index values present in both.
res=pd.concat([edf,tdf],axis=1,join='inner')

In [None]:
# Inspect the row index of the ESIF frame.
esif_df.index

In [None]:
# Map every distinct 'IA Clip' value to its hexadecimal representation.
{clip_value: hex(clip_value) for clip_value in esif_df['IA Clip'].value_counts().index}

In [None]:
# For each ESIF row whose 'IA Clip' equals 1023543296, collect the TAT clip
# reasons logged at exactly the same timestamp (one array per ESIF row).
cr = [
    tat_df.loc[tat_df['timestamp'].eq(ts), 'Turbo Parameters-IA Clip Reason'].values
    for ts in esif_df.loc[esif_df['IA Clip'].eq(1023543296), 'timestamp']
]
cr

In [None]:
# Fraction of the entries in `cr` whose text mentions each reason.
# An empty `cr` prints nan instead of raising ZeroDivisionError.
print(sum('Max Turbo Limit' in str(v) for v in cr)/len(cr) if cr else float('nan'))
print(sum('RAPL PL2' in str(v) for v in cr)/len(cr) if cr else float('nan'))


In [None]:
# Fraction of the entries in `cr` whose text mentions 'Thermal Event'.
# An empty `cr` prints nan instead of raising ZeroDivisionError.
print(sum('Thermal Event' in str(v) for v in cr)/len(cr) if cr else float('nan'))

In [None]:
# .loc slices by label and includes the end label, so this shows labels up to and including 10.
esif_df.loc[:10,'timestamp']

In [None]:
######## Slow cell #############################
# add meta data about trace name and score
# get_trace_score returns a (score, trace) tuple per timestamp. Build the two
# columns from those tuples directly: packing them into a single np.array forces
# one dtype and turns every numeric score into a string.
trace_info = pd.DataFrame(
    esif_df['timestamp'].apply(get_trace_score).tolist(),
    columns=['score', 'trace_name'],
    index=esif_df.index,
)
esif_df['score'] = trace_info['score']
esif_df['trace_name'] = trace_info['trace_name']

# Encode each distinct trace label as a multiple of 10.
le=preprocessing.LabelEncoder()
esif_df['trace_code'] = 10* le.fit_transform(esif_df['trace_name'])
# Lookup from trace_code back to the trace label.
tcd={10*c:le.classes_[c] for c in range(len(le.classes_))}
cols_to_draw=['trace_code']
print(tcd)

In [None]:
# Build the column list from scratch rather than appending with +=, so re-running
# this cell does not accumulate duplicates and it does not depend on the value a
# previous cell left in cols_to_draw.
cols_to_draw=['trace_code','tj', 'POWER','tskin','MMIO_PL1','MMIO_PL2']
px.line(esif_df.melt(id_vars=['timestamp'], value_vars=cols_to_draw),x='timestamp',y='value',color='variable')

In [None]:
# esif_df.loc[:,['POWER','MMIO_PL1','MMIO_PL2','tj','tskin']].plot(figsize=(16,4),grid=True)
# esif_df.loc[:,['MMIO_PL1','MMIO_PL2']].plot(figsize=(16,4),grid=True)
# Static line plot of the selected ESIF columns against the row index.
esif_df[['POWER', 'tj', 'tskin', 'MMIO_PL1', 'MMIO_PL2']].plot(grid=True, figsize=(16, 4))

# DTT Sim

In [None]:
# Directory from which the simulator CSV files below are read.
# NOTE(review): machine-specific absolute path; the commented-out lines are
# alternative result folders — presumably earlier runs, confirm before switching.
sim_data_path='/home/gkoren2/PycharmProjects/remote/MLA/RL/DTT/dtt_rl/train/tmp'
# sim_data_path='/home/gkoren2/share/Data/MLA/DTT/results/stbl/onl_dttsim_dqn-15-06-2020_17-31-23/1'
# sim_data_path= '/home/gkoren2/share/Data/MLA/DTT/results/stbl/onl_dttsim_qrdqn-15-06-2020_17-31-43/1'

## ESIF

In [None]:
# Load the simulator ESIF log. Note that this rebinds `edf`.
sim_esif_file = os.path.join(sim_data_path, 'DTTSim_esif.csv')
print(sim_esif_file)
# scale the ips (divide ips_mean by 1e8)
edf = pd.read_csv(sim_esif_file).assign(ips_mean=lambda df: df['ips_mean'] / 1e8)
print(edf.shape)
edf.columns

In [None]:
# Inspect the raw values of the 'Episode_Scores' column.
edf['Episode_Scores'].values

In [None]:
cols_to_draw = ['tj', 'power', 'tskin', 'pl1', 'pl2', 'tmem', 'ewma']
# Use the row index as the x axis.
edf['timestamp'] = edf.index
fig = px.line(
    pd.melt(edf, id_vars=['timestamp'], value_vars=cols_to_draw),
    x='timestamp',
    y='value',
    color='variable',
)
# fig.update_layout(hovermode="x")
fig.show()

In [None]:
# Inspect the row index of the simulator ESIF frame.
edf.index

### sim_esif.csv
This file is generated by the `main` of `custom_envs.py` and should be identical to `DTTSim_esif.csv`.

In [None]:
# Load sim_esif.csv from the simulator output folder into `sedf`.
# Note: this rebinds `sim_esif_file` to the new path.
sim_esif_file=os.path.join(sim_data_path,'sim_esif.csv')
print(sim_esif_file)
sedf=pd.read_csv(sim_esif_file)
print(sedf.shape)
sedf.columns

In [None]:
cols_to_draw = ['tj', 'power', 'tskin', 'pl1', 'pl2', 'tmem', 'ewma']
# Long format (one row per timestamp/variable pair), one line per variable.
fig = px.line(
    pd.melt(sedf, id_vars=['timestamp'], value_vars=cols_to_draw),
    x='timestamp',
    y='value',
    color='variable',
)
# fig.update_layout(hovermode="x")
fig.show()

## DTTStateRewardWrapper
Explore the CSV that is generated by the wrapper; it contains the features.


In [None]:
# Load sim_features.csv from the simulator output folder into `fdf`.
sim_feat_file=os.path.join(sim_data_path,'sim_features.csv')
print(sim_feat_file)
fdf=pd.read_csv(sim_feat_file)
print(fdf.shape)
fdf.columns

In [None]:
# Use the row index as the 'timestamp' column (the x axis for the plot of this frame).
fdf['timestamp']=fdf.index
fdf

In [None]:
cols_to_draw = ['tj', 'power_to_maxpl1_mean', 'tskin', 'pl1', 'pl2', 'tmem', 'torbu']
# Long format (one row per timestamp/variable pair), one line per variable.
fig = px.line(
    pd.melt(fdf, id_vars=['timestamp'], value_vars=cols_to_draw),
    x='timestamp',
    y='value',
    color='variable',
)
# fig.update_layout(hovermode="x")
fig.show()

## Analyze score vs reward
In this subsection we analyze the relation between score and various reward functions - based on running DTTSim

In [None]:
# Load rew_scores_0_all.csv from the simulator output folder and preview its first rows.
rew_score_file = os.path.join(sim_data_path,'rew_scores_0_all.csv')
rsdf = pd.read_csv(rew_score_file)
rsdf.head()

In [None]:
# Scatter of 'avg score' (the melted 'value') against reward, coloured by policy
# and with one marker symbol per benchmark.
fig = px.scatter(
    pd.melt(rsdf, id_vars=['benchmark', 'policy', 'reward'], value_vars='avg score'),
    x='reward',
    y='value',
    color='policy',
    symbol='benchmark',
)
fig.show()

In [None]:
# Number of rows per policy.
rsdf['policy'].value_counts()