In [None]:
import pandas as pd
import plotly.express as px
import dataframeLoader as dfl

# Let pandas render up to 500 columns and 1000 characters of width when a
# frame is displayed.
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Appliance whose rows are selected from every loaded metric file.
appliance_id = "37286f5a-9f8d-4f05-829a-2e9a8f25c5e4"

def filterbyApplianceId(dfv, appId):
    """Return the rows of ``dfv`` that belong to one appliance, with ``ts`` parsed.

    Parameters
    ----------
    dfv : pandas.DataFrame
        Frame with at least an ``appliance_id`` column and a ``ts`` column
        holding epoch timestamps in seconds.
    appId : str
        Value of ``appliance_id`` to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the matching rows (original index labels
        are kept), whose ``ts`` column is converted with
        ``pd.to_datetime(..., unit='s')``. ``dfv`` itself is not modified.
    """
    # Take an explicit copy of the filtered rows: assigning a column on the
    # bare ``.loc`` result is chained assignment and raises
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    matching = dfv.loc[dfv['appliance_id'] == appId].copy()
    matching['ts'] = pd.to_datetime(matching['ts'], unit='s')

    return matching

# Directory passed to dfl.loadDataFrameFromFileRegex for every metric export.
DATA_DIR = 'dataDir'


def _load_metric(file_pattern, metric):
    """Load the CSVs matching ``file_pattern`` and keep only ``appliance_id`` rows.

    ``metric`` is forwarded to the loader as its ``metrics`` argument
    (presumably it becomes the value of the ``metrics`` column -- confirm in
    dataframeLoader). The filtered frame is copied so that later ``.loc``
    assignments write to an independent frame instead of a filtered slice.
    """
    loaded = dfl.loadDataFrameFromFileRegex(DATA_DIR, file_pattern, metrics=metric)
    return filterbyApplianceId(loaded, appliance_id).copy()


# One frame per exported metric series; df1 is also used later to bound the plot range.
df1 = _load_metric('securiti_appliance_cpu_used-max*.csv', 'cpu-max')
df2 = _load_metric('securiti_appliance_cpu_used-avg*.csv', 'cpu-avg')
df3 = _load_metric('securiti_appliance_memory_used-max*.csv', 'memory-max')
df4 = _load_metric('securiti_appliance_memory_used-avg*.csv', 'memory-avg')
df5 = _load_metric('securiti_appliance_download_workers_count-avg*.csv', 'dw-avg')
df6 = _load_metric('securiti_appliance_download_workers_count-max*.csv', 'dw-max')
df7 = _load_metric('securiti_appliance_task_queue_length-avg*.csv', 'taskq-avg')
df8 = _load_metric('securiti_appliance_task_queue_length-max*.csv', 'taskq-max')

# The task-queue exports carry several queues, told apart by 'metrics_name';
# relabel them so each queue becomes its own metric column after the pivot.
# NOTE(review): the first pattern contains "downloader" yet is labelled
# 'taskq-*', and the second pattern looks like a truncated queue name --
# confirm both against the real metrics_name values.
df7.loc[df7['metrics_name'].str.contains('securiti-appliance-downloader-tasks-queue', regex=False), 'metrics'] = 'taskq-avg'
df7.loc[df7['metrics_name'].str.contains('t-appliance-downloader-tasks-queue', regex=False), 'metrics'] = 'downloadq-avg'
df8.loc[df8['metrics_name'].str.contains('securiti-appliance-downloader-tasks-queue', regex=False), 'metrics'] = 'taskq-max'
df8.loc[df8['metrics_name'].str.contains('t-appliance-downloader-tasks-queue', regex=False), 'metrics'] = 'downloadq-max'

# Scanned-data export: its 'pod' column plays the role of appliance_id and its
# timestamps are epoch milliseconds (the other exports are parsed as seconds).
# rename/assign return new frames, so neither the loader's frame nor a
# filtered slice is mutated in place.
df9 = dfl.loadDataFrameFromFileRegex(DATA_DIR, 'UNSTRUCTURED-*.csv', metrics='dataScanned').rename(columns={'pod': 'appliance_id'})
df9 = df9.loc[df9['appliance_id'] == appliance_id].assign(node_ip="master")
# Sum dataScannedInGB per appliance/timestamp/node/metric into a 'value'
# column so the frame lines up with the other metric frames.
df9 = df9.groupby(['appliance_id', 'ts', 'node_ip', 'metrics']).agg(value=('dataScannedInGB', 'sum')).reset_index()
df9['ts'] = pd.to_datetime(df9['ts'], unit='ms')

# Stack all series, then pivot to one row per (appliance_id, ts, node_ip)
# with one column per aggregate/metric pair.
df = pd.concat([df1, df2, df3, df4, df5, df6, df7, df8, df9])
# Rows without a node_ip are attributed to "master" so they still get a pivot key.
df['node_ip'] = df['node_ip'].fillna("master")
# NOTE(review): the flattened names used below ('value max cpu-max', ...)
# depend on the column level order this exact aggfunc spec produces; keep
# aggfunc as written unless the names in metric_columns are updated too.
df = df.pivot_table(index=['appliance_id', 'ts', 'node_ip'], columns='metrics', values=['value'], aggfunc=('max', 'mean')).reset_index()
df.columns = [' '.join(col).strip() for col in df.columns.values]

# Flattened pivot column -> final column name, in display order. "*-max" and
# dataScanned series keep the max aggregate, "*-avg" series keep the mean.
metric_columns = {
    'value max cpu-max': 'cpu-max',
    'value mean cpu-avg': 'cpu-avg',
    'value max memory-max': 'memory-max',
    'value mean memory-avg': 'memory-avg',
    'value max dataScanned': 'dataScannedInGB',
    'value max dw-max': 'dw-max',
    'value mean dw-avg': 'dw-avg',
    'value max taskq-max': 'taskq-max',
    'value mean taskq-avg': 'taskq-avg',
    'value max downloadq-max': 'downloadq-max',
    'value mean downloadq-avg': 'downloadq-avg',
}
df = df[['appliance_id', 'ts', 'node_ip', *metric_columns]].rename(columns=metric_columns)


display(df)


In [None]:
# Plot every metric as its own facet row, limited to the time span covered by
# the cpu-max samples in df1.
if df1.empty:
    # Without samples there is no time window; fail with a clear message
    # rather than an opaque error from min()/max() on an empty sequence.
    raise ValueError('df1 (cpu-max) has no rows for this appliance; nothing to plot')

# Compute the window once with the vectorised Series reductions.
ts_start, ts_end = df1['ts'].min(), df1['ts'].max()
dfy = df[df['ts'].between(ts_start, ts_end)]  # inclusive on both ends
ttle_str = ('scan performance for appliance_id: ' + df1['appliance_id'].min()
            + ' between dates ' + str(ts_start) + ' - ' + str(ts_end))
# Long format: one row per (appliance_id, ts, node_ip, metric) for faceting.
dfx = pd.melt(dfy, id_vars=['appliance_id', 'ts', 'node_ip'], var_name='metrics', value_name='value')
fig = px.line(dfx, x="ts", y="value", color='node_ip', facet_row='metrics', height=2000, title=ttle_str, facet_row_spacing=0.005)
# Metrics have very different scales, so do not tie the facets' y-axes together.
fig = fig.update_yaxes(matches=None)

fig.show()
# for i in df2['metrics'].unique():
#     print(i)
#     fig = px.line(df2.loc[df2['metrics'].isin([i])], x="ts", y="value", color='node_ip')
#     fig.show()