In [2]:
import pandas as pd
# Directory holding the exported CSV snapshots and the date stamp that every
# file name ends with. Change these two values to point the notebook at a
# different export instead of editing nine separate paths.
DATA_DIR = '.dataDir/testData'
SNAPSHOT_DATE = '2024-07-08'


def _read_snapshot(name):
    """Read `<DATA_DIR>/<name>-<SNAPSHOT_DATE>.csv` and return it as a DataFrame."""
    return pd.read_csv(f'{DATA_DIR}/{name}-{SNAPSHOT_DATE}.csv')


# Scan result exports.
df_unst = _read_snapshot('UNSTRUCTURED')
df_strc = _read_snapshot('STRUCTURED')

# Appliance metric exports, one file per metric name.
df_cpu = _read_snapshot('securiti_appliance_cpu_used')
df_dw = _read_snapshot('securiti_appliance_download_workers_count')
df_infra_lat = _read_snapshot('securiti_appliance_infra_access_latency')
df_mem = _read_snapshot('securiti_appliance_memory_used')
df_po_cpu = _read_snapshot('securiti_appliance_pod_cpu_usage')
df_po_mem = _read_snapshot('securiti_appliance_pod_memory_usage')
df_tsk_qlen = _read_snapshot('securiti_appliance_task_queue_length')


In [None]:
# Relabel the `pod` column as `appliance_id` on both scan frames (in place), so
# they share a key name with the appliance metric frames.
for scan_frame in (df_unst, df_strc):
    scan_frame.rename(columns={"pod": "appliance_id"}, inplace=True)

# Total GB scanned per (tenant, appliance) in the unstructured frame,
# ordered from the largest total to the smallest.
df = (
    df_unst
    .groupby(['tenant', 'appliance_id'])
    .agg(dataScannedInGB_sum=('dataScannedInGB', 'sum'))
    .reset_index()
    .sort_values('dataScannedInGB_sum', ascending=False)
)
display(df)

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots
from functools import reduce


def _appliance_metric(frame, appliance_id, how, suffix, metric_name=None):
    """Aggregate one metric frame's `value` per (appliance_id, ts) for one appliance.

    Parameters
    ----------
    frame : pandas.DataFrame
        Metric frame with `appliance_id`, `ts` and `value` columns (and
        `metrics_name` when `metric_name` is given).
    appliance_id : str
        Literal substring that `appliance_id` must contain.
    how : str
        Aggregation applied to `value` (e.g. 'max', 'mean').
    suffix : str
        Suffix appended to the aggregated column name (`value` -> `value<suffix>`).
    metric_name : str, optional
        Literal substring that `metrics_name` must contain.

    Returns
    -------
    pandas.DataFrame
        Columns `appliance_id`, `ts`, `value<suffix>`.
    """
    # na=False: rows whose id/name is missing count as "no match". Without it
    # str.contains yields NaN for them and boolean indexing raises ValueError.
    mask = frame['appliance_id'].str.contains(appliance_id, regex=False, na=False)
    if metric_name is not None:
        mask = mask & frame['metrics_name'].str.contains(metric_name, regex=False, na=False)
    return (
        frame[mask]
        .groupby(['appliance_id', 'ts'])
        .agg({'value': how})
        .add_suffix(suffix)
        .reset_index()
    )


def PlotScanPerformance(appliance_id, tenant_name):
    """Plot scan throughput and appliance metrics for one appliance on a shared time axis.

    Reads the notebook-level frames `df_unst`, `df_mem`, `df_tsk_qlen`, `df_cpu`
    and `df_dw`, keeps the rows whose `appliance_id` contains `appliance_id`,
    aggregates each per timestamp, outer-joins them on (`appliance_id`, `ts`)
    and shows a plotly figure: stacked areas on the primary y axis and the two
    queue-length lines on the secondary y axis.

    Parameters
    ----------
    appliance_id : str
        Literal substring matched against the `appliance_id` columns; also
        used in the figure title.
    tenant_name : str
        Shown in the figure title only.

    Side effects
    ------------
    The `ts` column of `df_mem`, `df_tsk_qlen`, `df_cpu` and `df_dw` is
    converted to datetimes in place (interpreting numeric values as epoch
    seconds). The figure is displayed via `show()`; nothing is returned.
    """
    # Scan statistics per timestamp. The string 'sum' is used for every column:
    # passing the builtin `sum` triggers a pandas FutureWarning and is slated
    # to change behaviour.
    unst_rows = df_unst[df_unst['appliance_id'].str.contains(appliance_id, regex=False, na=False)]
    df_unst_cust = (
        unst_rows
        .groupby(['ts', 'appliance_id'])
        .agg({'dataScannedInGB': 'sum', 'processingTimeinHrs': 'sum', 'numberOfFilesScanned': 'sum'})
        .add_suffix('_Count')
        .reset_index()
    )

    # Scan timestamps are parsed as epoch milliseconds.
    df_unst_cust['ts'] = pd.to_datetime(df_unst_cust['ts'], unit='ms')
    dateStr = str(df_unst_cust['ts'].min()) + ' and ' + str(df_unst_cust['ts'].max())

    # Metric timestamps are parsed as epoch seconds. The conversion is written
    # back to the shared frames, as before, because other cells may rely on it.
    for metric_frame in (df_mem, df_tsk_qlen, df_cpu, df_dw):
        metric_frame['ts'] = pd.to_datetime(metric_frame['ts'], unit='s')

    dw_app = _appliance_metric(df_dw, appliance_id, 'max', '_dw')
    mem_app = _appliance_metric(df_mem, appliance_id, 'mean', '_mem')
    cpu_app = _appliance_metric(df_cpu, appliance_id, 'mean', '_cpu')
    tsk_qlen_app = _appliance_metric(
        df_tsk_qlen, appliance_id, 'max', '_taskQueue',
        metric_name='securiti-appliance-downloader-tasks-queue',
    )
    dwn_qlen_app = _appliance_metric(
        df_tsk_qlen, appliance_id, 'max', '_tempDownloadQueue',
        metric_name='t-appliance-downloader-tasks-queue',
    )

    # Outer join keeps every timestamp that appears in any of the inputs.
    data_frames = [df_unst_cust, tsk_qlen_app, dwn_qlen_app, cpu_app, mem_app, dw_app]
    df_merged = reduce(
        lambda left, right: pd.merge(left, right, on=['appliance_id', 'ts'], how='outer'),
        data_frames,
    )

    # Rescale series so they are readable on a shared axis. The factor in each
    # column name is what the plotted value must be multiplied by to get the
    # raw value back (e.g. worker count is plotted x10, hence "(x0.1)").
    df_merged['value_dw(x0.1)'] = (df_merged['value_dw']*10).astype(float)
    df_merged['tempDownloadQueue(x1000)'] = (df_merged['value_tempDownloadQueue']/1000).astype(float)
    df_merged['numberOfFilesScanned(x1000)'] = (df_merged['numberOfFilesScanned_Count']/1000).astype(float)

    subfig1 = make_subplots(specs=[[{"secondary_y": True}]])

    # Primary axis: scan volume/time/file count plus CPU, memory and workers.
    fig1 = px.area(df_merged, x='ts', y=['dataScannedInGB_Count', 'processingTimeinHrs_Count', 'numberOfFilesScanned(x1000)', 'value_cpu', 'value_mem', 'value_dw(x0.1)'])
    fig1.update_traces(yaxis="y1", showlegend=True)

    # Secondary axis: the two queue-length series.
    fig2 = px.line(df_merged, x='ts', y=['tempDownloadQueue(x1000)', 'value_taskQueue'])
    fig2.update_traces(yaxis="y2", showlegend=True)

    subfig1.add_traces(fig1.data + fig2.data)
    subfig1.layout.yaxis2.title = "task_queue"
    subfig1.layout.yaxis.title = "GB"
    # f-string keeps the same title text and also tolerates a non-string tenant value.
    subfig1.layout.title = f'{appliance_id}[{tenant_name}] performance between {dateStr}'
    subfig1.layout.update(legend=dict(
        orientation='h',
    ))
    subfig1.show()

# To plot a single appliance instead, call the function directly, e.g.
# PlotScanPerformance('408b37cf-7e71-4d8f-ad4b-c6496d0739d6', '<tenant name>')

# One figure per (tenant, appliance) whose total scanned volume exceeds 50 GB.
heavy_scanners = df[df['dataScannedInGB_sum'] > 50]
for appliance, tenant in zip(heavy_scanners['appliance_id'], heavy_scanners['tenant']):
    PlotScanPerformance(appliance, tenant)