In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots 
from tqdm.notebook import tqdm
import re 
# Show every column when displaying wide frames. The fully-qualified key is
# required: the short form 'max_columns' also matches
# 'styler.render.max_columns' on recent pandas and raises OptionError.
pd.set_option('display.max_columns', None)

In [None]:
# Read data.csv, using its first column as the row index.
df = pd.read_csv(filepath_or_buffer='data.csv', index_col=0)
df.shape

In [None]:
# Rows whose bytes_returned exceeds 16 * 1024**2; every cid that appears in
# such a row is collected for exclusion.
df_temp = df.loc[df['bytes_returned'].gt(16 * 1024 ** 2)]
exclude_cid = set(df_temp['cid'].unique())
len(exclude_cid)

In [None]:
# Count non-null timestamps per agent and keep the agents with more than
# 10000 of them; those agents are collected for exclusion.
df_temp = (
    df.groupby('agent')['timestamp']
    .count()
    .rename('count')
    .to_frame()
    .loc[lambda frame: frame['count'] > 10000]
    .reset_index()
)
exclude_agent = set(df_temp['agent'].unique())
len(exclude_agent)

In [None]:
# Keep a row only when neither its agent nor its cid is in an exclusion set.
df1 = df.loc[~(df['agent'].isin(exclude_agent) | df['cid'].isin(exclude_cid))]
df1.shape

In [None]:
# Fraction of the original rows that survived the exclusion filter.
len(df1) / len(df)

In [None]:
# Keep only the two columns used downstream and divide bytes_returned by
# 1024**2. The column is built with .assign() so a fresh frame is produced
# instead of writing into a slice of the previous df1, which raises
# SettingWithCopyWarning.
# NOTE: re-running this cell alone rescales the column again; re-run the
# filter cell that creates df1 first.
df1 = df1[['agent', 'bytes_returned']].assign(
    bytes_returned=lambda frame: frame['bytes_returned'] / pow(1024, 2)
)

In [None]:
def q10(x):
    """Return the 0.1 quantile of ``x`` using its ``quantile`` method."""
    tenth_percentile = x.quantile(0.1)
    return tenth_percentile

def q90(x):
    """Return the 0.9 quantile of ``x`` using its ``quantile`` method."""
    ninetieth_percentile = x.quantile(0.9)
    return ninetieth_percentile

# Per-agent min / median / max / mean. The aggregation yields two-level column
# labels, so the outer level is dropped to leave just the statistic names.
# Values are then rounded to whole numbers and 'agent' becomes a column again.
df2 = (
    df1.groupby(['agent'])
    .agg(['min', 'median', 'max', 'mean'])
    .droplevel(0, axis=1)
    .round(0)
    .astype(int)
    .reset_index()
)
df2.head()

In [None]:
# Add the max-min spread per agent, order agents by (min, max), and attach the
# resulting rank both as an integer ('index') and as a share of all agents
# ('idx_percentage').
# The derived columns are dropped first so the cell can be re-run safely:
# otherwise a second run finds an existing 'index' column and reset_index()
# adds a stray 'level_0' column.
df2 = (
    df2.drop(columns=['index', 'idx_percentage'], errors='ignore')
    .assign(gap=lambda frame: frame['max'] - frame['min'])
    .sort_values(by=['min', 'max'])
    .reset_index(drop=True)  # discard the pre-sort row labels
    .reset_index()           # expose the new 0..n-1 rank as column 'index'
    .assign(idx_percentage=lambda frame: frame['index'] / frame.shape[0])
)
df2.head()

In [None]:
# Share of agents whose min is 0 and whose max equals their min (gap of 0).
df_temp = df2.loc[df2['min'].eq(0) & df2['gap'].eq(0)]
p1 = len(df_temp) / len(df2)
p1

In [None]:
fig = go.Figure()

# One line per statistic, each plotted against the agent's rank share
# (idx_percentage).
fig.add_trace(go.Scatter(x=df2['idx_percentage'], y=df2['max'], name='max', line=dict(color='red')))
fig.add_trace(go.Scatter(x=df2['idx_percentage'], y=df2['mean'], name='mean', line=dict(color='yellow')))
fig.add_trace(go.Scatter(x=df2['idx_percentage'], y=df2['min'], name='min', line=dict(color='green')))

# Titles and axis labels.
fig.update_layout(
    title='Request size by agent',
    xaxis_title='agent',
    yaxis_title='request size in MB',
)

# Mark the share p1 of agents with min == 0 and gap == 0. The label is
# formatted from p1 itself so it cannot drift from the line's position when
# the data or the exclusion thresholds change.
fig.add_vline(
    x=p1,
    line_width=1,
    line_dash="dash",
    line_color="grey",
    annotation_text=f"{p1:.1%} of total agent",
    annotation_position="top right",
)

# The x values are fractions in [0, 1); render the ticks as percentages.
fig.update_xaxes(tickformat=',.0%')

fig.show()