In [149]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.graph_objects as go
import plotly.express as px
import os
import plotly.io as plt_io
# Enable this to export as iframe
# plt_io.renderers.default = 'iframe'

In [150]:
trello_table = pd.read_csv('./INPUT/trello_board.csv')
trello_fields_dict = {'Card Name':'T#',
                          'Card Description': 'DESC',
                          'Labels': 'LABELS',
                          'List Name': 'STATUS', 
                          'T_CRE_TS':'T_CREATION_TIMESTAMP', 
                          'Card ID': 'T_ACK_TIMESTAMP',
                          'T_RES_TS': 'T_RESOLUTION_TIMESTAMP'}
trello_table = trello_table[trello_fields_dict]
trello_table.rename(columns=trello_fields_dict, inplace=True)
trello_table['T_CREATION_TIMESTAMP'] = pd.to_datetime(trello_table['T_CREATION_TIMESTAMP'])
trello_table['T_RESOLUTION_TIMESTAMP'] = pd.to_datetime(trello_table['T_RESOLUTION_TIMESTAMP'])

trello_table.start_timestamp = trello_table['T_CREATION_TIMESTAMP'].min().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.end_timestamp = trello_table['T_CREATION_TIMESTAMP'].max().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.no_days = len(trello_table['T_CREATION_TIMESTAMP'].dt.normalize().unique())


In [151]:
# Only analyze resolved tickets
trello_table = trello_table[trello_table['STATUS'] == 'RESOLVED']
trello_table = trello_table.sort_values(by=['T#'],ignore_index=True)
# To get the card creation timestamp from it's ID
trello_table['T_ACK_TIMESTAMP'] = [x[:8] for x in trello_table['T_ACK_TIMESTAMP']]
trello_table['T_ACK_TIMESTAMP'] = trello_table['T_ACK_TIMESTAMP'].apply(int, base=16)
trello_table['T_ACK_TIMESTAMP'] = pd.to_datetime(trello_table['T_ACK_TIMESTAMP'],unit='s')
trello_table['T_ACK_TIMESTAMP'] = trello_table['T_ACK_TIMESTAMP'].dt.tz_localize('GMT').dt.tz_convert('Asia/Riyadh')

In [152]:
# # Import and clean the excel file
# trello_table = pd.read_excel('./INPUT/vsoc_report.xlsx')
# vsoc_fields_dict = {'ISS Ticket ID':'T#',
#                           'Status': 'STATUS',
#                           'Severity Code': 'SEVERITY',
#                           'Create Date': 'TIMESTAMP',
#                           'Resolution': 'RESOLUTION',
#                           'Assigned To': 'T_ISSUER',
#                           'Comments' : 'COMMENT',}

# trello_table = trello_table[vsoc_fields_dict]
# trello_table.rename(columns= vsoc_fields_dict, inplace=True)
# severity_mapper = {'SEV1':'CRITICAL', 'SEV2': 'HIGH', 'SEV3': 'MEDIUM', 'SEV4': 'LOW'}
# trello_table = trello_table.replace({'SEVERITY': severity_mapper})
# trello_table['TIMESTAMP'] = pd.to_datetime(trello_table['TIMESTAMP'])
# trello_table['TIMESTAMP'] = trello_table['TIMESTAMP'].dt.tz_localize('GMT').dt.tz_convert('Asia/Riyadh')
# trello_table.start_timestamp = trello_table['TIMESTAMP'].min().strftime('%d/%m/%Y|%H:%M:%S')
# trello_table.end_timestamp = trello_table['TIMESTAMP'].max().strftime('%d/%m/%Y|%H:%M:%S')
# trello_table.no_days = len(trello_table['TIMESTAMP'].dt.normalize().unique())
# tags_mapper = {'S1':'CRITICAL', 'SEV2': 'HIGH', 'SEV3': 'MEDIUM', 'SEV4': 'LOW'}
# trello_table.to_csv('./OUTPUT/trello_table.csv', sep=',')

# combined_table = pd.merge(trello_table, trello_table, on='T#')
# combined_table.sort_values(by=['T#'],ignore_index=True)
# combined_table.index += 1 
# display(combined_table)
# combined_table.to_csv('./OUTPUT/combined_table.csv')

In [153]:
# @TODO: time is off by one hour for some reason
# @TODO: handle empty/NaT timestamp field 
# Adjust time to resolve to business hours
sip_bh = pd.offsets.CustomBusinessHour(start='08:00', end='16:00', weekmask='Sun Mon Tue Wed Thu')

def f(x):
    idx = pd.date_range(start=x.T_CREATION_TIMESTAMP, end=x.T_RESOLUTION_TIMESTAMP, freq= sip_bh)
    mask = ~((idx.dayofweek == 0) & (idx.hour <= 7))
    return len(idx[mask])


trello_table['T_CREATION_TIMESTAMP_ADJUSTED'] = trello_table['T_CREATION_TIMESTAMP']  + sip_bh
trello_table['BUSINESS_HOURS_TO_RESOLVE'] = trello_table.apply(f, axis=1)

# @TODO: ack time is wrong. Why?
trello_table['T_ACK_TIMESTAMP_ADJUSTED'] = trello_table['T_ACK_TIMESTAMP']  + sip_bh
trello_table['BUSINESS_HOURS_TO_ACK'] = trello_table.apply(f, axis=1)

mtta = round(trello_table['BUSINESS_HOURS_TO_ACK'].mean(), 2) 
mttr =  round(trello_table['BUSINESS_HOURS_TO_RESOLVE'].mean(), 2) 

print('Mean time to resolve = ', mttr, 'business hours')
print('Mean time to ack = ', mtta, 'business hours')

Mean time to resolve =  15.22 business hours
Mean time to ack =  15.22 business hours



Non-vectorized DateOffset being applied to Series or DatetimeIndex


Non-vectorized DateOffset being applied to Series or DatetimeIndex



In [154]:
# Handle labels
labels_dict = {'SC' : 'SEC_CONTROL', 'RC' : 'RES_CODE', 'CO' : 'COLLABS', 'TB' : 'ROOT_CAUSE', 'PR' : 'PRIORITY'}
trello_table["LABELS"] = trello_table["LABELS"].str.replace('\([^)]*\)', '')
trello_table["LABELS"] = trello_table["LABELS"].str.replace(',', ' ')
#  Temp. solution :)
trello_table['SEC_CONTROL'] = trello_table['LABELS'].str.extract(r'(\bSC\d{2}\b)')
trello_table['RES_CODE'] = trello_table['LABELS'].str.extract(r'(\bRC\d{2}\b)')
trello_table['COLLABS'] = trello_table['LABELS'].str.extract(r'(\bCO\d{2}\b)')
trello_table['ROOT_CAUSE'] = trello_table['LABELS'].str.extract(r'(\bTB\d{2}\b)')
trello_table['PRIORITY'] = trello_table['LABELS'].str.extract(r'(\bPR\d{2}\b)')
# Load labels translation as a separate external file to keep the confidentiality of your log sources
labels_translation = pd.read_csv('./INPUT/labels_translation.csv', index_col=0, header=None, squeeze=True).to_dict()
trello_table = trello_table.replace(labels_translation)
trello_table.drop('LABELS', axis=1, inplace=True)




The default value of regex will change from True to False in a future version.



In [155]:
# for label in labels_dict.keys():
#     test = trello_table_labels_freq[trello_table_labels_freq['LBL'].str.contains(r'\b'+label+r'\d{2}\b')]
#     fig = px.pie(test, values='CNT', names='LBL', title='Distribution of resolved ticket categorized by {}'.format(labels_dict[label]))
#     fig.update_traces(textposition='inside', textinfo='percent+label')
#     fig.write_image('./OUTPUT/{}_COUNT.pdf'.format(labels_dict[label]))
#     # fig.show()

In [156]:

# trello_table_labels_freq = trello_table['LABELS'].str.split(expand=True).stack().value_counts()
# trello_table_labels_freq = pd.DataFrame({'LBL':trello_table_labels_freq.index, 'CNT':trello_table_labels_freq.values})


# for label in labels_dict.keys():
#     test = trello_table_labels_freq[trello_table_labels_freq['LBL'].str.contains(r'\b'+label+r'\d{2}\b')]
#     fig = px.pie(test, values='CNT', names='LBL', title='Distribution of resolved ticket categorized by {}'.format(labels_dict[label]))
#     fig.update_traces(textposition='inside', textinfo='percent+label')
#     fig.write_image('./OUTPUT/{}_COUNT.pdf'.format(labels_dict[label]))
#     # fig.show()

# trello_table_labels_freq.to_csv('./OUTPUT/trello_table_labels_freq.csv', sep=',')

In [157]:
required_fields = ['SEC_CONTROL', 'RES_CODE', 'COLLABS', 'ROOT_CAUSE', 'PRIORITY']

trello_table.start_timestamp = trello_table['T_CREATION_TIMESTAMP'].min().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.end_timestamp = trello_table['T_CREATION_TIMESTAMP'].max().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.no_days = len(trello_table['T_CREATION_TIMESTAMP'].dt.normalize().unique())

for required_field in required_fields:
    copy_of_trello_table = trello_table.copy()
    series = pd.value_counts(copy_of_trello_table[required_field])
    mask = (series/series.sum() * 100).lt(1.0)
    copy_of_trello_table[required_field] = np.where(copy_of_trello_table[required_field].isin(series[mask].index),'OTHERS ≤ 1.0%'.format(required_field),copy_of_trello_table[required_field])
    required_field_count = copy_of_trello_table[required_field].value_counts()    
    required_field_count = required_field_count.rename_axis(required_field).reset_index(name='{}_COUNT'.format(required_field))
    required_field_count = required_field_count.reset_index(drop=True)
    required_field_count.index.rename('NO.', inplace=True)
    required_field_count.index+=1
    required_field_count['{}_PCT'.format(required_field)] = required_field_count['{}_COUNT'.format(required_field)] / required_field_count['{}_COUNT'.format(required_field)].sum()
    required_field_count.to_csv('./OUTPUT/{}.csv'.format(required_field), sep=',')
    fig = go.Figure(data=[go.Pie(labels=required_field_count[required_field], values=required_field_count['{}_PCT'.format(required_field)], textinfo='label+percent',
                             insidetextorientation='radial', showlegend=False
                            )])
    fig.write_image("./OUTPUT/{}_COUNT.pdf".format(required_field))
    fig.show()

In [158]:
# Generate a summary table of the main features
summary_table = trello_table.groupby(['SEC_CONTROL', 'RES_CODE']).size().unstack(fill_value=0)
summary_table.to_csv('./OUTPUT/SUMMARY_TABLE.csv', sep=',')

In [159]:
trello_table.start_timestamp = trello_table['T_CREATION_TIMESTAMP'].min().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.end_timestamp = trello_table['T_CREATION_TIMESTAMP'].max().strftime('%d/%m/%Y|%H:%M:%S')
trello_table.no_days = len(trello_table['T_CREATION_TIMESTAMP'].dt.normalize().unique())

tickets_count = trello_table['T#'].value_counts()
tickets_count = tickets_count.rename_axis('T#').reset_index(name='T#_COUNT')
tickets_count = tickets_count.reset_index(drop=True)
tickets_count.index.rename('NO.', inplace=True)
tickets_count.index+=1
tickets_count['T#_PCT'] = tickets_count['T#_COUNT'] / tickets_count['T#_COUNT'].sum()
s = pd.to_datetime(trello_table['T_CREATION_TIMESTAMP'])
tickets_count = s.groupby(s.dt.floor('d')).size().reset_index(name='COUNT')
# Plot ----------------------------------------------------------------------------------------------
my_layout = go.Layout(
    title='Global Font',
    font=dict(color='#7f7f7f', size=10),
    showlegend=False)
my_data = go.Scatter(
    x=tickets_count['T_CREATION_TIMESTAMP'],
    y=tickets_count['COUNT'],
    mode='lines+markers+text')
fig = go.Figure(data=my_data, layout=my_layout)

fig.update_layout(shapes=[
    # Line Horizontal
    go.layout.Shape(
        type='line',
        x0=tickets_count['T_CREATION_TIMESTAMP'].min(),
        y0=tickets_count['COUNT'].mean(),
        x1=tickets_count['T_CREATION_TIMESTAMP'].max(),
        y1=tickets_count['COUNT'].mean(),
        line=dict(
            color='black',
            width=2,
            dash='longdash',
        ),
    ),
])

fig.add_trace(
    go.Scatter(
        x=[
            tickets_count['T_CREATION_TIMESTAMP'].max() -
            pd.Timedelta(days=trello_table
        .no_days) / 2.0
        ],
        y=[tickets_count['COUNT'].mean()],
        mode='markers+text',
        name='Markers and Text',
        hoverinfo='skip',
        textposition='top right'))

fig.update_traces(
    marker_color='rgb(231,198,91)',
    marker_line_color='black',
    marker_line_width=1,
    opacity=1.0)
fig.update_layout(
    title_text='VSOC tickets trendline grouped by the day<br>From {} to {}. dashed line '
               'represents<br>the average no. VSOC tickets ({} '
               'VSOC tickets per day)'.format(trello_table
            .start_timestamp, trello_table
            .end_timestamp,
                                             int(tickets_count['COUNT'].mean())),font_size=10,title_x=0.5)

fig.update_traces(
marker_color='rgb(231,198,91)',
marker_line_color='black',
marker_line_width=1.0,
opacity=1.0)
# tune the font_size to your needs
fig.update_layout(font_size=12)
# fig.show()
fig.write_image('./OUTPUT/TRENDLINE.pdf')

In [160]:
s = pd.to_datetime(trello_table['T_CREATION_TIMESTAMP'])
tickets_count = s.groupby(s.dt.floor('H')).size().reset_index(name='COUNT')
tickets_count['DAY'] = tickets_count['T_CREATION_TIMESTAMP'].dt.day_name()
tickets_count['HR'] = tickets_count['T_CREATION_TIMESTAMP'].dt.hour
heat_map_data = [go.Heatmap(x=tickets_count['DAY'], y = tickets_count['HR'], z = tickets_count['COUNT'],colorscale='Viridis')]
heat_map_layout = go.Layout(title_text='Heatmap of no. VSOC tickets created per hour of the day<br>From {} '
                                       'to {}.'.format(trello_table.start_timestamp, trello_table.end_timestamp,))
fig = go.Figure(data = heat_map_data, layout = heat_map_layout)
fig.show()
fig.write_image('./OUTPUT/HEATMAP.pdf')

In [161]:
# #  FIGURE 1: EOI Chart
# ce_categories_center = ['caused by external actors', 'caused by internal technical actors', 'caused by internal non-technical actors']
# last_month_ce_count = [33, 35, 12]
# current_month_ce_count = [19, 22, 15]
# ce_sum = list(((np.array(current_month_ce_count ) + np.array(last_month_ce_count))))
# delta_ce = list(((np.array(current_month_ce_count) - np.array(last_month_ce_count )) / last_month_ce_count ) * 100)
# delta_ce = np.round(delta_ce, decimals=1)

# green_tag = '<span style="color:green">▼</span>'
# red_tag = '<span style="color:red">▲</span>'
# ibm_logo_path = os.path.join(os.getcwd(), 'INPUT/sip-logo.png')

# fig = go.Figure()
# fig.add_trace(go.Bar(
#     x=ce_categories_center,
#     y=last_month_ce_count,
#     name='SI count on August',
#     marker_color='rgb(231,198,91)', 
#     marker_line_color='black',
#     textposition='inside', 
#     text=last_month_ce_count,
#     marker_line_width=1.5, 
#     opacity=1.0, width=0.5))

# fig.add_trace(go.Bar(
#     x=ce_categories_center,
#     y=current_month_ce_count,
#     name='SI count in September', 
#     text=current_month_ce_count,
#     textposition='inside',
#     marker_color='white', 
#     marker_line_color='black',
#     marker_line_width=1.5, 
#     opacity=0.6, width=0.6))

# fig.add_trace(go.Bar(
#     x=ce_categories_center, 
#     y=max(current_month_ce_count,last_month_ce_count),
#     text=delta_ce,
#     textposition='outside',
#     marker_line_width=1.5, 
#     opacity=0, 
#     width=0.6,
#     showlegend=False))

# for i, (x, y, z) in enumerate(zip(current_month_ce_count, last_month_ce_count, delta_ce)):
#     print(i, x, y, z)
#     if z > 0:
#         fig.add_annotation(x=i,y=max(x+1, y+1),text='<b>▲+{}%</b>'.format(z),showarrow=False,font=dict(color='red', size=12),align='center')
#     else:
#         fig.add_annotation(x=i,y=max(x+1, y+1),text='<b>▼{}%</b>'.format(z),showarrow=False,font=dict(color='green', size=12),align='center')


# title_plot1 = '<b> Increase {} and decrease {} in security incidents count categorized by root cause</b>'.format(red_tag, green_tag)

# fig.layout.images = [dict(
#     source=ibm_logo_path,
#     xref='paper', 
#     yref='paper',
#     x=0.05, 
#     y=1.05,
#     sizex=0.11, 
#     sizey=0.15,
#     xanchor='center', 
#     yanchor='bottom')]

# fig.update_layout(
#     height=600,
#     width=1200,
#     template='plotly_white',
#     font_family='IBM Plex Sans',
#     font_size=14,
#     barmode='overlay', 
#     xaxis_tickangle=0,
#     title=title_plot1,
#     title_x=0.5, 
#     font=dict(color='black'),
#     legend=dict(
#     orientation='h',
#     yanchor='top',
#     y=1.1,
#     xanchor='center',
#     x=0.5))
    
# fig.show()
# fig.write_image('./OUTPUT/EOI.pdf')

In [162]:
# last_month_ce_count = [4, 14, 12, 25, 2, 3]
# current_month_ce_count = [2, 5, 2, 9, 5, 7]
# # delta_count_ce = last_
# delta_percentage_ce = list(((np.array(current_month_ce_count) - np.array(last_month_ce_count )) / np.array(last_month_ce_count)) * 100)
# delta_percentage_ce = np.round(delta_percentage_ce, decimals=1)
# ce_sum = current_month_ce_count  + last_month_ce_count

# print('last_month_sum: {}'.format(np.average(last_month_ce_count)))
# print('current_month_ce_sum: {}'.format(np.average(current_month_ce_count)))
# print('delta_ce_sum: {}'.format(np.average(delta_ce)))

In [163]:
# @TODO: same figure using CO as parents, SC as labels
# fig =go.Figure(go.Sunburst(
#     labels=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
#     parents=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
#     values=[10, 14, 12, 10, 2, 6, 6, 4, 4],
# ))
# fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))

# fig.show()

In [164]:
# trello_table
trello_table.drop(['T_ACK_TIMESTAMP', 'T_RESOLUTION_TIMESTAMP', 'BUSINESS_HOURS_TO_RESOLVE', 'T_ACK_TIMESTAMP_ADJUSTED', 'BUSINESS_HOURS_TO_ACK', 'STATUS', 'T_CREATION_TIMESTAMP_ADJUSTED', 'COLLABS', 'ROOT_CAUSE'], axis=1, inplace=True)
customer_report = trello_table[['T#', 'T_CREATION_TIMESTAMP', 'PRIORITY','SEC_CONTROL', 'RES_CODE', 'DESC']]
customer_report.to_html('./OUTPUT/CUSTOMER_REPORT.html')

In [178]:
from pretty_html_table import build_table

html_table_blue_light = build_table(customer_report, 'yellow_dark', font_family='Fira Code',even_color='black',even_bg_color='white')

# html_table = build_table(customer_report
#                         , 'yellow_dark'
#                         , font_size='medium'
#                         , font_family='Open Sans'
#                         , text_align='left'
#                         , width='auto'
#                         , index=False
# 			, even_color='black'
# 			, even_bg_color='white'
#                         )

# Save to html file
with open('pretty_table.html', 'w') as f:
    f.write(html_table_blue_light)

TypeError: build_table() got an unexpected keyword argument 'bg_color'