In [9]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.graph_objects as go
import plotly.express as px
import os
import plotly.io as plt_io
# Enable this to export as iframe
# plt_io.renderers.default = 'iframe'

In [10]:
# Import and clean the excel file
cleaned_table = pd.read_excel("./INPUT/vsoc_report.xlsx")
fields_dict = {'ISS Ticket ID':'T#',
                          'Status': 'STATUS',
                          'Severity Code': 'SEVERITY',
                          'Create Date': 'TIMESTAMP',
                          'Resolution': 'RESOLUTION',
                          'Comments' : 'COMMENT'}

cleaned_table = cleaned_table[fields_dict]
cleaned_table.rename(columns= fields_dict, inplace=True)
severity_mapper = {'SEV1':'CRITICAL', 'SEV2': 'HIGH', 'SEV3': 'MEDIUM', 'SEV4': 'LOW'}
cleaned_table = cleaned_table.replace({'SEVERITY': severity_mapper})
cleaned_table['TIMESTAMP'] = pd.to_datetime(cleaned_table['TIMESTAMP'])
cleaned_table['TIMESTAMP'] = cleaned_table['TIMESTAMP'].dt.tz_localize('GMT').dt.tz_convert('Asia/Riyadh')
cleaned_table.start_timestamp = cleaned_table['TIMESTAMP'].min().strftime('%d/%m/%Y|%H:%M:%S')
cleaned_table.end_timestamp = cleaned_table['TIMESTAMP'].max().strftime('%d/%m/%Y|%H:%M:%S')
cleaned_table.no_days = len(cleaned_table['TIMESTAMP'].dt.normalize().unique())
cleaned_table.to_csv('./OUTPUT/CLEANED_TABLE.csv', sep=',')

In [11]:
# Generate basic bar plot for the main features of si-report
required_fields = ['STATUS', 'SEVERITY', 'RESOLUTION']
for required_field in required_fields:
    cleaned_table_copy = cleaned_table.copy()
    series = pd.value_counts(cleaned_table_copy[required_field])
    mask = (series/series.sum() * 100).lt(1.0)
    cleaned_table_copy[required_field] = np.where(cleaned_table_copy[required_field].isin(series[mask].index),'OTHERS ≤ 1.0%'.format(required_field),cleaned_table_copy[required_field])
    required_field_count = cleaned_table_copy[required_field].value_counts()    
    required_field_count = required_field_count.rename_axis(required_field).reset_index(name='{}_COUNT'.format(required_field))
    required_field_count = required_field_count.reset_index(drop=True)
    required_field_count.index.rename('NO.', inplace=True)
    required_field_count.index+=1
    required_field_count['{}_PCT'.format(required_field)] = required_field_count['{}_COUNT'.format(required_field)] / required_field_count['{}_COUNT'.format(required_field)].sum()
    required_field_count.to_csv('./OUTPUT/{}.csv'.format(required_field), sep=',')
    my_layout = go.Layout(title_text='Distribution of Common Event<br>From {} to {}<br>sorted by {} '.format(
        cleaned_table.start_timestamp, cleaned_table.end_timestamp,required_field), font=dict(color='#7f7f7f',size=10))
    
    my_data = go.Bar(x=required_field_count['{}_COUNT'.format(required_field)], y=required_field_count[required_field], orientation='h')
    
    fig = go.Figure(data=my_data, layout=my_layout)
    # tune the font_size to your needs
    fig.update_layout(font_size=10,title_x=0.5)
    fig.update_traces(
        marker_color='rgb(231,198,91)',
        marker_line_color='black',
        marker_line_width=1.0,
        textposition='outside',
        opacity=1.0)
    fig.write_image("./OUTPUT/{}_COUNT.pdf".format(required_field))
    # fig.show()

In [12]:
# Generate a summary table of the main features
summary_table = cleaned_table.groupby(['STATUS', 'SEVERITY', 'RESOLUTION']).size().unstack(fill_value=0)
summary_table.to_csv('./OUTPUT/SUMMARY_TABLE.csv', sep=',')

In [13]:
tickets_count = cleaned_table['T#'].value_counts()
tickets_count = tickets_count.rename_axis('T#').reset_index(name='T#_COUNT')
tickets_count = tickets_count.reset_index(drop=True)
tickets_count.index.rename('NO.', inplace=True)
tickets_count.index+=1
tickets_count['T#_PCT'] = tickets_count['T#_COUNT'] / tickets_count['T#_COUNT'].sum()
s = pd.to_datetime(cleaned_table['TIMESTAMP'])
tickets_count = s.groupby(s.dt.floor('d')).size().reset_index(name='COUNT')
# Plot ----------------------------------------------------------------------------------------------
my_layout = go.Layout(
    title='Global Font',
    font=dict(color='#7f7f7f', size=10),
    showlegend=False)
my_data = go.Scatter(
    x=tickets_count['TIMESTAMP'],
    y=tickets_count['COUNT'],
    mode='lines+markers+text')
fig = go.Figure(data=my_data, layout=my_layout)

fig.update_layout(shapes=[
    # Line Horizontal
    go.layout.Shape(
        type="line",
        x0=tickets_count['TIMESTAMP'].min(),
        y0=tickets_count['COUNT'].mean(),
        x1=tickets_count['TIMESTAMP'].max(),
        y1=tickets_count['COUNT'].mean(),
        line=dict(
            color="red",
            width=2,
            dash="longdash",
        ),
    ),
])

fig.add_trace(
    go.Scatter(
        x=[
            tickets_count['TIMESTAMP'].max() -
            pd.Timedelta(days=cleaned_table
        .no_days) / 2
        ],
        y=[tickets_count['COUNT'].mean()],
        mode="markers+text",
        name="Markers and Text",
        hoverinfo='skip',
        textposition="top right"))

fig.update_traces(
    marker_color='rgb(231,198,91)',
    marker_line_color='black',
    marker_line_width=1,
    opacity=1.0)
fig.update_layout(
    title_text='VSOC tickets trendline grouped by the day<br>From {} to {}. Red line '
               'represents<br>the average no. VSOC tickets ({} '
               'VSOC tickets per day)'.format(cleaned_table
            .start_timestamp, cleaned_table
            .end_timestamp,
                                             int(tickets_count['COUNT'].mean())),font_size=10,title_x=0.5)

fig.update_traces(
marker_color='rgb(231,198,91)',
marker_line_color='black',
marker_line_width=1.0,
opacity=1.0)
# tune the font_size to your needs
fig.update_layout(font_size=12)
# fig.show()
fig.write_image("./OUTPUT/TRENDLINE.pdf")

In [14]:
s = pd.to_datetime(cleaned_table['TIMESTAMP'])
tickets_count = s.groupby(s.dt.floor('H')).size().reset_index(name='COUNT')
tickets_count['DAY'] = tickets_count['TIMESTAMP'].dt.day_name()
tickets_count['HR'] = tickets_count['TIMESTAMP'].dt.hour
heat_map_data = [go.Heatmap(x=tickets_count['DAY'], y = tickets_count['HR'], z = tickets_count['COUNT'],colorscale='Viridis')]
heat_map_layout = go.Layout(title_text='Heatmap of no. VSOC tickets created per hour of the day<br>From {} '
                                       'to {}.'.format(cleaned_table.start_timestamp, cleaned_table.end_timestamp,))
fig = go.Figure(data = heat_map_data, layout = heat_map_layout)
fig.show()
fig.write_image("./OUTPUT/HEATMAP.pdf")

In [49]:
#  FIGURE 1: EOI Chart - OJA
ce_categories_center = ['caused by external actors', 'caused by internal technical actors', 'caused by internal non-technical actors']
last_month_ce_count = [33, 35, 12]
current_month_ce_count = [19, 22, 15]
ce_sum = list(((np.array(current_month_ce_count ) + np.array(last_month_ce_count))))
delta_ce = list(((np.array(current_month_ce_count) - np.array(last_month_ce_count )) / last_month_ce_count ) * 100)
delta_ce = np.round(delta_ce, decimals=1)

green_tag = '<span style="color:green">▼</span>'
red_tag = '<span style="color:red">▲</span>'
ibm_logo_path = os.path.join(os.getcwd(), 'INPUT/sip-logo.png')

fig = go.Figure()
fig.add_trace(go.Bar(
    x=ce_categories_center,
    y=last_month_ce_count,
    name='SI count on August',
    marker_color='rgb(231,198,91)', 
    marker_line_color='black',
    textposition='inside', 
    text=last_month_ce_count,
    marker_line_width=1.5, 
    opacity=1.0, width=0.5))

fig.add_trace(go.Bar(
    x=ce_categories_center,
    y=current_month_ce_count,
    name='SI count in September', 
    text=current_month_ce_count,
    textposition='inside',
    marker_color='white', 
    marker_line_color='black',
    marker_line_width=1.5, 
    opacity=0.6, width=0.6))

fig.add_trace(go.Bar(
    x=ce_categories_center, 
    y=max(current_month_ce_count,last_month_ce_count),
    text=delta_ce,
    textposition='outside',
    marker_line_width=1.5, 
    opacity=0, 
    width=0.6,
    showlegend=False))

for i, (x, y, z) in enumerate(zip(current_month_ce_count, last_month_ce_count, delta_ce)):
    print(i, x, y, z)
    if z > 0:
        fig.add_annotation(x=i,y=max(x+1, y+1),text="<b>▲+{}%</b>".format(z),showarrow=False,font=dict(color="red", size=12),align="center")
    else:
        fig.add_annotation(x=i,y=max(x+1, y+1),text="<b>▼{}%</b>".format(z),showarrow=False,font=dict(color="green", size=12),align="center")


title_plot1 = "<b> Increase {} and decrease {} in security incidents count categorized by root cause</b>".format(red_tag, green_tag)

fig.layout.images = [dict(
    source=ibm_logo_path,
    xref="paper", 
    yref="paper",
    x=0.05, 
    y=1.05,
    sizex=0.11, 
    sizey=0.15,
    xanchor="center", 
    yanchor="bottom")]

fig.update_layout(
    height=600,
    width=1200,
    template="plotly_white",
    font_family="IBM Plex Sans",
    font_size=14,
    barmode='overlay', 
    xaxis_tickangle=0,
    title=title_plot1,
    title_x=0.5, 
    font=dict(color="black"),
    legend=dict(
    orientation="h",
    yanchor="top",
    y=1.1,
    xanchor="center",
    x=0.5))
    
# fig.show()
fig.write_image("./OUTPUT/EOI.pdf")

0 19 33 -42.4
1 22 35 -37.1
2 15 12 25.0


In [17]:
last_month_ce_count = [4, 14, 12, 25, 2, 3]
current_month_ce_count = [2, 5, 2, 9, 5, 7]
# delta_count_ce = last_
delta_percentage_ce = list(((np.array(current_month_ce_count) - np.array(last_month_ce_count )) / np.array(last_month_ce_count)) * 100)
delta_percentage_ce = np.round(delta_percentage_ce, decimals=1)
ce_sum = current_month_ce_count  + last_month_ce_count

print('last_month_sum: {}'.format(np.average(last_month_ce_count)))
print('current_month_ce_sum: {}'.format(np.average(current_month_ce_count)))
print('delta_ce_sum: {}'.format(np.average(delta_ce)))

last_month_sum: 10.0
current_month_ce_sum: 5.0
delta_ce_sum: -18.166666666666668
