In [None]:
!pip install ciw 
!pip install chart_studio
!pip install plotly

In [45]:
import ciw
from ciw.dists import *
import math
import json
from collections import defaultdict
import chart_studio.plotly as py
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display, Markdown, HTML
from sklearn.neighbors import KernelDensity

import datetime
import zipfile

import pickle
import pandas as pd
import numpy as np
import random

from typing import List, Dict
from ciw.simulation import Simulation

import plotly
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
######################################################
# Constants

# Simulation time is counted in ticks. build_df converts ticks with
# timedelta(minutes=...), so one tick is treated as one minute.
TICKS_IN_HOUR = 60
TICKS_IN_DAY = 24 * TICKS_IN_HOUR
TICKS_IN_YEAR = 365 * TICKS_IN_DAY
# Trace colours: the current run is drawn dark green, the previous run light green.
CUR_COLOR = '#2E7D32'
PREV_COLOR = '#A5D6A7'
# Positions in Simulation.nodes that calc_utilization reads.
# NOTE(review): presumably nodes[0] is ciw's arrival node, so service nodes
# start at index 1 — confirm against the ciw documentation.
INDEX_TRIAGE_NODE = 1
INDEX_INV_NODE = 2

In [273]:
def calc_throughput(_q: Simulation) -> float:
    '''
    Return the throughput of a Simulation: the number of records it has
    collected divided by its current simulation time (records per tick).
    '''
    records = _q.get_all_records()
    elapsed = _q.current_time
    return len(records) / elapsed
    
def calc_utilization(_q: Simulation, util_index: int=INDEX_TRIAGE_NODE) -> int:
    '''
    Return the server utilisation of one node of a Simulation as a percentage
    (the node's server_utilisation multiplied by 100).

    NOTE: If util_index points at the Exit Node (which has no utilization),
    None is returned instead, so callers do not need to special-case it.
    '''
    node = _q.nodes[util_index]
    is_exit_node = str(node) == 'Exit Node'
    return None if is_exit_node else node.server_utilisation * 100

def make_indicator(current: int, previous: int=None, gauge: bool=False, suffix='/min') -> plotly.graph_objs.Indicator:
    '''
    Make a plotly Indicator trace showing a single number.

    Args:
        current: Value to display.
        previous: Optional reference value; when given, the indicator shows the
            delta between current and previous underneath the number.
        gauge: If True, draw a 0-100 gauge with grey bands for 0-70 and 70-90
            and a red threshold line at 90.
        suffix: Text appended to the displayed number (e.g. '/min' or '%').

    Returns:
        The configured go.Indicator trace.
    '''
    # A previous value of 0 is a real reference, so test against None rather
    # than relying on truthiness.
    has_previous = previous is not None

    kwargs = {"mode": "number+delta",
              "value": current,
              "number": {'suffix': suffix},
              "domain": {'x': [0, 1], 'y': [0, 1]}}

    if has_previous:
        kwargs["delta"] = {'position': 'bottom', 'reference': previous}

    if gauge:
        # Only ask the gauge for a delta when there is something to compare to.
        kwargs['mode'] = "gauge+number+delta" if has_previous else "gauge+number"
        kwargs['gauge'] = {'axis': {'range': [0, 100]},
                           'steps': [{'range': [0, 70], 'color': "lightgray"},
                                     {'range': [70, 90], 'color': "gray"}],
                           'threshold': {'line': {'color': "red", 'width': 4},
                                         'thickness': 0.75, 'value': 90}}
    return go.Indicator(**kwargs)

def build_df(q:Simulation) -> pd.DataFrame:
    '''
    Build a DataFrame with one row per simulation record. The DataFrame is used
    by the rest of the code to build the various parts of the dashboard.

    Columns:
        date: Wall-clock timestamp of the arrival (now + arrival_date minutes),
            rounded to the second.
        exit_date, arrival_date: Raw simulation times from the record.
        latency: exit_date - arrival_date.
        customer_class: The record's customer class.
        wait_time: The record's waiting time.

    Returns:
        The DataFrame sorted by 'date'. It has the columns above even when the
        simulation produced no records.
    '''
    columns = ['date', 'exit_date', 'arrival_date', 'latency',
               'customer_class', 'wait_time']

    # Take the wall-clock anchor once so every record in this frame is offset
    # from the same instant instead of a slightly later "now" per row.
    start = datetime.datetime.now()

    rows = [{
        'date': start + datetime.timedelta(minutes=rec.arrival_date),
        'exit_date': rec.exit_date,
        'arrival_date': rec.arrival_date,
        'latency': rec.exit_date - rec.arrival_date,
        'customer_class': rec.customer_class,
        'wait_time': rec.waiting_time
    } for rec in q.get_all_records()]

    # Passing the column list keeps the schema stable for an empty record list.
    df = pd.DataFrame(rows, columns=columns)
    df['date'] = pd.to_datetime(df['date']).dt.round('s')
    df.sort_values(by='date', inplace=True)
    return df

def make_histogram(values: List, name: str, marker_color: str, nbins:int= 20, bingroup=None, **kwargs) -> plotly.graph_objs.Histogram:
    '''
    Build a plotly histogram trace for values.

    Returns a (trace, max_count) tuple. max_count is the largest bin count of a
    numpy histogram of values with nbins bins; callers use it as the height of
    the SLO line. Extra keyword arguments are forwarded to go.Histogram.
    '''
    bin_counts = np.histogram(values, bins=nbins)[0]
    trace = go.Histogram(x=values,
                         name=name,
                         marker_color=marker_color,
                         bingroup=bingroup,
                         **kwargs)
    tallest_bin = max(bin_counts)
    return trace, tallest_bin


def make_ts(x, y, line=dict(color=CUR_COLOR,width=1)) -> plotly.graph_objs.Scatter:
    '''
    Build a time series trace: a go.Scatter of y over x, hidden from the
    legend and drawn with the given line style (current-run colour by default).
    '''
    trace_options = {
        "x": x,
        "y": y,
        "showlegend": False,
        "line": line,
    }
    return go.Scatter(**trace_options)

class Dashboard(object):
    '''
    Builder that hides some of the intricacies of assembling plotly subplots.

    Rows are declared top to bottom with add_row(); extra traces can be overlaid
    on an existing cell with insert_figure() or draw_slo(). build() then creates
    the subplot grid and attaches every collected trace.

    Attributes:
        title: Text of the overall figure title.
        titles: Subplot titles, in the order the rows were added.
        specs: One list of plotly subplot specs per row.
        figures: (trace, row, col) tuples; row and col are 1-based.
        num_cols: Widest row seen so far; used as the grid's column count.
    '''
    # Cell types add_row knows how to lay out.
    SUPPORTED_CELL_TYPES = ('domain', 'xy')

    def __init__(self, title):
        self.title = title
        self.titles = []
        self.specs = []
        self.figures = []
        self.num_cols = 0

    def add_row(self, titles, cell_type, figures, colspan=None):
        '''
        Add a row to the dashboard. Under the hood this recalculates the
        maximum number of columns, handles column spanning and records the
        row's subplot specs.

        Args:
            titles: A title or list of titles for the row's subplots.
            cell_type: 'domain' (one cell per title) or 'xy' (a single cell
                spanning colspan columns).
            figures: A trace or list of traces; the i-th trace is placed in
                column i+1 of the new row.
            colspan: Columns spanned by an 'xy' cell. Defaults to the current
                number of dashboard columns.

        Raises:
            ValueError: If cell_type is not supported. Nothing is added to the
                dashboard in that case.
        '''
        # Validate before touching any state so a rejected row leaves the
        # dashboard exactly as it was.
        if cell_type not in self.SUPPORTED_CELL_TYPES:
            raise ValueError(f"Currently dont support cell_type={cell_type}")

        if not isinstance(titles, list):
            titles = [titles]
        if not isinstance(figures, list):
            figures = [figures]

        self.titles.extend(titles)
        self.num_cols = max(self.num_cols, len(titles))

        if cell_type == 'domain':
            self.specs.append([{"type": cell_type} for _ in titles])
        else:
            if colspan is None:
                colspan = self.num_cols
            # An explicit span wider than the grid widens the grid.
            self.num_cols = max(self.num_cols, colspan)
            # Cells covered by the span are declared as None.
            self.specs.append([{"type": "xy", "colspan": colspan}] + [None] * (colspan - 1))

        row = len(self.specs)
        for col, fig in enumerate(figures, start=1):
            self.figures.append((fig, row, col))

    def insert_figure(self, row, col, figure):
        '''
        Add a trace to a cell of an already added row, e.g. to overlay a second
        line or set of bars. Call this after add_row(..) created the row.
        '''
        self.figures.append((figure, row, col))

    def draw_slo(self, slo, top_y, row, col, showlegend=True):
        '''
        Draw a red vertical line at x=slo to represent the service level
        objective, running from y=0 up to top_y, in the cell at (row, col).
        '''
        self.figures.append((go.Scatter(x=[slo, slo], y=[0, top_y],
                                        mode="lines+text",
                                        name="SLO",
                                        showlegend=showlegend,
                                        text=[None, "SLO"],
                                        textposition="bottom center",
                                        line=dict(color='red', width=3)), row, col))

    def build(self):
        '''
        Build the subplot grid from the collected rows, attach all traces and
        return the main figure object. Call once every row has been added.
        '''
        # Rows added before the grid grew may be narrower than num_cols;
        # pad them with empty cells so every row has the same length.
        specs = [row + [None] * (self.num_cols - len(row)) for row in self.specs]

        main_fig = make_subplots(rows=len(specs), cols=self.num_cols,
                                 subplot_titles=self.titles,
                                 specs=specs)

        for (fig, row, col) in self.figures:
            main_fig.add_trace(fig, row=row, col=col)

        main_fig.update_layout(barmode="overlay", bargap=0.1,
                               height=900, width=800,
                               title={'text': self.title,
                                      'y': 0.98, 'x': 0.5, 'xanchor': 'center',
                                      'font': {'size': 20}, 'yanchor': 'top'})

        # Nudge the first two subplot titles upward (three on a three-column
        # dashboard) — presumably so they clear the indicator row. Bounded by
        # the number of annotations so a dashboard with fewer titles still builds.
        annotations = main_fig['layout']['annotations']
        lifted = 3 if self.num_cols == 3 else 2
        for idx in range(min(lifted, len(annotations))):
            annotations[idx].update({'y': 1.03})
        return main_fig
        

def _arrivals_per_window(df, window='5min'):
    '''
    Count how many records of df arrived in each window-wide bucket of df['date'].
    '''
    return pd.Series(np.ones(len(df)), index=df['date']).resample(window).sum()


def build_dashboard(q, prev_q=None, wait_time_slo=15, latency_slo=30, title="Run Data", show_slo=False):
    '''
    This method builds the dashboards used throughout the presentation.

    Layout (rows top to bottom): indicators, latency histogram, wait time
    histogram, arrival rate time series, latency time series.

    Args:
        q (Simulation): The current simulation that was run.
        prev_q (Simulation, optional): The previous simulation run, if specified will contrast current run with previous.
        wait_time_slo (int): The wait time SLO
        latency_slo (int): The latency SLO
        title (str): Title of the dashboard
        show_slo(bool): If true will draw SLO line on latency and wait time histograms.

    Returns:
        A plotly figure that is our simulation dashboard. Callers are responsible for calling "show"
    '''
    has_prev = prev_q is not None
    prev_throughput = prev_utilization = None
    top_y_lat = top_y_lat_prev = top_y_wait = top_y_wait_prev = 0

    # Create the dashboard object
    db = Dashboard(title)

    # Calculate throughput and utilization
    throughput = calc_throughput(q)
    utilization = calc_utilization(q)

    # Calculate the reference values if we have a previous Q
    if has_prev:
        prev_throughput = calc_throughput(prev_q)
        prev_utilization = calc_utilization(prev_q)

    # Build our indicator images
    tp_indicator = make_indicator(throughput, previous=prev_throughput)
    ut_indicator = make_indicator(utilization, previous=prev_utilization, gauge=True, suffix='%')

    # Initialize our figures and titles; this assumes we aren't using a simulation with "tiering"
    figures = [tp_indicator, ut_indicator]
    titles = ["Throughput", "Utilization"]

    # If we have 5 nodes we are in a simulation with tiering and want to break out by Triage and Investigation
    if len(q.nodes) == 5:
        titles = ["Throughput", "Utilization (Triage)", "Utilization (Investigation)"]
        utilization_inv = calc_utilization(q, util_index=INDEX_INV_NODE)
        # Only look at the previous run when there is one; calc_utilization
        # cannot be called with prev_q=None.
        prev_utilization_inv = calc_utilization(prev_q, util_index=INDEX_INV_NODE) if has_prev else None
        figures.append(make_indicator(utilization_inv, previous=prev_utilization_inv, gauge=True, suffix='%'))

    # Row 1: indicators
    db.add_row(titles, "domain", figures)

    # Build dataframe of current simulation
    df_q = build_df(q)

    # Row 2: latency histogram (only records whose exit is after their arrival)
    h1, top_y_lat = make_histogram(df_q[(df_q['exit_date'] > df_q['arrival_date'])]['latency'].values, "Current", CUR_COLOR, nbins=20, bingroup=1, nbinsx=20)
    db.add_row("Latency Distribution", "xy", h1)

    # Row 3: wait time histogram (only records that actually waited)
    h2, top_y_wait = make_histogram(df_q[(df_q['wait_time'] > 0)]['wait_time'], "Current", CUR_COLOR, nbins=20, bingroup=2, showlegend=False)
    db.add_row("Wait Time Distribution", "xy", h2)

    # Row 4: arrivals per 5-minute window
    arrival = _arrivals_per_window(df_q)
    db.add_row("Arrival Rate", "xy", make_ts(arrival.index, arrival))

    # Row 5: latency over time
    db.add_row("Latency", "xy", make_ts(df_q['date'], df_q['latency']))

    if has_prev:
        # Overlay the same four plots for the previous run, using PREV_COLOR to differentiate
        prev_line = dict(color=PREV_COLOR, width=1)
        df_pq = build_df(prev_q)

        ph1, top_y_lat_prev = make_histogram(df_pq[(df_pq['exit_date'] > df_pq['arrival_date'])]['latency'].values, "Prev", PREV_COLOR, nbins=20, bingroup=1, nbinsx=20)
        db.insert_figure(row=2, col=1, figure=ph1)

        ph2, top_y_wait_prev = make_histogram(df_pq[(df_pq['wait_time'] > 0)]['wait_time'], "Prev", PREV_COLOR, nbins=20, bingroup=2, showlegend=False)
        db.insert_figure(row=3, col=1, figure=ph2)

        prev_arrival = _arrivals_per_window(df_pq)
        db.insert_figure(row=4, col=1, figure=make_ts(prev_arrival.index, prev_arrival, line=prev_line))

        db.insert_figure(row=5, col=1, figure=make_ts(df_pq['date'], df_pq['latency'], line=prev_line))

    if show_slo:
        # The SLO lines are as tall as the tallest bin of either run
        db.draw_slo(latency_slo, max([top_y_lat, top_y_lat_prev]), row=2, col=1)
        db.draw_slo(wait_time_slo, max([top_y_wait, top_y_wait_prev]), row=3, col=1, showlegend=False)

    return db.build()

# All your queues belong to us: Optimizing Human in the Loop
* Matt Peters (CPO)
* Peter Silberman (CTO)
![title foo](img/expel_logo.png "Title")

<h1>Introduction</h1>

What we're going to cover in this talk:

<ul>
    <li>General Idea of Queues / Queueing</li>
    <li>CIW Simulation Framework</li>
    <li>Application to Security Operations</li>
</ul>

<h1>Queues Explained - Parameters</h1>

![caption](img/queue-intro.png)

There are several parameters that define a queueing system
<ul>
    <li><b>Arrival Rate</b> - &lambda; - How fast does work arrive?</li>
    <li><b>Service Rate</b> - &mu; - How long does each job take to do?</li>
    <li><b>Number of servers</b> - How many jobs can we do at the same time</li>
    <li><b>Classes of service</b> - What order do we process work in?</li>
</ul>

![caption](img/queue-intro-measurement.png)

There are several measurement points you want to have
<ul>
    <li><b>[2] - [1]</b> - How long is a job waiting to be serviced?</li>
    <li><b>[3] - [4]</b> - How long is a job taking to be serviced?</li>
</ul>

<h1>Queues Explained - Observation</h1>

![caption](img/arrival-times.png)    
    
If we're in the middle, then we can try to optimize for:

<ul>
    <li><b>Throughput</b> - How many jobs per unit time can we do?</li>
    <li><b>Latency</b> - What's the end-to-end time for a job to travel through the system?</li>
    <li><b>Utilization</b> - How busy is each server in our system?</li>
</ul>



<h1>Queues In The Real World</h1>

![caption](img/queue-stock.jpg)

<h1>Simulating Queues - A Simple Example</h1>

<img src="img/queue-demo-1-new.png" alt="drawing"/>


In [47]:
# Simplest possible queue: one node with a single server, where every sampled
# arrival value is exactly 4 and every sampled service value is exactly 5.
network = ciw.create_network(arrival_distributions=[ Deterministic(4) ], 
                             service_distributions=[ Deterministic(5) ], 
                             number_of_servers=[1]) 

# Run for 1440 ticks, which equals TICKS_IN_DAY (one simulated day).
Q1 = ciw.Simulation(network)
Q1.simulate_until_max_time(1440)

In [274]:
# Display the deterministic run on its own (no previous run to compare against).
dashboard = build_dashboard(Q1, title='Arrival Rate (4) = Service Rate (5)')
dashboard.show()

<h1>What Happens if arrival or service aren't constant?</h1>

<img src="img/queue-random-functions-new.png" alt="drawing"/>



In [49]:
# Note: We changed our distributions below -- they are now exponentially distributed.
#       Arrivals use rate 1/5 (mean of 5 min) and service uses rate 1/4 (mean of 4 min).
network = ciw.create_network(arrival_distributions=[ ciw.dists.Exponential(1.0/5) ],
                             service_distributions=[ ciw.dists.Exponential(1.0/4) ],
                             number_of_servers=[1])
# Same one-day horizon (1440 ticks) as the deterministic run.
Q2 = ciw.Simulation(network)
Q2.simulate_until_max_time(1440)

In [275]:
# Display the simulation results via the dashboard, with the deterministic
# run (Q1) overlaid as the previous run.
dashboard = build_dashboard(Q2, title='Arrval (Exp(5)) ~= Service Rate (Exp(4))', prev_q=Q1)
dashboard.show()

<h1>So What? How does this apply to me?</h1>

<img src="img/soc-as-queues.png" alt="drawing" width="500" height="500"/>

<b>Your SOC/SRE/Dev team is a queueing system. Understanding the parameters allows you to tune the output</b>

<ul>
    <li><b>Utilization</b> - This is your team, which usually feels like they're at 110%</li>
    <li><b>Throughput</b> - How many things can you triage per unit time</li>
    <li><b>Latency</b> - This is your SLO/SLA - how long until you see something?</li>
</ul>



<h1>Our Example SOC</h1>

<img src="img/our-soc-new.png" alt="drawing" width="700"/>

    with zipfile.ZipFile("arr_times_csv.zip").open('arr_times.csv') as fd:
        for line in fd.readlines()[:3]:
            print(line)

<h1>Building A Historical Distribution</h1>

<img src="img/historical-distribution.png" alt="drawing" width="700"/>

# A Historical Distribution
    class HistoricalDistribution(ciw.dists.Distribution):
        def __init__(self, sev='TOTAL',
                           filename='arr_times.pickle', 
                           adjustment_factor=0.0):
                           
            self.dists = defaultdict(dict)

            counts = get_arr_counts(filename, sev=sev)
            for day in counts.keys():
                for hr, avg_per_min in counts[day].items():
                    self.dists[day][hr] = ciw.dists.Exponential(avg_per_min - \
                                                (avg_per_min * adjustment_factor))

        def sample(self, t, ind=None):
            day    = math.floor((t / TICKS_IN_DAY) % 7)
            hour   = math.floor((t / TICKS_IN_HOUR) % 24)
            sample = self.dists[day][hour].sample(t,ind)
            return sample



<h1>Modeling A Service Distribution</h1>

<img src="img/workflow.png" alt="drawing" width="700"/>

<img src="img/service-dist-code.png" alt="drawing" width="700"/>

# Putting That All Together


    TIME_TO_TRIAGE      = 4
    TIME_TO_INVESTIGATE = 20
    TIME_TO_REPORT      = 10
    NUM_SOC_WORKERS     = 1
    SIMULATION_TIME     = 24*60

    class HistoricalDistribution(ciw.dists.Distribution):

        def __init__(self, sev='TOTAL', 
                     filename='arr_times.pickle', 
                     adjustment_factor=0.0):
                     
            self.dists = defaultdict(dict)

            counts = get_arr_counts(filename, sev=sev)
            for day in counts.keys():
                for hr, avg_per_min in counts[day].items():
                    rate = avg_per_min - (avg_per_min * adjustment_factor)
                    self.dists[day][hr] = Exponential(rate)

        def sample(self, t, ind=None):
            day    = math.floor((t / TICKS_IN_DAY) % 7)
            hour   = math.floor((t / TICKS_IN_HOUR) % 24)
            sample = self.dists[day][hour].sample(t,ind)
            return sample

    class ServiceDistribution(ciw.dists.Distribution):
        def __init__(self, avg_time_to_triage, 
                     avg_time_to_investigate,
                     avg_time_to_report, 
                     prob_of_inv=.2):
                     
            self.time_to_triage      = Exponential(1/avg_time_to_triage)
            self.time_to_investigate = Exponential(1/avg_time_to_investigate)
            self.time_to_report      = Exponential(1/avg_time_to_report)
            self.prob_of_inv         = prob_of_inv

        def sample(self, t, ind=None):
            total_time = self.time_to_triage.sample()
            if random.random() < self.prob_of_inv:
                total_time += self.time_to_investigate.sample()
                total_time += self.time_to_report.sample()

            return total_time            
            
    svc_d = ServiceDistribution(TIME_TO_TRIAGE,
                                TIME_TO_INVESTIGATE,
                                TIME_TO_REPORT)

    # Create and Run the Simulation
    network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                                 service_distributions=[ svc_d ],
                                 number_of_servers=[ NUM_SOC_WORKERS ])

    Q_basic_soc = ciw.Simulation(network)
    Q_basic_soc.simulate_until_max_time(SIMULATION_TIME)
    
    dashboard = build_dashboard(Q_basic_soc, title='Basic SOC')
    dashboard.show()

In [51]:
# Average handling times, passed to ServiceDistribution as avg_time_to_*;
# each becomes the mean of an Exponential distribution (rate = 1/avg).
TIME_TO_TRIAGE      = 4
TIME_TO_INVESTIGATE = 20
TIME_TO_REPORT      = 10
# Number of servers at the single SOC node.
NUM_SOC_WORKERS     = 1
# Simulation horizon in ticks: 24*60 equals TICKS_IN_DAY.
SIMULATION_TIME     = 24*60

def get_arr_counts(filename, sev='TOTAL', weeks=52):
    '''
    Load a pickle file of historical alert counts and turn it into average
    arrival rates per minute.

    The pickle is expected to hold
        {<day of week>: {<hour>: {<severity>: [<count week 1>, <count week 2>, ...]}}}

    Args:
        filename: Path of the pickle file.
        sev: Severity key whose counts are used (e.g. 'TOTAL').
        weeks: Number of weeks the counts are averaged over. Each
            (day, hour) slot is assumed to cover weeks * 60 minutes of
            observation, regardless of how many counts the list holds.

    Returns:
        {day: {hour: rate}} for days 0-6 and hours 0-23, where rate is the
        summed count divided by the observed minutes. Slots missing from the
        pickle keep a rate of 0.0.
    '''
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as fd:
        dd = pickle.load(fd)

    # Minutes of observation behind each (day, hour) slot: one hour per week.
    minutes_observed = weeks * 60

    # Build look up that is keyed on day of week -> hour -> rate
    counts = {day: {hour: 0.0 for hour in range(24)} for day in range(7)}
    for day, hr_sevs in dd.items():
        for hr, by_sev in hr_sevs.items():
            # List of counts observed for that day and hour, one per week
            cnts = by_sev[sev]
            counts[day][hr] = sum(cnts) / minutes_observed
    return counts


class HistoricalDistribution(ciw.dists.Distribution):
    '''
    Arrival distribution for CIW driven by historical alert rates.

    Holds one Exponential distribution per (day of week, hour of day) slot,
    each built from the per-minute rate returned by get_arr_counts and reduced
    by the fraction adjustment_factor.
    '''
    def __init__(self, sev='TOTAL', filename='arr_times.pickle', adjustment_factor=0.0):
        self.dists = defaultdict(dict)

        # Historical rates, keyed day -> hour -> average arrivals per minute
        hourly_rates_by_day = get_arr_counts(filename, sev=sev)
        for day, hourly_rates in hourly_rates_by_day.items():
            for hr, avg_per_min in hourly_rates.items():
                # Take adjustment_factor's share off the historical rate
                rate = avg_per_min - (avg_per_min * adjustment_factor)
                self.dists[day][hr] = ciw.dists.Exponential(rate)

    def sample(self, t, ind=None):
        # Called by CIW: map simulation time t onto its day/hour slot and
        # sample from that slot's distribution.
        day_of_week = math.floor((t / TICKS_IN_DAY) % 7)
        hour_of_day = math.floor((t / TICKS_IN_HOUR) % 24)
        return self.dists[day_of_week][hour_of_day].sample(t, ind)

class ServiceDistribution(ciw.dists.Distribution):
    '''
    Service time distribution for the SOC queue.

    Every job takes a triage time; with probability prob_of_inv it also takes
    an investigation time plus a reporting time. Each of the three times is
    exponentially distributed around the given average.
    '''
    def __init__(self, avg_time_to_triage, avg_time_to_investigate,
                 avg_time_to_report, prob_of_inv=.2):
        triage_rate = 1/avg_time_to_triage
        self.time_to_triage = Exponential(triage_rate)
        investigate_rate = 1/avg_time_to_investigate
        self.time_to_investigate = Exponential(investigate_rate)
        report_rate = 1/avg_time_to_report
        self.time_to_report = Exponential(report_rate)
        self.prob_of_inv = prob_of_inv

    def sample(self, t, ind=None):
        duration = self.time_to_triage.sample()
        needs_investigation = random.random() < self.prob_of_inv
        if not needs_investigation:
            return duration

        # Escalated job: add investigation and reporting on top of triage
        duration += self.time_to_investigate.sample()
        duration += self.time_to_report.sample()
        return duration

# Basic SOC: a single node fed by the historical arrival distribution
# (default 'TOTAL' severity, no adjustment) and served by NUM_SOC_WORKERS
# workers using the combined triage/investigate/report service time.
network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                             service_distributions=[ 
                                     ServiceDistribution(
                                                     TIME_TO_TRIAGE,
                                                     TIME_TO_INVESTIGATE,
                                                     TIME_TO_REPORT) 
                                     ],
                             number_of_servers=[ NUM_SOC_WORKERS ])

# Simulate SIMULATION_TIME ticks of SOC operation.
Q_basic_soc = ciw.Simulation(network)
Q_basic_soc.simulate_until_max_time(SIMULATION_TIME)

In [276]:
# Show the basic SOC run with the SLO lines drawn on both histograms.
dashboard = build_dashboard(Q_basic_soc, title='Basic SOC', show_slo=True)
dashboard.show()

<h1>Our Example SOC - Adding Capacity</h1>

<img src="img/our-soc-capacity.png" alt="drawing" width="600"/>

Here we add staff, so the number of servers (SOC workers) increases.

    NUM_SOC_WORKERS = 2

    network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                                 service_distributions=[ 
                                     ServiceDistribution(TIME_TO_TRIAGE,
                                                         TIME_TO_INVESTIGATE,
                                                         TIME_TO_REPORT) 
                                 ],
                                 number_of_servers=[ NUM_SOC_WORKERS ])

    Q = ciw.Simulation(network)

    Q.simulate_until_max_time(SIMULATION_TIME)

In [53]:
# Added capacity: same arrival and service distributions as the basic SOC,
# but the node now has two servers instead of one.
NUM_SOC_WORKERS = 2

network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                             service_distributions=[ ServiceDistribution(TIME_TO_TRIAGE,
                                                                         TIME_TO_INVESTIGATE,
                                                                         TIME_TO_REPORT) ],
                             number_of_servers=[ NUM_SOC_WORKERS ])

Q_add_cap = ciw.Simulation(network)

Q_add_cap.simulate_until_max_time(SIMULATION_TIME)

In [277]:
# Compare the two-worker run (current) against the basic SOC run (previous).
dashboard = build_dashboard(Q_add_cap, prev_q=Q_basic_soc, title='Added Capacity vs Basic SOC', show_slo=True)
dashboard.show()

<h1>Our Example SOC - Training</h1>

<img src="img/our-soc-training.png" alt="drawing" width="700"/>

Here we add training, so the probability of investigation is reduced by 10%, as well as the time it takes by 20%.

    TIME_TO_INVESTIGATE  *= .20
    TIME_TO_REPORT       *= .20
    PROB_OF_INVESTIGATION = .18

    network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                                 service_distributions=[ 
                                         ServiceDistribution(TIME_TO_TRIAGE,
                                                             TIME_TO_INVESTIGATE,
                                                             TIME_TO_REPORT,
                                                             PROB_OF_INVESTIGATION) 
                                                  ],
                                number_of_servers=[ NUM_SOC_WORKERS ])

    Q = ciw.Simulation(network)

    Q.simulate_until_max_time(SIMULATION_TIME)

In [55]:
# NOTE(review): `*= .20` scales each time down to 20% of its previous value
# (an 80% cut), whereas the accompanying text describes a 20% reduction --
# confirm which is intended. The update is in place, so re-running this cell
# compounds the scaling.
TIME_TO_INVESTIGATE  *= .20
TIME_TO_REPORT       *= .20
# 0.18 is ServiceDistribution's default prob_of_inv of 0.2 reduced by 10%.
PROB_OF_INVESTIGATION = .18

# Same single-node network as before, now with the shortened times and the
# lower investigation probability.
network = ciw.create_network(arrival_distributions=[ HistoricalDistribution() ],
                             service_distributions=[ 
                                             ServiceDistribution(TIME_TO_TRIAGE,
                                                                 TIME_TO_INVESTIGATE,
                                                                 TIME_TO_REPORT,
                                                                 PROB_OF_INVESTIGATION) ],
                            number_of_servers=[ NUM_SOC_WORKERS ])

Q_train = ciw.Simulation(network)
Q_train.simulate_until_max_time(SIMULATION_TIME)

In [278]:
# Current run is the training scenario (Q_train); the overlaid previous run
# is the added-capacity scenario (Q_add_cap).
dashboard = build_dashboard(Q_train, prev_q=Q_add_cap, title='Added Capacity vs Training', show_slo=True)
dashboard.show()

<h1>Our Example SOC - Tuning</h1>

<img src="img/our-soc-tuning.png" alt="drawing" width="800"/>

In [57]:
# Tuning: HistoricalDistribution takes 15% off every historical arrival rate.
ADJUSTMENT_FACTOR = .15
# Investigation probability becomes 0.2 * 0.1 = 0.02.
PROB_OF_INVESTIGATION  = 0.2 * .1
# TIME_TO_INVESTIGATE / TIME_TO_REPORT keep whatever values earlier cells
# left them at (the training cell scales them in place).
network = ciw.create_network(arrival_distributions=[ HistoricalDistribution(adjustment_factor=ADJUSTMENT_FACTOR) ],
                             service_distributions=[ ServiceDistribution(TIME_TO_TRIAGE,
                                                                         TIME_TO_INVESTIGATE,
                                                                         TIME_TO_REPORT,
                                                                         PROB_OF_INVESTIGATION) ],

                             number_of_servers=[ NUM_SOC_WORKERS ])

Q_tuning = ciw.Simulation(network)
Q_tuning.simulate_until_max_time(SIMULATION_TIME)

In [279]:
# Compare the tuned run (current) against the training run (previous).
dashboard = build_dashboard(Q_tuning, prev_q=Q_train, show_slo=True, title='Tuning vs Training')
dashboard.show()

<h1>Our Example SOC - Service Classes</h1>

<img src="img/our-soc-classes.png" alt="drawing" width="700"/>

Here we add service classes, so our alerts are now associated with a severity (HIGH, MED, LOW) which are each handled in order.

    priority_classes={ 'Class 0' : 0,
                       'Class 1' : 1,
                       'Class 2' : 2 }

    arrival_distributions = { 'Class 0' :  [ HistoricalDistribution(sev='HIGH', 
                                                    adjustment_factor=ADJUSTMENT_FACTOR) ],
                              'Class 1' :  [ HistoricalDistribution(sev='MEDIUM', 
                                                    adjustment_factor=ADJUSTMENT_FACTOR) ],
                              'Class 2' :  [ HistoricalDistribution(sev='LOW', 
                                                    adjustment_factor=ADJUSTMENT_FACTOR) ] }


    service_distributions={ 'Class 0' : [ ServiceDistribution(TIME_TO_TRIAGE,
                                                                TIME_TO_INVESTIGATE,
                                                                TIME_TO_REPORT,
                                                                PROB_OF_INVESTIGATION) ],
                            'Class 1' : [ ServiceDistribution(TIME_TO_TRIAGE,
                                                                TIME_TO_INVESTIGATE,
                                                                TIME_TO_REPORT,
                                                                PROB_OF_INVESTIGATION) ],
                            'Class 2' : [ ServiceDistribution(TIME_TO_TRIAGE,
                                                                TIME_TO_INVESTIGATE,
                                                                TIME_TO_REPORT,
                                                                PROB_OF_INVESTIGATION) ] }

    network = ciw.create_network(arrival_distributions=arrival_distributions,
                                 service_distributions=service_distributions,
                                 priority_classes=priority_classes,
                                 number_of_servers=[ NUM_SOC_WORKERS ])

    Q = ciw.Simulation(network)
    Q.simulate_until_max_time(SIMULATION_TIME)

In [59]:
# Map each customer class to a priority level (0, 1, 2).
# NOTE(review): presumably a lower number means higher priority in ciw — confirm.
priority_classes={ 'Class 0' : 0,
                   'Class 1' : 1,
                   'Class 2' : 2 }

# One historical arrival stream per severity: HIGH -> Class 0, MEDIUM -> Class 1,
# LOW -> Class 2, each with the tuning adjustment applied.
arrival_distributions = { 'Class 0' :  [ HistoricalDistribution(sev='HIGH', adjustment_factor=ADJUSTMENT_FACTOR) ],
                          'Class 1' :  [ HistoricalDistribution(sev='MEDIUM', adjustment_factor=ADJUSTMENT_FACTOR) ],
                          'Class 2' :  [ HistoricalDistribution(sev='LOW', adjustment_factor=ADJUSTMENT_FACTOR) ] }


# All three classes are serviced with identically parameterised distributions.
service_distributions={ 'Class 0' : [ ServiceDistribution(TIME_TO_TRIAGE, TIME_TO_INVESTIGATE,
                                                          TIME_TO_REPORT, PROB_OF_INVESTIGATION) ],
                        'Class 1' : [ ServiceDistribution(TIME_TO_TRIAGE, TIME_TO_INVESTIGATE,
                                                          TIME_TO_REPORT, PROB_OF_INVESTIGATION) ],
                        'Class 2' : [ ServiceDistribution(TIME_TO_TRIAGE, TIME_TO_INVESTIGATE,
                                                          TIME_TO_REPORT, PROB_OF_INVESTIGATION) ] }

network = ciw.create_network(arrival_distributions=arrival_distributions,
                             service_distributions=service_distributions,
                             priority_classes=priority_classes,
                             number_of_servers=[ NUM_SOC_WORKERS ])

Q_svc_cls = ciw.Simulation(network)
Q_svc_cls.simulate_until_max_time(SIMULATION_TIME)

In [283]:
# Compare the service-class run (current) against the tuned run (previous).
dashboard = build_dashboard(Q_svc_cls, prev_q=Q_tuning, show_slo=True, title='Service Classes vs Tuning')
dashboard.show()

df = build_df(Q_svc_cls)

# Separate figure contrasting the latency distributions of two classes.
# NOTE(review): assumes the records store customer_class as the integers
# 0 and 2 (Class 0 = HIGH, Class 2 = LOW) — confirm for the ciw version in use.
fig = go.Figure(data=[
    go.Histogram(x=df[df['customer_class'] == 0]['latency'], name='high severity'),
    go.Histogram(x=df[df['customer_class'] == 2]['latency'], name='low severity')])
fig.update_layout(title='Latency of high vs low severity alerts')
fig.show()

<h1>Our Example SOC - Tiering</h1>

<img src="img/our-soc-tiering-img.png" alt="drawing" width="800"/>
<img src="img/our-soc-tiering-code.png" alt="drawing" width="800"/>

In [61]:
class InvestigationDistribution(ciw.dists.Distribution):
    '''
    Service time of the investigation tier: an exponentially distributed
    investigation time plus an exponentially distributed reporting time.
    '''
    def __init__(self, avg_time_to_investigate, avg_time_to_report):
        investigate_rate = 1/avg_time_to_investigate
        self.time_to_investigate = ciw.dists.Exponential(investigate_rate)
        report_rate = 1/avg_time_to_report
        self.time_to_report = ciw.dists.Exponential(report_rate)

    def sample(self, t, ind=None):
        investigation = self.time_to_investigate.sample()
        reporting = self.time_to_report.sample()
        return investigation + reporting

# Three service nodes: triage (2 servers), investigation (1 server) and a
# final node (1 server) that every job passes through.
number_of_servers = [ 2, 1, 1 ]

#arrival_distributions = [ HistoricalDistribution(ADJUSTMENT_FACTOR), NoArrivals(), NoArrivals() ]

# Jobs of every class only arrive at the first node; the other two nodes
# receive no external arrivals.
arrival_per_class = { 'Class 0' : [HistoricalDistribution(sev='HIGH', adjustment_factor=ADJUSTMENT_FACTOR), NoArrivals(), NoArrivals() ],
                      'Class 1' : [HistoricalDistribution(sev='MEDIUM', adjustment_factor=ADJUSTMENT_FACTOR), NoArrivals(), NoArrivals() ],
                      'Class 2' : [HistoricalDistribution(sev='LOW', adjustment_factor=ADJUSTMENT_FACTOR), NoArrivals(), NoArrivals() ] }

# Per-node service times: exponential triage, investigation + reporting,
# and a zero-time third node.
service_distributions = [ Exponential(1/TIME_TO_TRIAGE), 
                          InvestigationDistribution(TIME_TO_INVESTIGATE,TIME_TO_REPORT), 
                          Deterministic(0) ]

# The same distribution objects are shared by all three classes.
service_per_class = { 'Class 0' : service_distributions,
                      'Class 1' : service_distributions,
                      'Class 2' : service_distributions }

# Row i holds the probabilities of moving from node i to each node:
# triage sends PROB_OF_INVESTIGATION of its jobs to investigation and the
# rest straight to the third node; investigation always hands over to the
# third node; the third node routes to no other node.
std_routing = [ [0.0, PROB_OF_INVESTIGATION, 1-PROB_OF_INVESTIGATION ],
                [0.0, 0.0, 1.0],
                [0.0, 0.0, 0.0] ]

routing_matrix = { 'Class 0' : std_routing, 
                   'Class 1' : std_routing,
                   'Class 2' : std_routing }

# priority_classes is reused from the service-classes cell.
network = ciw.create_network(arrival_distributions=arrival_per_class,
                             service_distributions=service_per_class,
                             routing=routing_matrix,
                             priority_classes=priority_classes,
                             number_of_servers=number_of_servers)

Q_tier = ciw.Simulation(network)
Q_tier.simulate_until_max_time(SIMULATION_TIME)

In [281]:
# Compare the tiered run (current) against the service-class run (previous).
dashboard = build_dashboard(Q_tier, prev_q=Q_svc_cls, show_slo=True, title='Tiering vs Service Classes')
dashboard.show()

<h1>Our Example SOC - Automation</h1>

<img src="img/our-soc-automated.png" alt="drawing" width="800"/>

In [63]:
class AutoInvestigationDistribution(ciw.dists.Distribution):
    '''
    Service time of an investigation tier in which some investigations are
    automated.

    With probability prob_of_automation a job is automated and takes no time;
    otherwise it takes an exponentially distributed investigation time plus an
    exponentially distributed reporting time.
    '''

    def __init__(self, avg_time_to_investigate, avg_time_to_report,
                 prob_of_automation=0):
        self.time_to_investigate = ciw.dists.Exponential(1/avg_time_to_investigate)
        self.time_to_report      = ciw.dists.Exponential(1/avg_time_to_report)
        self.prob_of_automation  = prob_of_automation

    def is_automated(self):
        '''
        Return True with probability prob_of_automation.
        '''
        # random.random() is uniform on [0, 1), so "< p" is true with
        # probability p: never for p=0 and always for p=1.
        return random.random() < self.prob_of_automation

    def sample(self, t, ind=None):
        # Automated investigations complete instantly
        if self.is_automated():
            return 0
        return self.time_to_investigate.sample() + self.time_to_report.sample()

# Passed to AutoInvestigationDistribution as prob_of_automation.
PROB_OF_AUTOMATION = 0.4

# Same three nodes as the tiering cell, with the investigation node's
# service time swapped for the automation-aware distribution.
service_distributions = [ Exponential(1/TIME_TO_TRIAGE), 
                          AutoInvestigationDistribution(TIME_TO_INVESTIGATE,
                                                                    TIME_TO_REPORT,
                                                                    PROB_OF_AUTOMATION), 
                          Deterministic(0) ]


service_per_class = { 'Class 0' : service_distributions,
                      'Class 1' : service_distributions,
                      'Class 2' : service_distributions }

# arrival_per_class, routing_matrix, priority_classes and number_of_servers
# are reused unchanged from the earlier cells.
network = ciw.create_network(arrival_distributions=arrival_per_class,
                             service_distributions=service_per_class,
                             routing=routing_matrix,
                             priority_classes=priority_classes,
                             number_of_servers=number_of_servers)

Q_auto = ciw.Simulation(network)
Q_auto.simulate_until_max_time(SIMULATION_TIME)

In [282]:
# Compare the automation run (current) against the tiered run (previous).
dashboard = build_dashboard(Q_auto, prev_q=Q_tier, show_slo=True, title='Automation vs Tiering')
dashboard.show()

<h1>Conclusion</h1>
<img src="img/themoreyouknow.png" alt="drawing" width="800"/>
<ul>
    <li>Understand what you're optimizing for</li>
    <li>Change one thing at a time to observe effects</li>
    <li>Use this to guide real-world modifications of your process</li>
</ul>
