In [1]:
import simpy
import numpy as np
import pandas as pd
from random import expovariate, seed

### ToDos:

- ~~write all simulations results into DataFrame~~
- understand how to choose `a` and `b` for simulations
- rewrite lab tasks here

### Notes

- `a` and `b` should NOT depend on `n_requests`, since we are trying to find the value of `n_requests`
from which the payload converges to the expected value.

In [2]:
# global constants
N_PROCESSORS = 1            # capacity of the simulated resource
N_REQUESTS = 2000           # NOTE(review): not referenced in the cells shown here
MAX_QUEUE_LEN = 100         # requests arriving at a queue this long are rejected
AVG_REQUEST_INTERVAL = 700  # NOTE(review): not referenced in the cells shown here
AVG_SERVING_TIME = 100      # NOTE(review): not referenced in the cells shown here
MAX_TIME = 30000            # NOTE(review): not referenced in the cells shown here
SEED = 42

# Seed the stdlib generator once so that the whole sweep is reproducible.
seed(SEED)

# Simulation parameters, keyed as
#     simulation time limit -> expected payload -> {'a': ..., 'b': ...}
# where `a` is the average request interval and `b` the average serving time.
# The insertion order is the order in which the simulations are run.
payload_balance = {
    30000: {
        0.1: dict(a=700, b=100),
        0.9: dict(a=104, b=100),
    },
    20000: {
        0.1: dict(a=1100, b=100),
        0.9: dict(a=104, b=100),
    },
    10000: {
        0.1: dict(a=450, b=100),
        0.9: dict(a=104, b=100),
    },
    8000: {
        0.1: dict(a=450, b=100),
        0.9: dict(a=102, b=100),
    },
    6000: {
        0.1: dict(a=450, b=100),
        0.9: dict(a=102, b=100),
    },
}

# Request counts tried for every entry of `payload_balance`.
n_requests = [
    10, 100, 500,
    2000, 4000, 6000, 8000,
    10000, 20000, 30000, 50000,
    100000, 150000,
]

In [3]:
class ProcessMonitor:
    ''' Drives one queueing simulation and records per-request statistics.

    This class does not pretend to be general by any means.

    Requests are generated with exponentially distributed gaps and each
    accepted request holds `resource` for an exponentially distributed time.
    A request that arrives while the resource queue already holds
    `_max_queue_len` (or more) waiting requests is rejected and never served.

    Attributes:
        env(simpy.Environment): to not use the global env each monitored
            object has its own env
        resource(simpy.Resource): resource being monitored
        _max_queue_len(int): queue length at which new requests are rejected
        _serving_time(list of floats): amounts of time that were spent to
            serve requests; a value is recorded only once the service of
            that request has finished
        _waiting_time(list of floats): amounts of time requests had to wait
            until they started being served
        _entry_time(list of floats): times when requests arrived at the
            processor/queue (rejected requests included)
        _num_rejected(int): amount of rejected requests. Would be better to
            make it a list of times.
    '''
    def __init__(self, env, resource, max_queue_len=None):
        ''' Binds the monitor to an environment and a resource.

        Args:
            env(simpy.Environment)   : environment the simulation runs in
            resource(simpy.Resource) : resource the requests compete for
            max_queue_len(int or None): queue length at which requests are
                rejected; `None` (default) uses the global `MAX_QUEUE_LEN`
        '''
        self.env = env
        self.resource = resource
        # Resolve the limit once, so `_serve` does not depend on a global.
        self._max_queue_len = MAX_QUEUE_LEN if max_queue_len is None else max_queue_len

        self._serving_time = []
        self._waiting_time = []
        self._entry_time = []
        self._num_rejected = 0

    def collect_data(self, n_requests, avg_request_interval, avg_serving_time, max_time):
        ''' Gets results of simulation with given parameters.

        Args:
            n_requests(int)            : number of jobs/request in simulation
            avg_request_interval(float): in term of model this is `a`
            avg_serving_time(float)    : in term of model this is `b`
            max_time(int)              : time limit of simulation

        Returns:
            dict with the input parameters plus the collected statistics:
            'waiting_time', 'serving_time' and 'entry_time' (the monitor's
            own lists, not copies), 'experiment_payload' (sum of the
            recorded serving times) and 'n_rejected'.

        Note:
            to start a new simulation you should restart environment by doing
            `env = simpy.Environment(0)`.
        '''
        self.n_requests = n_requests
        self._avg_request_interval = avg_request_interval
        self._avg_serving_time = avg_serving_time

        self.env.process(self._generate_requests())
        self.env.run(until=max_time)

        return {
            'n_requests': n_requests,
            'avg_request_interval': avg_request_interval,
            'avg_serving_time': avg_serving_time,
            'max_time': max_time,
            'waiting_time': self._waiting_time,
            'serving_time': self._serving_time,
            'entry_time': self._entry_time,
            'experiment_payload': sum(self._serving_time),
            'n_rejected': self._num_rejected,
        }

    def _generate_requests(self):
        ''' Process that spawns `n_requests` requests, one after another. '''
        for _ in range(self.n_requests):
            self.env.process(self._serve())
            t = expovariate(1.0/self._avg_request_interval)
            yield self.env.timeout(t) # wait for the next request to appear

    def _serve(self):
        ''' Process for a single request: queue up, get served, record stats. '''
        arrive = self.env.now
        self._entry_time.append(arrive)

        # The queue is full: count the rejection on the monitor itself so it
        # shows up in the results, and drop the request.
        if len(self.resource.queue) >= self._max_queue_len:
            self._num_rejected += 1
            return

        with self.resource.request() as req:
            yield req # wait in the queue until the resource is granted
            self._waiting_time.append(self.env.now - arrive)

            service_time = expovariate(1.0/self._avg_serving_time)
            yield self.env.timeout(service_time) # wait to be served
            # recorded only after the service has completed
            self._serving_time.append(service_time)

In [4]:
results_list = []

# Sweep every (time limit, expected payload, request count) combination.
for simulation_time, balances in payload_balance.items():
    for expected_payload, params in balances.items():
        for nr in n_requests:
            # every run gets a brand-new environment and resource, so no
            # simulated time or queue state is carried over between runs
            env = simpy.Environment(0)
            k = simpy.Resource(env, capacity=N_PROCESSORS)
            m = ProcessMonitor(env, k)

            # run the simulation and tag its record with the target payload
            record = m.collect_data(nr, params['a'], params['b'], simulation_time)
            record['expected_payload'] = expected_payload
            results_list.append(record)

# one row per simulation run
results = pd.DataFrame(results_list)
results.head(5)

Unnamed: 0,avg_request_interval,avg_serving_time,entry_time,expected_payload,experiment_payload,max_time,n_rejected,n_requests,serving_time,waiting_time
0,700,100,"[0, 714.0422010923608, 939.1790459448366, 1872...",0.1,413.755574,30000,0,10,"[2.532883904273889, 25.258618567011354, 112.91...","[0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.3430843401..."
1,700,100,"[0, 1147.2761034633859, 1438.4026640123925, 36...",0.1,3177.538885,30000,0,100,"[119.77899409734852, 16.89862680872698, 41.036...","[0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...."
2,700,100,"[0, 2480.216048905628, 2488.299255303941, 3289...",0.1,4109.36647,30000,0,500,"[197.16977244312196, 127.55469306730419, 76.99...","[0, 0.0, 119.47148666899102, 0.0, 0.0, 0.0, 0...."
3,700,100,"[0, 360.6427988647165, 1745.3348347147403, 189...",0.1,4874.798694,30000,0,2000,"[41.44593433398194, 28.589212059888684, 59.531...","[0, 0.0, 0.0, 0.0, 0.0, 55.54369072802592, 0.0..."
4,700,100,"[0, 53.258103856899886, 268.9683469137225, 175...",0.1,5263.340192,30000,0,4000,"[23.972312636845544, 270.69414858598014, 211.4...","[0, 0.0, 54.98390552915754, 0.0, 0.0, 0.0, 0.0..."


In [5]:
# DataFrame describing payload
# `assign` returns a new frame, so the derived columns are never written into
# a selection of `results` (no copy/view ambiguity, no SettingWithCopyWarning).
p = (
    results[['n_requests', 'max_time', 'experiment_payload', 'expected_payload']]
    # share of the simulation time that was spent serving requests
    .assign(exp_payload_percents=lambda df: df['experiment_payload'] / df['max_time'])
    # expected minus measured payload share
    .assign(diff=lambda df: df['expected_payload'] - df['exp_payload_percents'])
)

p[p['max_time']==30000].head(15)

Unnamed: 0,n_requests,max_time,experiment_payload,expected_payload,exp_payload_percents,diff
0,10,30000,413.755574,0.1,0.013792,0.086208
1,100,30000,3177.538885,0.1,0.105918,-0.005918
2,500,30000,4109.36647,0.1,0.136979,-0.036979
3,2000,30000,4874.798694,0.1,0.162493,-0.062493
4,4000,30000,5263.340192,0.1,0.175445,-0.075445
5,6000,30000,2829.254014,0.1,0.094308,0.005692
6,8000,30000,4813.105582,0.1,0.160437,-0.060437
7,10000,30000,5178.681199,0.1,0.172623,-0.072623
8,20000,30000,2092.861438,0.1,0.069762,0.030238
9,30000,30000,5145.927875,0.1,0.171531,-0.071531
