# Exercise: Write some batch processing code

## Set correct directory

In [1]:
# Switch to the parent directory: the files opened further down
# ('scenarios/abm.json', 'csv/sim_time.pickle') are given as relative paths.
# Relies on IPython automagic; re-running this cell moves up one more level.
cd ..

/bptk-py


## Import libraries

In [2]:
import pandas as pd
import csv
import json
import time
import datetime
import pickle
from elasticsearch import Elasticsearch
import os.path
from src.config.conf import width,height
from src.setup import setup_model
from ipywidgets import widgets
from ipywidgets import IntSlider

## Obtain Number Of Taxis

In [3]:
# Parse the scenario definition file into a plain dict.
with open('scenarios/abm.json') as scenario_file:
    abm = json.loads(scenario_file.read())

# Drill down to the agent list of the CARMODEL scenario. The "count" of the
# second entry is used as the number of taxis (presumably that entry is the
# car agent -- verify against scenarios/abm.json).
scenario_agents = abm["CARMODEL"]["scenarios"]["scenario"]["agents"]
num_cars = scenario_agents[1]["count"]
num_cars

3

## Current Simulation Time

In [5]:
# Read the current simulation time stored in the pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must only ever come from a trusted source.
with open("csv/sim_time.pickle", 'rb') as time_file_object:
    # The context manager closes the handle even if unpickling fails; the
    # original bare open() left the file open for the life of the kernel.
    current_time = pickle.load(time_file_object)
current_time

10080

## Reading and writing to Elasticsearch

In [7]:
# Connect to the Elasticsearch node at es_node1:9200.
es = Elasticsearch([{'host': 'es_node1', 'port': 9200}])

# Example Elasticsearch query against the "1_car" index.
# The documents whose "time" falls into the 1..10 range form a single bucket,
# and the "cost" field is summed inside that bucket. "size": 0 asks for the
# aggregation only, without the matching documents themselves.
# Note: the outer aggregation is named "sum_profit" although it sums "cost".
time_window = {"from": 1,
               "to": 10}
cost_total = {"sum": {"field": "cost"}}
example_query = {
    "size": 0,
    "aggs": {
        "sum_profit": {
            "date_range": {"field": "time", "ranges": [time_window]},
            "aggs": {"sum_cost": cost_total},
        }
    },
}
es.search(index="1_car", body=example_query)

{'took': 930,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 10000, 'relation': 'gte'},
  'max_score': None,
  'hits': []},
 'aggregations': {'sum_profit': {'buckets': [{'key': '1.0-10.0',
     'from': 1.0,
     'to': 10.0,
     'doc_count': 9,
     'sum_cost': {'value': 1.8000000268220901}}]}}}

In [8]:
# Write a sample document to the (new) "test_index" index.
# The "id" key is an ordinary field of the document body; it is not passed as
# the Elasticsearch document id.
sample_document = {
    "id": "123",
    "timestamp": datetime.datetime.now(),
    "name": "oliver grasl",
}
es.index(index="test_index", body=sample_document)

{'_index': 'test_index',
 '_type': '_doc',
 '_id': '59fajW4BT3mDCAC2q6UU',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

## Calculate Batch Results

Write some batch processing code that processes batches of 1440 entries (i.e. every 1440 timesteps) in each of the Elasticsearch *_car indexes and sends the following data structure to the corresponding *_car_batch index in Elasticsearch:

```
{
    "time":current_sim_time, # current timestep
    "timestamp": datetime.datetime.now(), # current time
    "id":car_id, # car id
    "sum_revenue":sum_revenue, # the sum of this car's revenue up to this timestep
    "sum_cost":sum_cost, # the sum of this car's cost up to this timestep
    "profit":sum_profit, # difference between revenue and cost
    "avg_sum_revenue":avg_sum_revenue, # average revenue per timestep
    "avg_sum_cost":avg_sum_cost, # average cost per timestep
    "avg_profit":avg_sum_profit # average profit per timestep
}
```

HINT: it takes some time for 1440 new entries to accumulate in the *_car indices ... so make sure you don't check the current simulation time too often.