# atoti Value-at-Risk benchmark

This notebook is a benchmark template that compares atoti's performance with ClickHouse by reproducing the benchmark detailed in a [blog post by Altinity on calculating Value-at-Risk (VaR)](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management). Check out the [full article](https://www.activeviam.com/blog/atoti-for-risk-management-aggregation-comparison-with-the-clickhouse-benchmark/).

In [None]:
import atoti
import time

In [None]:
# JVM options passed to the atoti session: cap direct memory at 12G.
jvm_args = ["-XX:MaxDirectMemorySize=12G"]
config = atoti.config.create_config(java_args=jvm_args)

# Start the session used by every cell below.
session = atoti.create_session("RiskBenchmark", config=config)

In [None]:
# Load the risk dataset from the local "dataset.parquet" file into the session.
# NOTE(review): the partitioning spec presumably hash-partitions the rows on
# the "str0" column into 32 partitions — confirm against the atoti docs.
dataset = session.read_parquet("dataset.parquet", partitioning="hash32(str0)")

In [None]:
# Build the cube on top of the loaded dataset; the measures, levels and
# queries defined in the cells below all go through this object.
cube = session.create_cube(dataset)

In [None]:
# Show the cube schema as the cell output (visual sanity check).
cube.schema

In [None]:
# Disable the aggregate cache (capacity set to -1).
# Presumably done so that repeated benchmark runs measure actual query
# computation rather than cache hits — confirm.
cube.aggregates_cache.capacity = -1

In [None]:
# Short aliases for the cube's measures and levels, used by all queries below
m, lvl = cube.measures, cube.levels

# Vector aggregation: sum of the "arrFloat" column
m["PnlVector"] = atoti.agg.sum(dataset["arrFloat"])

# Value at risk: 0.95 quantile of the aggregated vector
m["ValueAtRisk"] = atoti.array.quantile(m["PnlVector"], q=0.95)

# Static "Scenarios" hierarchy with members 0..999, used to pick one element
# of the vector (assumes the vectors hold 1000 values — TODO confirm)
scenario_indices = list(range(1000))
cube.create_static_parameter_hierarchy("Scenarios", scenario_indices)
m["PnL at index"] = m["PnlVector"][lvl["Scenarios"]]

In [None]:
# Ask the session to load all the data.
# NOTE(review): the method name suggests earlier loading may have been
# partial — confirm against the atoti docs.
session.load_all_data()

In [None]:
# Show the session URL as the cell output.
session.url

In [None]:
# Number of facts in the cube: query the "contributors.COUNT" measure
# with no levels, i.e. at the top of the cube.
cube.query(m["contributors.COUNT"])

### Query Benchmark

In [None]:
def Q1():
    """Query ValueAtRisk broken down by the ``str0`` level, in raw mode."""
    measure = m["ValueAtRisk"]
    group_by = [lvl["str0"]]
    return cube.query(measure, levels=group_by, mode="raw")

def Q2():
    """Query ValueAtRisk broken down by six levels, in raw mode.

    Levels: two string, two integer and two datetime levels.
    """
    measure = m["ValueAtRisk"]
    level_names = ("str0", "str1", "int10", "int11", "dttime10", "dttime11")
    group_by = [lvl[level_name] for level_name in level_names]
    return cube.query(measure, levels=group_by, mode="raw")

def Q3():
    """Query ValueAtRisk broken down by twelve levels, in raw mode.

    Levels: four string, four integer and four datetime levels.
    """
    measure = m["ValueAtRisk"]
    level_names = (
        "str0", "str1", "str2", "str3",
        "int10", "int11", "int12", "int13",
        "dttime10", "dttime11", "dttime12", "dttime13",
    )
    group_by = [lvl[level_name] for level_name in level_names]
    return cube.query(measure, levels=group_by, mode="raw")

def Q4():
    """Query "PnL at index" per ``str0`` and scenario, filtered on ``str1``.

    Only facts whose ``str1`` level equals "KzORBHFRuFFOQm" are kept; the
    query runs in raw mode.
    """
    measure = m["PnL at index"]
    group_by = [lvl["str0"], lvl["Scenarios"]]
    str1_filter = lvl["str1"] == "KzORBHFRuFFOQm"
    return cube.query(
        measure,
        levels=group_by,
        condition=str1_filter,
        mode="raw",
    )

# Default number of timed iterations per benchmark
iterations = 10


def benchmark(name, task, runs=None, warmup=10):
    """Time repeated calls of ``task`` and print the average duration in ms.

    Args:
        name: Label printed in front of the result.
        task: Zero-argument callable to measure; its return value is ignored.
        runs: Number of timed calls. When omitted, the module-level
            ``iterations`` value is used (read at call time, so changing it
            in a later cell still takes effect).
        warmup: Number of untimed calls made before the measurement starts.

    Raises:
        ValueError: If ``runs`` is smaller than 1 or ``warmup`` is negative.
    """
    if runs is None:
        runs = iterations
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")
    if warmup < 0:
        raise ValueError(f"warmup must not be negative, got {warmup}")

    # Warmup: untimed calls so one-off startup costs stay out of the average
    for _ in range(warmup):
        task()

    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock, which can be adjusted while the benchmark is running.
    start = time.perf_counter()
    for _ in range(runs):
        task()
    elapsed = time.perf_counter() - start

    print(f"{name} average query time {round(1000*elapsed/runs)} ms")

In [None]:
# Warm up, then print the average time of Q1 (ValueAtRisk by str0).
benchmark("Q1", Q1)

In [None]:
# Warm up, then print the average time of Q2 (ValueAtRisk by six levels).
benchmark("Q2", Q2)

In [None]:
# Warm up, then print the average time of Q3 (ValueAtRisk by twelve levels).
benchmark("Q3", Q3)

In [None]:
# Warm up, then print the average time of Q4 (PnL at index, filtered on str1).
benchmark("Q4", Q4)

### Launch queries manually

In [None]:
%%time
# Single ad-hoc run of Q1; the %%time cell magic reports how long the cell took.
Q1()

In [None]:
%%time
# Single ad-hoc run of Q2; the %%time cell magic reports how long the cell took.
Q2()

In [None]:
%%time
# Single ad-hoc run of Q3; the %%time cell magic reports how long the cell took.
Q3()

In [None]:
%%time
# Single ad-hoc run of Q4; the %%time cell magic reports how long the cell took.
Q4()