# Benchmarking

## Imports

In [1]:
import numpy as np
import pandas as pd
import posixpath
import subprocess
from advanced_databases_project import data, preprocessing, benchmarking, DATA_PATH, OUTPUT_PATH, PROJECT_PATH, PROMETHEUS_OPENMETRICS_PATH

## Load Data

In [2]:
# Build the path to the preprocessed hourly CSV inside the project's OUTPUT_PATH folder.
filename = "preprocessed_aws_1hour.csv"
filepath = posixpath.join(OUTPUT_PATH, filename)
# Load through the project helper; it prints the "Loading data ..." progress messages.
data_df = data.load_data_csv(filepath=filepath)
# Bare expression on the last line so the notebook renders the loaded frame.
data_df

Loading data from c:\users\derar\documents\advanced databases project\code\outputs/preprocessed_aws_1hour.csv..
Data loaded successfully


Unnamed: 0_level_0,air_pressure,air_temperature,relative_humidity,precipitation
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-01 00:00:00,987.84,8.32,96.37,1.20
2003-12-01 01:00:00,987.31,8.27,96.48,1.10
2003-12-01 02:00:00,986.98,8.35,96.90,0.50
2003-12-01 03:00:00,986.56,8.86,97.23,0.10
2003-12-01 04:00:00,986.25,9.69,95.93,0.10
...,...,...,...,...
2024-11-19 11:00:00,979.99,8.57,96.55,2.72
2024-11-19 12:00:00,980.17,8.42,94.12,0.48
2024-11-19 13:00:00,981.22,6.59,93.31,1.18
2024-11-19 14:00:00,982.19,5.11,93.40,0.99


## Benchmarking Parameters

In [3]:
# Parameter grid for the benchmark.
#   ns - how many series are written into a single file
#   Ns - total sample budgets; every series receives N // n samples
#   Bs - duration strings ("1h" ... "168h"); not consumed in this cell
ns = [1, 5, 10, 20, 50, 100]
Ns = [10, 50, 100, 500, 1000, 5000, 10000, 50000]
Bs = ["1h", "2h", "5h", "12h", "24h", "48h", "168h"]

# Keep only the combinations that leave at least 10 samples per series.
# Note: the stored "N" is the per-series count (budget // n), not the budget itself.
params = [
    {"n": n, "N": N // n}
    for n in ns
    for N in Ns
    if N // n >= 10
]
print(len(params))

35


## Save Data in OpenMetrics Format for Each Parameter Combination

In [4]:
# Export one OpenMetrics text file per (n, N) combination.
# Each file holds n series of N samples, cut as consecutive N-row chunks from the
# last n*N rows of `data_df`. The first series keeps the source column name; the
# others are named "<col_name><i>". Extra series are inserted as plain arrays, so
# they all take the index of the first chunk.
col_name = "air_temperature"
total_rows = len(data_df)

# Fail before writing anything if the data cannot supply the largest window.
# Negative slices would otherwise be clipped silently and produce short files.
required_rows = max((p["n"] * p["N"] for p in params), default=0)
if required_rows > total_rows:
    raise ValueError(
        f"data_df has {total_rows} rows but the parameter grid needs {required_rows}"
    )

for param in params:
    n = param["n"]
    N = param["N"]

    filename = f"n-{n}_N-{N}.txt"
    filepath = posixpath.join(PROMETHEUS_OPENMETRICS_PATH, filename)

    # Forward offsets avoid the `-0` slice-end pitfall of negative indexing.
    window_start = total_rows - N * n

    # Series 0: the oldest N rows of the window; this call creates the file.
    data_df2 = pd.DataFrame(data_df.iloc[window_start:window_start + N][col_name])
    benchmarking.save_data_openmetrics(data_df2, column=col_name, filepath=filepath)

    # Series 1..n-1: the following N-row chunks, appended to the same file.
    for i in range(1, n):
        new_col_name = col_name + str(i)
        chunk_start = window_start + i * N
        chunk_values = np.array(data_df.iloc[chunk_start:chunk_start + N][col_name])
        data_df2.insert(i, new_col_name, chunk_values)

        benchmarking.save_data_openmetrics(data_df2, column=new_col_name, filepath=filepath, append=True)
    

Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-10.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-50.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-100.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-500.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-1000.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-5000.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-10000.txt
Data saved successfully to c:\users\derar\documents\advanced databases project\code\prometheus\openMetrics/n-1_N-50000.txt
Data saved successfully to c