In [None]:
import sys 
sys.path.insert(1, "../")
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import ast

from workloads.util import use_results, use_dataset

%load_ext autoreload
%autoreload 2

In [None]:
# Name of the experiment whose dataset and results are analysed below.
experiment = "stl-yahoo-A4-keys-100-interval-10000-events-200000-queries-200000"

# Resolve the experiment name through the two helpers imported from
# workloads.util. Only `use_results` / `use_dataset` are imported, so those
# are the names that must be called here.
# NOTE(review): both are presumably local directory paths -- results_dir is
# passed to os.listdir and dataset_dir is used as a path prefix further down.
results_dir = use_results(experiment)
dataset_dir = use_dataset(experiment)
print(results_dir)
print(dataset_dir)

In [None]:
# Disabled alternative: obtain dataset_dir / results_dir by downloading
# wandb artifacts instead of going through the workloads.util helpers.
# Note that the experiment name below is not the same as the one assigned
# in the active configuration cell.
#import wandb
#run = wandb.init(project="ralf-stl", entity="ucb-ralf")
#experiment = 'stl-A4-keys-100-interval-10000-events-200000'
#dataset_artifact = run.use_artifact(f'{experiment}:latest', type='dataset')
#results_artifact = run.use_artifact(f'{experiment}-results:latest', type='results')
#dataset_dir = dataset_artifact.download()
#results_dir = results_artifact.download()
#print(dataset_dir)
#print(results_dir)

In [None]:
# Show the entries of the results directory; every entry is later read as
# one result CSV when the per-query feature results are built.
os.listdir(results_dir)

In [None]:
# Load the three dataset tables (queries, events, oracle features) from the
# dataset directory, in that order.
queries_df, events_df, oracle_df = map(
    pd.read_csv,
    (
        f"{dataset_dir}/queries.csv",
        f"{dataset_dir}/events.csv",
        f"{dataset_dir}/oracle_features.csv",
    ),
)
# Parsing of the oracle `seasonality` column is currently disabled:
#oracle_df["seasonality"] = oracle_df['seasonality'].dropna().apply(ast.literal_eval)

## Feature Results for Queries 

In [None]:
# For every result file, build one row per query holding the most recent
# feature row (same key_id) whose timestamp does not exceed the query's.
# The resulting frames are indexed by query_id and stored under the file name.
results = {}
for result_file in sorted(os.listdir(results_dir)):  # sorted: deterministic order
    results_df = pd.read_csv(f"{results_dir}/{result_file}")
    # The CSV stores `seasonality` as text; turn it back into a Python object.
    results_df["seasonality"] = results_df["seasonality"].apply(ast.literal_eval)

    # Pair every query with every feature row of the same key. Column names
    # present on both sides receive the `_query` / `_feature` suffixes.
    joined_df = queries_df.join(
        results_df.set_index("key_id"),
        on="key_id",
        how="outer",
        lsuffix="_query",
        rsuffix="_feature",
    )

    # Keep only features that already existed at query time. Rows with a
    # missing timestamp on either side compare False and are dropped here.
    available_df = joined_df[
        joined_df["timestamp_ms_query"] >= joined_df["timestamp_ms_feature"]
    ]

    # Select the *entire* latest feature row per query. A column-wise
    # groupby("query_id").max() would take the maximum of every column
    # independently (e.g. the largest trend ever observed), which is not the
    # feature that was actually current when the query was issued.
    query_result_df = (
        available_df
        .sort_values("timestamp_ms_feature", kind="stable")
        .drop_duplicates(subset="query_id", keep="last")
        .set_index("query_id")
        .sort_index()
    )
    results[result_file] = query_result_df

## Oracle Feature Results for Queries 

In [None]:
# Attach the oracle feature row with the exact same (key_id, timestamp_ms)
# to each query, discard rows containing any missing value, and index the
# result by query_id in ascending order.
query_oracle_df = (
    queries_df
    .join(
        oracle_df.set_index(["key_id", "timestamp_ms"]),
        on=["key_id", "timestamp_ms"],
        how="left",
        lsuffix='_query',
        rsuffix='_feature',
    )
    .dropna()
    .set_index("query_id")
    .sort_index()
)

In [None]:
# Display the oracle `seasonality` value stored at row label 1000.
oracle_df.loc[1000, 'seasonality']

# Evaluate Approximated Features 

In [None]:
from workloads.stl.stl_util import predict, predict_seasonality

In [None]:
key_id = 3  # key whose trend series is plotted for every result file
# Forwarded as `interval=` to predict / predict_seasonality.
# NOTE(review): unit and meaning are defined in workloads.stl.stl_util -- confirm.
PREDICT_INTERVAL = 5

# Axes shared by all result files so their trends land on one labelled plot.
trend_ax = None

for key, df in results.items():
    # `key` is the result file name, `df` its per-query feature rows.

    # Residuals with value/trend passed exactly as stored.
    residuals = df.apply(lambda x: predict(
        x["value"],
        x["trend"],
        x["seasonality"],
        int(x["timestamp_ms_query"]),
        int(x["timestamp_ms_feature"]),
        interval=PREDICT_INTERVAL,
    ), axis=1)
    print(key, "mean abs residual", residuals.abs().mean())

    # Age of the served feature, and its trend error against the oracle.
    # The subtraction aligns on the query_id index of both frames.
    staleness = df["timestamp_ms_query"] - df["timestamp_ms_feature"]
    error = query_oracle_df["trend"] - df["trend"]
    print(key, "staleness", staleness.mean(), "trend error", error.abs().mean())

    # Same residual, but with value and trend truncated to int first.
    # NOTE(review): int() discards the fractional part, so this number can
    # differ from the one above -- confirm which variant is the intended metric.
    residuals = df.apply(lambda x: predict(
        int(x["value"]),
        int(x["trend"]),
        x["seasonality"],
        int(x["timestamp_ms_query"]),
        int(x["timestamp_ms_feature"]),
        interval=PREDICT_INTERVAL,
    ), axis=1)
    print(key, "mean abs residual (int-truncated value/trend)", residuals.abs().mean())

    # Stored on the frame held in `results`, so later cells can read it.
    df["pred_seasonality"] = df.apply(lambda x: predict_seasonality(
        x["seasonality"],
        int(x["timestamp_ms_query"]),
        int(x["timestamp_ms_feature"]),
        interval=PREDICT_INTERVAL,
    ), axis=1)

    # Use the configured key_id (not a hard-coded literal) and label the
    # line with the result file so overlapping curves can be told apart.
    trend_ax = df.loc[df["key_id"] == key_id, "trend"].plot(
        ax=trend_ax, label=key, legend=True
    )

if trend_ax is not None:
    trend_ax.set(title=f"trend for key_id={key_id}", xlabel="query_id", ylabel="trend")

In [None]:
def _oracle_residual(row):
    """Call `predict` for one oracle row, using the row's own timestamp as
    both the query time and the feature time."""
    ts = int(row["timestamp_ms"])
    return predict(
        int(row["value_query"]),
        int(row["trend"]),
        row["seasonality"],
        ts,
        ts,
        interval=5,
    )


# Mean absolute value of `predict` over the oracle-joined queries.
residuals = query_oracle_df.apply(_oracle_residual, axis=1)
print(residuals.abs().mean())