# Raw time series (tss) from direct BMW responses
This notebook demonstrates how to parse the raw time series (tss) out of the direct BMW responses.

## Setup




### Imports

In [None]:
from functools import reduce

from core.pandas_utils import *
from core.singleton_s3_bucket import bucket

## Implementation

Let's first take a look at an example response.

In [None]:
# Key of one stored BMW response, used as a worked example in the next cells.
EXAMPLE_KEY = "response/BMW/WBY71AW000FM68170/2024-12-02.json"
# Load the payload through the project's bucket helper; the next cell reads its "data" entry.
response = bucket.read_json_file(EXAMPLE_KEY)
# Display the raw payload to inspect its structure.
response

In [None]:
def parse_raw_ts_from_dict_lst(response:list) -> DF:
    """Turn a list of raw measurement records into a wide time series.

    Args:
        response: List of records, each providing at least the
            ``date_of_value``, ``key`` and ``value`` entries.

    Returns:
        A frame indexed by ``date_of_value`` with one column per distinct
        ``key`` and the matching ``value`` in each cell.
    """
    return (
        DF.from_dict(response)
        # `pivot` raises ValueError when an (index, columns) pair appears more
        # than once, so keep only the first record of each pair.
        .drop_duplicates(subset=["date_of_value", "key"])
        .pivot(
            index="date_of_value",
            columns="key",
            values="value",
        )
    )
# Try the parser on the records stored under the "data" entry of the example response.
parse_raw_ts_from_dict_lst(response["data"])

In [None]:
# Listing of the stored BMW responses; later cells rely on its "vin" and "key" columns.
responses = bucket.list_responses_keys_of_brand("BMW")
responses

In [None]:
# Load every stored response of a single vehicle.
responses_dicts = responses.query("vin == 'WBY1Z610407A12415'")["key"].apply(bucket.read_json_file)
display(responses_dicts)
# Flatten the "data" lists of all responses into one list of records.
# A single-pass comprehension avoids re-copying the accumulated list for each
# response, which is what repeated `list + list` inside `reduce` does.
cat_responses_dicts = [
    record
    for response_dict in responses_dicts
    for record in response_dict["data"]
]
display(cat_responses_dicts)

In [None]:
# Long-format frame of all records, without the "unit" and "info" columns.
unpivoted_df = (
    DF.from_dict(cat_responses_dicts)
    .drop(columns=["unit", "info"])
)
unpivoted_df
# Direct pivot left disabled; the following cells inspect repeated
# (date_of_value, key) pairs first.
#raw_ts = parse_raw_ts_from_dict_lst(cat_responses_dicts)

In [None]:
# Pivot the records of one timestamp after keeping a single row per
# (date_of_value, key) pair.
(
    unpivoted_df
    .drop_duplicates(subset=["date_of_value", "key"])
    .query("date_of_value == '2024-11-06T15:48:10Z'")
    .pivot(index="date_of_value", columns="key", values="value")
)


In [None]:
# For one timestamp, count how many distinct values each key carries:
# a count above 1 means the same key was reported with conflicting values.
(
    unpivoted_df
    .drop_duplicates(subset=["date_of_value", "key", "value"])
    .query("date_of_value == '2024-11-06T15:48:10Z'")
    [["date_of_value", "key"]]
    .value_counts()
)

In [None]:
# Show the distinct (date_of_value, key, value) rows recorded at that timestamp.
(
    unpivoted_df
    .drop_duplicates(subset=["date_of_value", "key", "value"])
    .query("date_of_value == '2024-11-06T15:48:10Z'")
)

In [None]:
# Number of distinct values per timestamp, most frequent timestamps first
# (value_counts sorts in descending order by default).
(
    unpivoted_df
    .drop_duplicates(subset=["date_of_value", "value"])
    .loc[:, "date_of_value"]
    .value_counts()
)

In [None]:
# Rows whose timestamp already appeared in an earlier row.
unpivoted_df.loc[unpivoted_df["date_of_value"].duplicated()]

In [None]:
# Wide time series of the vehicle: one row per timestamp, one column per key.
# Only the first record of each (date_of_value, key) pair is kept so that the
# pivot has unique entries.
df = unpivoted_df.drop_duplicates(subset=["date_of_value", "key"]).pivot(
    index="date_of_value",
    columns="key",
    values="value",
)

df

In [None]:
# Earliest and latest timestamps of the pivoted series.
min_date = pd.to_datetime(df.reset_index()["date_of_value"], format="mixed").min()
max_date = pd.to_datetime(df.reset_index()["date_of_value"], format="mixed").max()

# Covered time span in seconds, then the average number of rows per second.
duration = (max_date - min_date).total_seconds()
freq = len(df) / duration
# Average number of rows per hour.
3600 * freq

In [None]:
# Write the rows that repeat an earlier (date_of_value, key) pair to a CSV file.
(
    unpivoted_df
    .loc[unpivoted_df.duplicated(subset=["date_of_value", "key"])]
    .to_csv("data_cache/test.csv")
)

In [None]:
# Summary statistics of the number of non-null values per row.
df.notna().sum(axis=1).describe()

In [None]:
def parse_responses(responses:DF) -> DF:
    """Build the wide raw time series of one vehicle from its stored responses.

    Intended to be called through ``groupby("vin").apply(...)``: the label of
    the group is read from ``responses.name``.

    Args:
        responses: Rows of the response listing belonging to one group. Needs
            a ``key`` column whose values are passed to
            ``bucket.read_json_file`` and a ``name`` attribute holding the
            group label.

    Returns:
        A frame indexed by ``date_of_value`` with one column per distinct
        ``key`` plus a constant ``vin`` column set to ``responses.name``.
    """
    print("reading responses of", responses.name, end="")
    responses_dicts = responses["key"].apply(bucket.read_json_file)
    print(", concatenating...", end="")
    # Flatten in a single pass: repeated `list + list` inside `reduce`
    # re-copies the accumulated records for every response.
    cat_responses_dicts = [
        record
        for response_dict in responses_dicts
        for record in response_dict["data"]
    ]
    print("Parsing reps.")
    return (
        DF.from_dict(cat_responses_dicts)
        # "unit" and "info" are not used by the pivot; tolerate payloads
        # that do not provide them instead of raising KeyError.
        .drop(columns=["unit", "info"], errors="ignore")
        # `pivot` needs unique (index, columns) pairs; keep the first record.
        .drop_duplicates(subset=["date_of_value", "key"])
        .pivot(index="date_of_value", columns="key", values="value")
        .assign(vin=responses.name)
    )

# Parse the responses of every VIN; `include_groups=False` keeps the grouping
# column out of the frame handed to `parse_responses`.
raw_tss = responses.groupby("vin").apply(parse_responses, include_groups=False)

In [None]:
# Drop the "vin" column before moving the index levels back into columns,
# then run the project's sanity check on the flat frame.
sanity_check(raw_tss.drop(columns="vin").reset_index())

In [None]:
# Flat view of the parsed time series: "vin" column dropped, index levels
# restored as regular columns.
raw_tss.drop(columns="vin").reset_index()