# Mercedes raw time series EDA
The goals of this notebook are to:
- Visualize the Mercedes data and build an intuition for what it looks like.
- Find a lead for estimating the SoH.

## Imports

In [None]:
import logging
from datetime import datetime as DT
from datetime import timedelta as TD
from dateutil import parser
from dotenv import load_dotenv
import os

from rich import print
import pandas as pd
from pandas import Series
from pandas import DataFrame as DF
import plotly.express as px

from core.s3_utils import S3_Bucket
from jobs.base_jobs.job_interval import Jobinterval
from core.config import *
from core.time_series_processing import preprocess_date
from jobs.high_mobility.constants import *

## Setup

In [None]:
# Load only the fleet columns used below; "Make" is read with the pandas string dtype.
fleet_columns = ["VIN", "Make", "Model", "Type"]
fleet_info = pd.read_csv("fleet_info.csv", usecols=fleet_columns, dtype={"Make": "string"})
# display(fleet_info["Make"].str.lower().value_counts())

# Lower-case the make, keep the 'mercedes-benz' rows whose Type is not 'x',
# then index by VIN while keeping "vin" available as a regular column.
lowercase_make = fleet_info["Make"].str.lower()
fleet_info = fleet_info.rename(columns={"VIN": "vin"})
fleet_info = fleet_info.assign(Make=lowercase_make)
fleet_info = fleet_info.query("Make == 'mercedes-benz' & Type != 'x'")
fleet_info = fleet_info.set_index("vin", drop=False)

# Number of selected vehicles per (Model, Type) pair.
fleet_info[["Model", "Type"]].value_counts()

In [None]:
# Credential field -> environment variable that provides its value.
# NOTE(review): load_dotenv is imported but never called in the visible cells;
# confirm the PROD_S3_* variables are already present in the environment,
# otherwise os.getenv returns None for each of them.
_PROD_ENV_VARS = {
    "bucket_name": "PROD_S3_BUCKET",
    "aws_access_key_id": "PROD_S3_KEY",
    "aws_secret_access_key": "PROD_S3_SECRET",
}
PROD_CREDS = {field: os.getenv(env_var) for field, env_var in _PROD_ENV_VARS.items()}

# Bucket handle used by the cells below to read the raw time series.
bucket = S3_Bucket(PROD_CREDS)

def get_renault_raw_ts(vin:str) -> DF:
    """Read one vehicle's raw time series from the bucket, ordered by date.

    Despite its name, this reads from the ``mercedes-benz`` prefix of the
    bucket. The "date" column becomes the (sorted) index and is also kept
    as a regular column.

    Parameters:
    - vin: str - Vehicle identifier used to build the parquet file name.

    Returns:
    - The frame returned by ``bucket.read_parquet_df``, indexed and sorted by "date".
    """
    parquet_key = f"raw_ts/mercedes-benz/time_series/{vin}.parquet"
    raw_ts = bucket.read_parquet_df(parquet_key)
    date_indexed = raw_ts.set_index("date", drop=False)
    return date_indexed.sort_index()

# Load the raw time series of every selected vehicle. Loading is best effort:
# a vehicle whose file cannot be read is skipped, but the failure is logged
# instead of being silently discarded, so missing vehicles stay visible.
_ts_logger = logging.getLogger(__name__)
raw_ts_by_vin = {}
# fleet_info is indexed by vin, so the "Type" series yields (vin, type) pairs.
for vin, vehicle_type in fleet_info["Type"].items():
    try:
        vehicle_ts = get_renault_raw_ts(vin)
    except Exception as err:
        _ts_logger.warning("Skipping %s: could not load its raw time series (%s)", vin, err)
        continue
    # Tag every row with the vehicle it belongs to and with the vehicle type.
    raw_ts_by_vin[vin] = vehicle_ts.assign(vin=vin, type=vehicle_type)

# Stack the per-vehicle frames; the dict keys become an outer index level
# named "vin" on top of each frame's "date" index.
raw_tss = pd.concat(raw_ts_by_vin, axis="index", keys=raw_ts_by_vin.keys(), names=["vin"])

# Vehicle types that actually have data.
raw_tss["type"].unique()

## Time series processing

In [None]:
# Inspect the combined index: the concat above names its outer level "vin".
raw_tss.index

In [None]:
import plotly.graph_objects as go

def twinx(df, cols_y1, cols_y2, x_col=None):
    """
    Creates a Plotly figure with two y-axes (twin y-axis plot).

    Parameters:
    - df: pd.DataFrame - The dataframe containing the data.
    - cols_y1: list - List of column names for the primary y-axis (left side).
    - cols_y2: list - List of column names for the secondary y-axis (right side).
    - x_col: str (optional) - The column name to be used for the x-axis. If not provided, index is used.

    Returns:
    - fig: go.Figure - Plotly figure with dual y-axis.

    Notes:
    - Axis title colors are set through ``title.font``. The older ``titlefont``
      axis attribute is deprecated and is rejected by recent Plotly releases.
    """
    fig = go.Figure()

    # Determine the x-axis data
    x_data = df.index if x_col is None else df[x_col]

    # Add traces for the first (left) y-axis
    for col in cols_y1:
        fig.add_trace(go.Scatter(x=x_data, y=df[col], name=col, yaxis="y1"))

    # Add traces for the second (right) y-axis
    for col in cols_y2:
        fig.add_trace(go.Scatter(x=x_data, y=df[col], name=col, yaxis="y2"))

    # Update layout for dual y-axis; the second axis is drawn over the first
    # one and placed on the right-hand side.
    fig.update_layout(
        yaxis=dict(title=dict(text="Primary Y-Axis", font=dict(color="blue"))),
        yaxis2=dict(
            title=dict(text="Secondary Y-Axis", font=dict(color="red")),
            overlaying="y",
            side="right",
        ),
        xaxis=dict(title=x_col if x_col else "Index"),
    )

    return fig


In [None]:
# Work on a single vehicle. The explicit copy makes `ts` independent of
# `raw_tss`, so columns added to `ts` later do not act on a slice of it.
ts = raw_tss.xs("W1K2938901F006183", level=0).copy()

# Columns drawn on the left axis of the plot below; (un)comment entries to
# change what is displayed.
COLS_TO_DISPLAY = [
    # 'date',
    # 'charging.preconditioning_remaining_time',
    # 'charging.preconditioning_departure_status',
    # 'charging.battery_level_at_departure',
    # 'charging.smart_charging_status',
    # 'charging.status',
    # 'charging.starter_battery_state',
    # 'vin',
    # 'type',
    # 'charging.plugged_in',
    # 'charging.charging_rate',

    # 'diagnostics.odometer',
    # 'charging.battery_level',
    'charging.estimated_range',
    'charging.max_range',
]
# Ranges on the left axis, battery level on the right axis, over time.
twinx(ts, COLS_TO_DISPLAY, ["charging.battery_level"], x_col="date")

In [None]:
# "yes" = estimated range divided by (max range * battery level) for this vehicle.
range_times_level = ts["charging.max_range"] * ts["charging.battery_level"]
ts["yes"] = ts["charging.estimated_range"] / range_times_level

# Both range columns against the battery level.
ranges_fig = px.scatter(ts, x="charging.battery_level", y=["charging.estimated_range", "charging.max_range"])
ranges_fig.show()

# The ratio against the battery level.
ratio_fig = px.scatter(ts, x="charging.battery_level", y=["yes"])
ratio_fig.show()

In [None]:
# Same ratio as for the single vehicle, computed over the whole fleet:
# estimated range divided by (max range * battery level).
fleet_range_times_level = raw_tss["charging.max_range"] * raw_tss["charging.battery_level"]
raw_tss["yes"] = raw_tss["charging.estimated_range"] / fleet_range_times_level
# px.scatter(raw_tss, x="charging.battery_level", y=["charging.estimated_range", "charging.max_range"]).show()

# The ratio against battery level, then against odometer, colored per vehicle.
ratio_vs_level_fig = px.scatter(raw_tss, x="charging.battery_level", y=["yes"], color="vin")
ratio_vs_level_fig.show()
ratio_vs_odometer_fig = px.scatter(raw_tss, x="diagnostics.odometer", y=["yes"], color="vin")
ratio_vs_odometer_fig.show()