# Check monotonicity of results
This notebook is not an EDA notebook but rather a tool to check whether the SoH values in the vehicle_data table are monotonically decreasing over time for each vehicle.

In [None]:
from core.sql_utils import get_sqlalchemy_engine
import pandas as pd
from sqlalchemy import text
import plotly.express as px

### Data extraction

In [None]:
# Build the engine with the project helper, then load one row per
# vehicle_data record (joined to its vehicle's VIN) into a DataFrame.
engine = get_sqlalchemy_engine()

# The query runs on the connection managed by the ``with`` block, so the
# connection is released as soon as the DataFrame has been loaded and no
# stray connection is left open for the lifetime of the kernel.
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT v.vin, vd.timestamp, vd.odometer, vd.soh FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id;"""), connection)





### Check monotonicity

In [None]:
def soh_is_decreasing(df):
    """Return whether ``soh`` never increases when rows are ordered by ``timestamp``.

    Rows with a missing ``soh`` are discarded *before* differencing, so each
    reading is compared with the previous available reading. Without this, a
    missing value would turn both neighbouring differences into NaN and an
    increase across the gap would go unnoticed.

    Args:
        df: DataFrame with at least the columns ``timestamp`` and ``soh``.

    Returns:
        A truthy boolean when every consecutive difference is ``<= 0``
        (plateaus are allowed). With fewer than two readings there is nothing
        to compare and the result is vacuously true.
    """
    soh = df.sort_values("timestamp")["soh"].dropna()
    # The first difference has no predecessor and is NaN; drop it so that it
    # does not take part in the comparison.
    return (soh.diff().dropna() <= 0).all()

# One row per VIN with a boolean flag telling whether its SoH never increases
# over time. Rows without an SoH value are left out of the check.
montonic_mask_per_vin = (
    dbeaver_df
    .dropna(subset=["soh"])
    .sort_values("timestamp")
    .groupby("vin")
    .apply(soh_is_decreasing, include_groups=False)
    .to_frame(name="monotonically_decreasing")
    .reset_index()
)


In [None]:
# Share ("normalized") and number ("absolute") of VINs for each value of the
# decreasing flag, shown side by side in a single table.
value_counts_per_vin = {
    label: montonic_mask_per_vin["monotonically_decreasing"].value_counts(
        normalize=as_share, dropna=False
    )
    for label, as_share in (("normalized", True), ("absolute", False))
}
pd.concat(
    value_counts_per_vin,
    axis=1,
    keys=value_counts_per_vin.keys(),
    names=["value_counts_type"],
)

In [None]:
def soh_is_strictly_increasing(df):
    """Return whether ``soh`` strictly increases when rows are ordered by ``timestamp``.

    Rows with a missing ``soh`` are discarded *before* counting and
    differencing, so each reading is compared with the previous available
    reading and missing values do not count as data points.

    Args:
        df: DataFrame with at least the columns ``timestamp`` and ``soh``.

    Returns:
        ``False`` when fewer than two readings are available; otherwise a
        truthy boolean when every consecutive difference is ``> 0``.
    """
    soh = df.sort_values("timestamp")["soh"].dropna()
    # Fewer than 2 points: impossible to say whether the series is strictly
    # increasing, so do not flag it.
    if len(soh) < 2:
        return False
    # The first difference has no predecessor and is NaN; drop it so that it
    # does not take part in the comparison.
    return (soh.diff().dropna() > 0).all()

# One row per VIN with a boolean flag telling whether its SoH strictly
# increases over time. Rows without an SoH value are left out of the check.
montonic_mask_per_vin = (
    dbeaver_df
    .dropna(subset=["soh"])
    .groupby("vin")
    .apply(soh_is_strictly_increasing, include_groups=False)
    .to_frame(name="monotonically_increasing")
    .reset_index()
)

# Share ("normalized") and number ("absolute") of VINs for each value of the
# increasing flag, shown side by side in a single table.
value_counts_per_vin = {
    label: montonic_mask_per_vin["monotonically_increasing"].value_counts(
        normalize=as_share, dropna=False
    )
    for label, as_share in (("normalized", True), ("absolute", False))
}
pd.concat(
    value_counts_per_vin,
    axis=1,
    keys=value_counts_per_vin.keys(),
    names=["value_counts_type"],
)

In [None]:
# Scatter the SoH history of every VIN flagged as strictly increasing, one
# colour per VIN.
px.scatter(
    dbeaver_df[
        dbeaver_df["vin"].isin(
            montonic_mask_per_vin.loc[
                montonic_mask_per_vin["monotonically_increasing"].eq(True), "vin"
            ].to_list()
        )
    ],
    x="timestamp",
    y="soh",
    color="vin",
)