# Calcul du BiB score pour les batteries

In [None]:
from core.sql_utils import *
import numpy as np
from core.numpy_utils import numpy_safe_eval
import plotly.graph_objects as go
import plotly.express as px
from scipy import stats
import pandas as pd
from results.trendline.main import compute_trendline_functions

In [None]:
# Load every SoH / odometer measurement matching the model filters below,
# together with the trendline columns stored on the vehicle model.
# `get_connection` is not defined in this file (presumably provided by the
# `core.sql_utils` star import -- confirm).
with get_connection() as con:
    cursor = con.cursor()
    cursor.execute("""
        SELECT 
        v.vin, 
        vd.id,
        CAST(vd.soh AS FLOAT),
        CAST(vd.odometer AS FLOAT),
        vm.trendline,
        vm.trendline_min,
        vm.trendline_max,
        vm.id AS model_id
    FROM vehicle_data vd
    LEFT JOIN vehicle v ON vd.vehicle_id = v.id
    LEFT JOIN vehicle_model vm ON v.vehicle_model_id = vm.id
    WHERE vd.soh is not null
    AND vd.odometer is not null 
    AND vm.model_name = 'c40'
    AND vm.type = 'recharge twin motor'
    AND vd.soh < 1""")
    # Column labels are taken from the cursor metadata so the frame mirrors
    # the SELECT list.
    column_names = [description[0] for description in cursor.description]
    dbeaver_df = pd.DataFrame(cursor.fetchall(), columns=column_names)

In [None]:
def _is_missing_trendline(bound):
    """Return True when *bound* holds no trendline (``None`` or a float NaN)."""
    return bound is None or (isinstance(bound, float) and np.isnan(bound))


def _evaluate_bound(bound, frame, column):
    """Evaluate ``bound['trendline']`` at every odometer value of *frame*.

    Rows stay NaN when *bound* is missing, or when *frame* has a *column*
    whose value is null for that row.
    """
    values = np.full(len(frame), np.nan, dtype=float)
    if _is_missing_trendline(bound):
        return values

    if column in frame.columns:
        usable = frame[column].notna().to_numpy()
    else:
        # Without a per-row column there is nothing to mask on.
        usable = np.ones(len(frame), dtype=bool)

    if usable.any():
        km = frame['odometer'].to_numpy(dtype=float)
        # One vectorised evaluation instead of one call per row.
        values[usable] = numpy_safe_eval(bound['trendline'], x=km[usable])
    return values


def classify_soh(df, trendline_mean=None, trendline_upper=None, trendline_lower=None):
    """Assign a category 'A'..'E' to every row of *df* from its SoH.

    Rows are ordered by ``odometer`` and each ``soh`` value is compared with
    four bounds:

    * ``trendline_mean is None``: the bounds are the 5th / 25th / 75th / 95th
      percentiles of the SoH values in a window of up to 100 rows on each
      side (in odometer order).
    * otherwise: the outer bounds come from ``trendline_upper['trendline']``
      and ``trendline_lower['trendline']`` evaluated with ``numpy_safe_eval``;
      the inner bounds come from ``compute_trendline_functions``.

    'A' is at or above the top bound, 'E' is below the bottom bound, 'B' and
    'D' lie between an inner and an outer bound, everything else is 'C'.
    Comparisons against a NaN bound are False, so a missing bound never
    assigns its categories.

    Args:
        df: DataFrame with ``soh`` and ``odometer`` columns. It is not
            modified.
        trendline_mean: Selects the trendline mode when not ``None``.
        trendline_upper: Mapping with a ``'trendline'`` expression for the
            top bound, or ``None``.
        trendline_lower: Mapping with a ``'trendline'`` expression for the
            bottom bound, or ``None``.

    Returns:
        Series named ``'cat'`` whose index (named ``'original_index'``) holds
        the labels of *df*, so it can be assigned straight back as a column.
    """
    if len(df) == 0:
        # Nothing to classify; the windowed percentile code below cannot
        # handle an empty input.
        return pd.Series(
            np.array([], dtype=object),
            index=df.index.rename('original_index'),
            name='cat',
        )

    sorted_df = df.copy()
    # Remember the caller's labels before sorting discards them.
    sorted_df['original_index'] = sorted_df.index
    sorted_df = sorted_df.sort_values('odometer').reset_index(drop=True)
    soh_sorted = sorted_df['soh'].to_numpy()

    categories_sorted = np.full(len(sorted_df), 'C', dtype=object)

    if trendline_mean is None:
        count = len(soh_sorted)
        bounds = np.array([
            np.percentile(
                soh_sorted[max(0, i - 100): min(count, i + 100)],
                [5, 25, 75, 95]
            )
            for i in range(count)
        ])

        # One percentile column per bound, one row per data point.
        p5, p25, p75, p95 = bounds.T

        categories_sorted[soh_sorted >= p95] = 'A'
        categories_sorted[(soh_sorted >= p75) & (soh_sorted < p95)] = 'B'
        categories_sorted[(soh_sorted >= p25) & (soh_sorted < p75)] = 'C'
        categories_sorted[(soh_sorted >= p5) & (soh_sorted < p25)] = 'D'
        categories_sorted[soh_sorted < p5] = 'E'

    else:
        # Outer bounds from the supplied max / min trendlines.
        t95 = _evaluate_bound(trendline_upper, sorted_df, 'trendline_max')
        t5 = _evaluate_bound(trendline_lower, sorted_df, 'trendline_min')

        # Inner bounds (used as the 25 / 75 trendlines) fitted on this data.
        # NOTE(review): the interval is (5, 75) although the bounds are used
        # as 25 / 75 -- confirm against compute_trendline_functions.
        _, upper_bound, lower_bound = compute_trendline_functions(
            sorted_df['odometer'],
            sorted_df['soh'],
            distribution=1,
            interval=(5, 75)
        )

        km = sorted_df['odometer'].to_numpy(dtype=float)
        t25 = numpy_safe_eval(lower_bound['trendline'], x=km)
        t75 = numpy_safe_eval(upper_bound['trendline'], x=km)

        # Final classification against the trendlines.
        categories_sorted[soh_sorted >= t95] = 'A'
        categories_sorted[soh_sorted < t5] = 'E'
        categories_sorted[(soh_sorted > t75) & (soh_sorted < t95)] = 'B'
        categories_sorted[(soh_sorted < t25) & (soh_sorted >= t5)] = 'D'

    sorted_df['cat'] = categories_sorted
    return sorted_df.set_index('original_index')['cat']

In [None]:
# Classify every measurement, using the trendline values stored on the row
# labelled 0 as the mean / upper / lower trendlines.
dbeaver_df['cat'] = classify_soh(
    dbeaver_df,
    dbeaver_df.loc[0, 'trendline'],
    dbeaver_df.loc[0, 'trendline_max'],
    dbeaver_df.loc[0, 'trendline_min'],
)

## Graphique

In [None]:

# Scatter plot of SoH against odometer, one trace per category, with the
# mean / max / min trendlines drawn on top.
df_plot = dbeaver_df[dbeaver_df['cat'].notna()].copy()
category_order = ['A','B','C','D','E']

# zip() stops at the shorter sequence, so only the first five colours are used.
colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#c0392b']
color_map = dict(zip(category_order, colors))

fig = go.Figure()
for category in category_order:
    df_cat = df_plot[df_plot['cat'] == category]
    if not df_cat.empty:
        fig.add_trace(go.Scatter(
            x=df_cat['odometer'],
            y=df_cat['soh'],
            mode='markers',
            name=category,
            marker=dict(
                color=color_map[category],
                size=6,
                opacity=0.7,
                line=dict(
                    width=0.5,
                    color='white'
                )
            ),
            hovertemplate='<b>%{fullData.name}</b><br>' +
                         'Odometer: %{x:,.0f} km<br>' +
                         'SoH: %{y:.3f}<br>' +
                         '<extra></extra>'
        ))


# First non-null value of each trendline column.
trendline_data = df_plot[df_plot['trendline'].notna()]['trendline'].iloc[0]
trendline_data_max = df_plot[df_plot['trendline_max'].notna()]['trendline_max'].iloc[0]
trendline_data_min = df_plot[df_plot['trendline_min'].notna()]['trendline_min'].iloc[0]
odometer_min = df_plot['odometer'].min()
odometer_max = df_plot['odometer'].max()
# The curves are drawn on a fixed 0 - 200 000 range, not on the observed
# odometer_min / odometer_max.
odometer_range = np.linspace(0, 200_000, 200)

# (legend name, trendline mapping, line colour) -- drawn in this order.
trendline_traces = [
    ('Trendline', trendline_data, 'red'),
    ('Trendline max', trendline_data_max, 'black'),
    ('Trendline min', trendline_data_min, 'black'),
]
for trace_name, trace_source, trace_color in trendline_traces:
    trendline_values = numpy_safe_eval(
        trace_source['trendline'],
        x=odometer_range
    )
    fig.add_trace(go.Scatter(
        x=odometer_range,
        y=trendline_values,
        mode='lines',
        name=trace_name,
        # Plain dict(), like every other call in this cell: the subscripted
        # form `dict[str, str | int](...)` fails before Python 3.10.
        line=dict(
            color=trace_color,
            width=3,
            dash='dash'
        ),
        hovertemplate=trace_name + '<br>' +
                        'Odometer: %{x:,.0f} km<br>' +
                        'SoH prédit: %{y:.3f}<br>' +
                        '<extra></extra>'
    ))

fig.update_layout(
    title=dict(
        text='SoH vs Odometer - Points colorés par catégorie',
        font=dict(size=18, color='black')
    ),
    xaxis=dict(
        title='Odometer (km)',
        showgrid=True,
        gridcolor='lightgray',
        gridwidth=1,
        zeroline=False
    ),
    yaxis=dict(
        title='State of Health (SoH)',
        showgrid=True,
        gridcolor='lightgray',
        gridwidth=1,
        range=[0.85, 1.05]
    ),
    legend=dict(
        title='Catégories',
        font=dict(size=10),
        x=1.02,
        y=1,
        xanchor='left',
        yanchor='top'
    ),
    hovermode='closest',
    template='plotly_white',
    width=1200,
    height=700,
    margin=dict(r=200)
)
fig.show()



# Full function

In [None]:
def eval_trendline_vectorized(expr, x):
    """Evaluate the trendline expression *expr* at *x*.

    Args:
        expr: Expression handed to ``numpy_safe_eval``, or ``None`` when no
            trendline is available.
        x: Values bound to the name ``x`` in the expression.

    Returns:
        The result of ``numpy_safe_eval(expr, x=x)``; when *expr* is ``None``,
        a float array shaped like *x* and filled with NaN.
    """
    if expr is not None:
        return numpy_safe_eval(expr, x=x)
    # No trendline: NaN everywhere, so comparisons against it are False.
    return np.full_like(x, np.nan, dtype=float)

def _first_trendline_expr(column):
    """Return the ``'trendline'`` entry of the first non-null value of *column*, or None."""
    non_null = column.dropna()
    if non_null.empty:
        return None
    return non_null.iloc[0]['trendline']


def compute_bib_score():
    """Compute one BiB score ('A'..'E') per VIN for every vehicle model.

    All rows of ``vehicle_data`` with a non-null SoH and odometer are loaded
    and processed model by model:

    * models without a mean trendline are classified against the
      5 / 25 / 75 / 95 percentiles of a sliding window of up to 100 rows on
      each side (in odometer order);
    * models with a mean trendline are classified against the stored
      max / min trendlines plus two bounds from
      ``compute_trendline_functions``.

    For each VIN the score kept is the one of its row with the lowest SoH.

    Returns:
        DataFrame with columns ``vin`` and ``bib_score``; empty (same
        columns) when the query returns no rows.
    """
    with get_connection() as con:
        df = pd.read_sql("""
            SELECT 
                v.vin,
                vm.id AS model_id,
                vd.soh::float,
                vd.odometer::float,
                vm.trendline,
                vm.trendline_min,
                vm.trendline_max
            FROM vehicle_data vd
            JOIN vehicle v ON vd.vehicle_id = v.id
            JOIN vehicle_model vm ON v.vehicle_model_id = vm.id
            WHERE vd.soh IS NOT NULL
              AND vd.odometer IS NOT NULL
        """, con)

    results = []

    for _model_id, df_model in df.groupby("model_id"):
        df_model = df_model.sort_values("odometer")
        soh = df_model["soh"].to_numpy()
        km = df_model["odometer"].to_numpy()

        categories = np.full(len(df_model), "C", dtype=object)

        trendline_mean = _first_trendline_expr(df_model["trendline"])
        trendline_max = _first_trendline_expr(df_model["trendline_max"])
        trendline_min = _first_trendline_expr(df_model["trendline_min"])

        if trendline_mean is None:
            # No trendline for this model: use local percentiles.
            window = 100
            count = len(soh)
            bounds = np.array([
                np.percentile(
                    soh[max(0, i - window):min(count, i + window)],
                    [5, 25, 75, 95]
                )
                for i in range(count)
            ])

            p5, p25, p75, p95 = bounds.T

            categories[soh >= p95] = "A"
            categories[(soh >= p75) & (soh < p95)] = "B"
            categories[(soh >= p25) & (soh < p75)] = "C"
            categories[(soh >= p5) & (soh < p25)] = "D"
            categories[soh < p5] = "E"

        else:
            # Trendlines available: a missing max / min evaluates to NaN, and
            # comparisons against NaN are False.
            t95 = eval_trendline_vectorized(trendline_max, km)
            t5 = eval_trendline_vectorized(trendline_min, km)

            # NOTE(review): the interval is (5, 75) although the bounds are
            # used as 25 / 75 -- confirm against compute_trendline_functions.
            _, upper, lower = compute_trendline_functions(
                km, soh, distribution=1, interval=(5, 75)
            )

            t25 = eval_trendline_vectorized(lower["trendline"], km)
            t75 = eval_trendline_vectorized(upper["trendline"], km)

            categories[soh >= t95] = "A"
            categories[soh < t5] = "E"
            categories[(soh > t75) & (soh < t95)] = "B"
            categories[(soh < t25) & (soh >= t5)] = "D"

        df_model["bib_score"] = categories

        # Keep, for each VIN, the score of its lowest-SoH row.
        results.append(
            df_model.sort_values("soh")
            .groupby("vin", as_index=False)
            .first()[["vin", "bib_score"]]
        )

    if not results:
        # pd.concat raises ValueError on an empty list; return an empty
        # result with the expected columns instead.
        return pd.DataFrame(columns=["vin", "bib_score"])

    final_df = pd.concat(results, ignore_index=True)

    # # Update the bib_score in the vehicle table
    # engine = get_sqlalchemy_engine()
    # final_df.to_sql("tmp_soh", engine, if_exists="replace", index=False)

    # with get_connection() as con:
    #     cur = con.cursor()
    #     cur.execute("""
    #         UPDATE vehicle v
    #         SET bib_score = s.bib_score
    #         FROM tmp_soh s
    #         WHERE v.vin = s.vin
    #     """)
    #     con.commit()
    return final_df
