In [None]:
from core.gsheet_utils import *
from rapidfuzz import process, fuzz
from core.sql_utils import *
from load.trendline.trendline_utils import *

In [None]:
# Reference table: vehicle models joined with their OEM name and battery capacity.
engine = get_sqlalchemy_engine()

# Run the query on the connection owned by the context manager so that it is
# released when the block exits (a second, never-closed connection used to be
# opened just for the query).
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT vm.model_name, vm.id, vm.type, o.oem_name, b.capacity FROM vehicle_model vm
                                  join OEM o on vm.oem_id=o.id
                                  join battery b on b.id=vm.battery_id;"""), connection)


In [None]:
# Fetch the "Courbes OS" worksheet; row 0 supplies the column names and only
# the first eight columns are kept.
df = load_excel_data(
    get_gspread_client(),
    "202505 - Courbes SoH",
    "Courbes OS",
)
df_sheet = pd.DataFrame(data=df[1:, :8], columns=df[0, :8])



In [None]:
def find_db_type(row, db_df):
    """Find the database vehicle-model id that best matches one sheet row.

    Candidates are narrowed in three steps: exact match on the OEM name,
    fuzzy match on the model name, then either the closest battery capacity
    (ties broken by a fuzzy match on the type) or, when the capacity is
    'unknown', a fuzzy match on the type alone.

    Args:
        row (pd.Series): sheet row providing 'OEM', 'Modèle', 'Type' and
            'battery_capacity' ('unknown' or a number optionally suffixed
            with 'kWh'/'kwh').
        db_df (pd.DataFrame): reference table with the columns model_name,
            id, type, oem_name and capacity.

    Returns:
        The 'id' value of the best matching row of ``db_df``, or None when no
        candidate can be matched.
    """
    oem = row['OEM'].lower()
    model_target = row['Modèle'].lower()
    version_target = row['Type'].lower()

    # Keep only the models of this OEM; nothing to match if there are none.
    subset = db_df[db_df['oem_name'] == oem].copy()
    if subset.empty:
        return None

    # Closest model name among the OEM's models.
    match_model = process.extractOne(model_target, subset['model_name'], scorer=fuzz.token_sort_ratio)
    if not match_model:
        return None
    subset = subset[subset['model_name'] == match_model[0]]

    candidates = subset
    capacity_matched = False
    if row['battery_capacity'] != 'unknown':
        battery_target = float(row['battery_capacity'].replace('kWh', '').replace('kwh', '').strip())
        distance = (subset["capacity"] - battery_target).abs()
        closest_rows = subset[distance == distance.min()]
        # closest_rows is empty when every capacity is missing; in that case
        # fall back to matching on the type alone.
        if not closest_rows.empty:
            candidates = closest_rows
            capacity_matched = True

    # Several rows may remain: pick the one whose type looks most alike.
    match_type = process.extractOne(version_target, candidates['type'], scorer=fuzz.token_sort_ratio)
    if match_type is not None:
        return candidates.loc[match_type[2], "id"]

    # No usable type to compare against (extractOne returned nothing): the
    # capacity match alone decides, otherwise there is no match at all.
    if capacity_matched:
        return candidates.iloc[0]["id"]
    return None

        

In [None]:
# Resolve every sheet row to a database model id and store it as text
# (a row without a match therefore holds the string 'None').
df_sheet['model_id'] = df_sheet.apply(
    lambda sheet_row: find_db_type(sheet_row, dbeaver_df),
    axis=1,
)
df_sheet['model_id'] = df_sheet['model_id'].astype(str)

In [None]:
# Convert the two measurement columns to floats: SoH goes from a percentage
# string to a fraction, the odometer has its embedded spaces removed.
df_sheet['SoH'] = df_sheet['SoH'].map(lambda raw: raw.replace('%', '').strip()).astype(float) / 100
df_sheet['Odomètre (km)'] = df_sheet['Odomètre (km)'].map(lambda raw: str(raw).replace(' ', '').strip()).astype(float)


In [None]:
import plotly.graph_objects as go
def trendline_apply(x, f):
    """Evaluate a trendline expression string for the given x values.

    Args:
        x: value(s) bound to the name ``x`` inside the expression.
        f (str): Python expression written in terms of ``x`` and, optionally,
            ``np`` (numpy).

    Returns:
        The result of evaluating the expression.
    """
    import numpy as np

    # NOTE(security): eval runs arbitrary code; only feed it expressions built
    # by this project, never untrusted input.
    # The namespace is restricted to `np` and `x` so the result no longer
    # depends on whatever globals the kernel holds, matching how the bound
    # helpers of this notebook evaluate the same expressions.
    return eval(f, {"np": np, "x": x})

def graph(df, trendline, trendline_max, trendline_min, model):
    """Show the SoH measurements against the odometer with three trendlines.

    Args:
        df (pd.DataFrame): data with the columns 'Odomètre (km)' and 'SoH'.
        trendline (dict): mean trendline; its 'trendline' entry is an
            expression evaluated with ``trendline_apply``.
        trendline_max (dict): same structure, drawn as the 'Upper' curve.
        trendline_min (dict): same structure, drawn as the 'Lower' curve.
        model: label inserted in the figure title.

    Returns:
        The return value of ``fig.show()``.
    """
    odometer = df['Odomètre (km)']
    x_sorted = odometer.sort_values()

    fig = go.Figure()

    # Raw measurements.
    fig.add_trace(go.Scatter(
        x=odometer,
        y=df['SoH'],
        mode='markers',
        marker_color='rgba(50, 182, 193, .9)',
        name='SoH compute'
    ))

    # The three curves only differ by expression, colour and legend label.
    curves = (
        (trendline, 'red', 'Fit'),
        (trendline_max, 'green', 'Upper'),
        (trendline_min, 'green', 'Lower'),
    )
    for curve, colour, label in curves:
        fig.add_trace(go.Scatter(
            x=x_sorted,
            y=trendline_apply(x_sorted, curve['trendline']),
            mode='lines',
            line=dict(color=colour),
            name=label
        ))

    fig.update_layout(
        width=1000,
        height=600,
        xaxis_title='Odometer',
        yaxis_title='State of Health (SoH)',
        legend_title='Légende',
        title=f"version: {model}",
        template='plotly_white',
        xaxis=dict(range=[0, 150000]),  # adjust to the desired odometer scale
        yaxis=dict(range=[.75, 1.1])    # adjust to the desired SoH scale
    )

    return fig.show()
    

In [None]:
def _rows_for_models(*model_ids):
    """Return the rows of df_sheet whose model_id is one of ``model_ids``."""
    return df_sheet[df_sheet['model_id'].isin(list(model_ids))]


# One frame per studied model (or group of models), selected by database id.
df_r110 = _rows_for_models('6fd1ed33-128b-4b99-a2bf-e01f992e6cf9')
df_q210 = _rows_for_models('6fbdaf91-ff62-4617-b4bb-ca37bfa406ab')
# NOTE(review): id 9f4fca9a-... belongs to both Spring groups below — confirm
# this overlap is intended.
df_spring65 = _rows_for_models("057e888d-4160-413d-98b2-0a3b75ab9b82", '9f4fca9a-0e8b-4570-8271-1b8be68e6607')
df_spring45 = _rows_for_models("9f4fca9a-0e8b-4570-8271-1b8be68e6607", "c45d46f3-9327-4308-86d9-ae554979d6ab")
df_208 = _rows_for_models('94d3310b-b9ac-463f-8a0a-303f492abee3')
df_2008 = _rows_for_models('cc16301c-84a4-43cf-8842-44e4e71df338')
df_niro = _rows_for_models('b84f6c00-a53b-4406-bfef-4a758a2d00e4')
df_mini = _rows_for_models('18e1c778-9260-4d5d-8fcd-61b2d009923f')
df_leaf = _rows_for_models('20b3a708-b621-408d-96b5-3f7bf7222986')
df_r90 = _rows_for_models('f7ecad76-de51-4e72-92ba-4f0c6817cc1e', 'f8ec7f9c-1f50-490d-b243-96c689806edb')

df_kona = _rows_for_models('00291455-6521-4256-978b-6aa692837573')

df_twingo = _rows_for_models('41a4dc56-3b62-43e8-bb0a-eec6b7c42510')


In [None]:
# Per-model frames, in the order they are processed.
list_df = [
    df_r110,
    df_q210,
    df_spring65,
    df_spring45,
    df_208,
    df_2008,
    df_niro,
    df_mini,
    df_leaf,
    df_r90,
    df_kona,
    df_twingo,
]

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# --- 1. Load the data ---
df = list_df[0]  # Replace with your own source if needed
# Only points with SoH above 0.8 are used.
df = df[df['SoH'] > .8]
x_data, y_data = df["Odomètre (km)"].values, df["SoH"].values
# Append the synthetic point (odometer 0, SoH 1) before fitting.
x_data = np.hstack((x_data, np.array([0])))
y_data = np.hstack((y_data, np.array([1])))
sort_idx = np.argsort(x_data)
x_sorted, y_sorted = x_data[sort_idx], y_data[sort_idx]
# Fit log_function; the bounds keep its first parameter within [0.97, 1.03].
coef_mean, _ = curve_fit(log_function, x_sorted, y_sorted, maxfev=10000, bounds=([.97, -np.inf, -np.inf], [1.03, np.inf, np.inf]))
print(coef_mean)
# x and y are re-read from df, so they do not contain the synthetic point.
x = df["Odomètre (km)"].values
y = df["SoH"].values

# --- 2. Fixed parameter ---
a =coef_mean[2]  # May be tuned later

# --- 3. Transformation of x ---
X_trans = np.log1p(x / a)
X_design = sm.add_constant(X_trans)  # Adds the intercept column

# --- 4. Linear regression ---
model = sm.OLS(y, X_design).fit()

# --- 5. Predictions and 95% confidence interval ---
x_pred = np.linspace(x.min(), x.max(), 300)
x_pred_trans = np.log1p(x_pred / a)
X_pred_design = sm.add_constant(x_pred_trans)
pred = model.get_prediction(X_pred_design)
pred_summary = pred.summary_frame(alpha=0.05)

# --- 6. Plot ---
mean = pred_summary["mean"].astype(float).values
ci_lower = pred_summary["mean_ci_lower"].astype(float).values
ci_upper = pred_summary["mean_ci_upper"].astype(float).values

plt.figure(figsize=(10, 6))
plt.scatter(x, y, label="Données réelles", alpha=0.7)
plt.plot(x_pred, mean, color="red", label="Trendline")
plt.fill_between(x_pred, ci_lower, ci_upper, color="red", alpha=0.3, label="IC 95%")
plt.xlabel("Odomètre (km)")
plt.ylabel("SoH")
plt.title("Trendline avec intervalle de confiance à 95%")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# --- 7. Display the results ---
print("Coefficients estimés :")
print(model.params)

print("\nIntervalles de confiance (95%) :")
print(model.conf_int())


In [None]:
def clean_battery_data(df, soh_colum, odometer_column):
    """
    Clean battery data by removing outlier and incomplete rows.

    The two measurement columns are renamed to 'soh' and 'odometer'; the
    input frame itself is left untouched.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame holding the battery data, with one odometer column and one
        SoH column
    soh_colum : str
        Name of the column holding the SoH values
    odometer_column : str
        Name of the column holding the odometer values

    Returns:
    --------
    pandas.DataFrame
        Cleaned DataFrame, without the rows where
        - the odometer is below 20000 and the SoH below 0.95,
        - the SoH is below 0.8,
        - the SoH or the odometer is missing.
    """
    df_clean = df.rename(columns={odometer_column: 'odometer', soh_colum: 'soh'})

    # Filter with boolean masks rather than dropping index labels: dropping by
    # label would also remove valid rows that share a label with a rejected
    # row when the index is not unique.
    low_soh_at_low_mileage = (df_clean['odometer'] < 20000) & (df_clean['soh'] < .95)
    soh_too_low = df_clean['soh'] < .8
    df_clean = df_clean[~(low_soh_at_low_mileage | soh_too_low)]

    return df_clean.dropna(subset=["soh", "odometer"])


def get_model_name(df, dbeaver_df):
    """
    Build the display name of the model referenced by a data frame.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with a 'model_id' column; only its first distinct value is used
    dbeaver_df : pandas.DataFrame
        Reference DataFrame with the columns 'id', 'model_name' and 'type'

    Returns:
    --------
    str
        "<model_name> <type>" of the first reference row whose id matches
    """
    first_model_id = str(df['model_id'].unique()[0])
    # Ids are compared as text on both sides.
    reference_rows = dbeaver_df[dbeaver_df['id'].astype(str) == first_model_id]
    model_name = reference_rows['model_name'].values[0]
    model_type = reference_rows['type'].values[0]
    return f"{model_name} {model_type}"


def prepare_data_for_fitting(df):
    """
    Build the arrays used for fitting: the data plus the origin point, sorted.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with the columns 'odometer' and 'soh'

    Returns:
    --------
    tuple
        (x_sorted, y_sorted) - odometer and SoH arrays, extended with the
        point (0, 1) and ordered by increasing odometer
    """
    # Add the point (odometer 0, SoH 1) to the measurements.
    odometer = np.hstack((df["odometer"].values, np.array([0])))
    soh = np.hstack((df["soh"].values, np.array([1])))

    order = np.argsort(odometer)
    return odometer[order], soh[order]


def compute_main_trendline(x_sorted, y_sorted):
    """
    Fit the main trendline and derive its lower and upper bounds.

    Parameters:
    -----------
    x_sorted : numpy.array
        Sorted x data
    y_sorted : numpy.array
        y data in the same order

    Returns:
    --------
    tuple
        (coef_mean, coef_lower, coef_upper, mean, upper, lower) where the
        coefficients come from curve_fit / get_bound_coef and the last three
        items are what build_trendline_expressions returns
    """
    # The bounds keep the first parameter of log_function within [0.97, 1.03].
    coef_mean, _ = curve_fit(log_function, x_sorted, y_sorted, maxfev=10000,
                           bounds=([.97, -np.inf, -np.inf], [1.03, np.inf, np.inf]))
    y_fit = log_function(x_sorted, *coef_mean)
    y_lower, y_upper = compute_trendline_bounds(y_sorted, y_fit)
    # (the debug print of both bound arrays was removed: it flooded the output
    # on every call)
    coef_lower, coef_upper = get_bound_coef(x_sorted, y_lower, y_upper)
    mean, upper, lower = build_trendline_expressions(coef_mean, coef_lower, coef_upper, y_lower, y_upper)
    return coef_mean, coef_lower, coef_upper, mean, upper, lower


def compute_upper_bound(df, trendline, coef_mean):
    """
    Recompute the upper bound from the points lying above the mean trendline.

    Parameters:
    -----------
    df : pandas.DataFrame
        Data with the columns 'odometer' and 'soh'
    trendline : dict
        Mean trendline; its 'trendline' entry is an expression in ``x`` and
        ``np`` that is evaluated on the odometer column
    coef_mean : numpy.array
        Coefficients of the mean trendline

    Returns:
    --------
    Second item returned by build_trendline_expressions (the upper expression)
    """
    # NOTE: eval executes the expression; it must come from this project.
    mean_curve = eval(trendline['trendline'], {"np": np, "x": df["odometer"]})
    above_mean = df[df['soh'] > mean_curve]
    x_above, y_above = prepare_data_for_fitting(above_mean)

    # Same fit as for the main trendline, restricted to the points above it.
    fit_bounds = ([.97, -np.inf, -np.inf], [1.03, np.inf, np.inf])
    coef_above, _ = curve_fit(log_function, x_above, y_above, maxfev=10000,
                              bounds=fit_bounds)
    y_fit = log_function(x_above, *coef_above)
    y_lower, y_upper = compute_trendline_bounds(y_above, y_fit)
    coef_low, coef_up = get_bound_coef(x_above, y_lower, y_upper, coef_mean[0])
    expressions = build_trendline_expressions(coef_mean, coef_low, coef_up, y_lower, y_upper)
    return expressions[1]



def compute_lower_bound(df, trendlines, coef_mean=None):
    """
    Recompute the lower bound from the points lying below the mean trendline.

    Parameters:
    -----------
    df : pandas.DataFrame
        Cleaned data with the columns 'odometer' and 'soh'
    trendlines : dict
        Mean trendline; its 'trendline' entry is an expression in ``x`` and
        ``np`` that is evaluated on the odometer column
    coef_mean : numpy.array, optional
        Coefficients of the mean trendline. When omitted they are refitted
        from ``df`` instead of being read from a module-level variable.

    Returns:
    --------
    Second item returned by build_trendline_expressions
    """
    fit_bounds = ([.97, -np.inf, -np.inf], [1.03, np.inf, np.inf])

    if coef_mean is None:
        # Same fit as compute_main_trendline performs on the full data.
        x_all, y_all = prepare_data_for_fitting(df)
        coef_mean, _ = curve_fit(log_function, x_all, y_all, maxfev=10000,
                                 bounds=fit_bounds)

    # NOTE: eval executes the expression; it must come from this project.
    mean_curve = eval(trendlines['trendline'], {"np": np, "x": df["odometer"]})
    below_mean = df[df['soh'] < mean_curve]
    x_sorted, y_sorted = prepare_data_for_fitting(below_mean)

    coef_below, _ = curve_fit(log_function, x_sorted, y_sorted, maxfev=10000,
                              bounds=fit_bounds)
    y_fit = log_function(x_sorted, *coef_below)
    y_lower, y_upper = compute_trendline_bounds(y_sorted, y_fit)
    coef_low, coef_up = get_bound_coef(x_sorted, y_lower, y_upper, coef_mean[0])
    expressions = build_trendline_expressions(coef_mean, coef_low, coef_up, y_lower, y_upper)
    # NOTE(review): index 1 is the *upper* expression of the returned
    # (mean, upper, lower) triple; for a lower bound index 2 looks more
    # plausible. Kept as is to preserve current results — TODO confirm.
    return expressions[1]



def process_battery_data(df, dbeaver_df, soh_column="SoH", odometer_column="Odomètre (km)"):
    """
    Run the whole trendline pipeline for one DataFrame and plot the result.

    Parameters:
    -----------
    df : pandas.DataFrame
        Battery data with an odometer column, a SoH column and 'model_id'
    dbeaver_df : pandas.DataFrame
        Reference DataFrame with the columns 'id', 'model_name' and 'type'
    soh_column : str
        Name of the SoH column of ``df``
    odometer_column : str
        Name of the odometer column of ``df``

    Returns:
    --------
    dict
        - 'model': model name
        - 'trendlines': dict with the 'mean', 'upper' and 'lower' main trendlines
        - 'upper_bound': recomputed upper bound
        - 'lower_bound': recomputed lower bound
        - 'processed_df': cleaned DataFrame (columns 'odometer' and 'soh')
    """
    # Step 1: cleaning (renames the measurement columns to 'odometer'/'soh').
    df_clean = clean_battery_data(df, soh_column, odometer_column)

    # Step 2: model name.
    model = get_model_name(df_clean, dbeaver_df)

    # Step 3: arrays for fitting.
    x_sorted, y_sorted = prepare_data_for_fitting(df_clean)

    # Step 4: main trendline.
    coef_mean, coef_lower, coef_upper, mean, upper, lower = compute_main_trendline(x_sorted, y_sorted)

    # Step 5: bounds recomputed around the mean trendline.
    upper_bound = compute_upper_bound(df_clean, mean, coef_mean)
    lower_bound = compute_lower_bound(df_clean, mean)

    # Step 6: plot. graph() reads the sheet column names, so the cleaned
    # columns are renamed back for the plot only.
    df_plot = df_clean.rename(columns={'odometer': 'Odomètre (km)', 'soh': 'SoH'})
    graph(df_plot, mean, upper, lower, model)

    return {
        'model': model,
        'trendlines': {'mean': mean, 'upper': upper, 'lower': lower},
        'upper_bound': upper_bound,
        'lower_bound': lower_bound,
        'processed_df': df_clean
    }


def process_all_battery_data(list_df, dbeaver_df):
    """
    Apply process_battery_data to every DataFrame of a list.

    Parameters:
    -----------
    list_df : list
        DataFrames to process
    dbeaver_df : pandas.DataFrame
        Reference DataFrame holding the model information

    Returns:
    --------
    list
        One result per DataFrame, in input order
    """
    return [process_battery_data(frame, dbeaver_df) for frame in list_df]



In [None]:
def generate_trendline_functions(df, odometer_column, soh_column):
    """Compute the mean, upper and lower trendlines of one data set.

    Parameters:
    -----------
    df : pd.DataFrame
        Data with a SoH column and an odometer column
    odometer_column : str
        Name of the odometer column
    soh_column : str
        Name of the SoH column

    Returns:
    --------
    tuple or str
        (mean, upper_bound, lower_bound), or the message
        "Can't compute trendline" when fewer than 20 rows remain after
        cleaning. Callers that unpack the result must check for the message.
    """
    # clean_battery_data expects the SoH column first, then the odometer
    # column; passing them the other way round swaps the two measurements.
    df_clean = clean_battery_data(df, soh_column, odometer_column)
    if df_clean.shape[0] < 20:
        return "Can't compute trendline"
    x_data, y_data = prepare_data_for_fitting(df_clean)
    coef_mean, coef_lower, coef_upper, mean, upper_bound, lower_bound = compute_main_trendline(x_data, y_data)
    # The bounds are recomputed from the points on each side of the mean
    # trendline when the first coefficient of the fitted bound is >= 0.
    if coef_upper[0] >= 0:
        upper_bound = compute_upper_bound(df_clean, mean, coef_mean)
    if coef_lower[0] >= 0:
        lower_bound = compute_lower_bound(df_clean, mean)
    return mean, upper_bound, lower_bound

In [None]:
# Display the current contents of `df` as the cell output.
df

In [None]:

# NOTE: df_merge must already exist in the kernel when this cell runs.
for model_car in df_merge['Modèle'].unique():
    print(model_car)
    model_rows = df_merge[df_merge['Modèle'] == model_car]
    for type_car in model_rows['type'].unique():
        result = generate_trendline_functions(model_rows[model_rows['type'] == type_car], 'Odomètre (km)', "SoH")
        if isinstance(result, str):
            # Not enough data: the function returned its message instead of
            # the three trendlines, so there is nothing to unpack.
            print(result)
            continue
        mean, upper_bound, lower_bound = result
        print(mean)

In [None]:
def uniform_vehicules_type(type_car, oem_name, model_name, db_df, battery_capacity=None):
    """Map a vehicle type label onto the closest type stored in the database.

    Candidates are narrowed by exact OEM name, then by fuzzy match on the
    model name. When a battery capacity is usable, the rows with the closest
    capacity are tried first; otherwise, or when they yield no match, the
    type is matched among all rows of the model.

    Args:
        type_car (str): type label to normalise
        oem_name (str): OEM name
        model_name (str): model name
        db_df (pd.DataFrame): reference table with the columns model_name,
            id, type, oem_name and capacity (vehicle_model joined with OEM
            and battery)
        battery_capacity (str, optional): capacity such as '52 kWh'; ignored
            when missing or not parseable as a number

    Returns:
        str or None: matching 'type' value of ``db_df``, None when nothing matches
    """
    oem_name = oem_name.lower()
    model_name = model_name.lower()
    type_car = type_car.lower()

    # Keep only the models of this OEM; nothing to match if there are none.
    subset = db_df[db_df['oem_name'] == oem_name].copy()
    if subset.empty:
        return None

    # Closest model name (no score cutoff, so a non-empty subset always matches).
    match_model = process.extractOne(model_name, subset['model_name'], scorer=fuzz.token_sort_ratio)
    if not match_model:
        return None
    subset = subset[subset['model_name'] == match_model[0]]

    # Rows with the closest battery capacity, when the capacity is usable.
    # Only the errors of an absent or malformed capacity are tolerated.
    try:
        battery_target = float(battery_capacity.replace('kWh', '').replace('kwh', '').strip())
        distance = (subset["capacity"] - battery_target).abs()
        closest_rows = subset[distance == distance.min()]
    except (AttributeError, TypeError, ValueError):
        closest_rows = None

    # Most similar type, first among the closest-capacity rows, then among
    # all rows of the model.
    for candidates in (closest_rows, subset):
        if candidates is None or candidates.empty:
            continue
        match_type = process.extractOne(type_car, candidates['type'], scorer=fuzz.token_sort_ratio)
        if match_type is not None:
            return candidates.loc[match_type[2], "type"]
    return None

In [None]:
# Display the reference table as the cell output.
dbeaver_df

In [None]:
# Display the sheet data as the cell output.
df_sheet

In [None]:
# Lower-case the OEM and model labels of the sheet.
for label_column in ('OEM', 'Modèle'):
    df_sheet[label_column] = df_sheet[label_column].map(str.lower)

In [None]:
# Reference table: vehicle models with their OEM name, battery capacity and battery id.
engine = get_sqlalchemy_engine()

# Query on the connection owned by the context manager so that it is released
# when the block exits (a second, never-closed connection used to be opened
# just for the query).
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT vm.model_name, vm.id, vm.type, o.oem_name, b.capacity, vm.battery_id FROM vehicle_model vm
                                  join OEM o on vm.oem_id=o.id
                                  join battery b on b.id=vm.battery_id;"""), connection)

# Reload the worksheet (row 0 = headers, first eight columns only).
df = load_excel_data(
    get_gspread_client(),
    "202505 - Courbes SoH",
    "Courbes OS",
)
df_sheet = pd.DataFrame(data=df[1:, :8], columns=df[0, :8])

# Measurements as floats: SoH as a fraction, odometer without embedded spaces.
df_sheet['SoH'] = df_sheet['SoH'].map(lambda raw: raw.replace('%', '').strip()).astype(float) / 100
df_sheet['Odomètre (km)'] = df_sheet['Odomètre (km)'].map(lambda raw: str(raw).replace(' ', '').strip()).astype(float)

# Replace each sheet type by the matching database type.
df_sheet['type'] = df_sheet.apply(
    lambda sheet_row: uniform_vehicules_type(
        sheet_row['Type'], sheet_row['OEM'], str(sheet_row['Modèle']), dbeaver_df, sheet_row['battery_capacity']
    ),
    axis=1,
)
df_sheet['Modèle'] = df_sheet['Modèle'].map(lambda label: label.lower())

# Attach the battery id, then give every (model, battery) group a single
# type label: the first one met in the group.
df_merge = df_sheet.merge(
    dbeaver_df[['model_name', "type", 'battery_id']],
    left_on=['Modèle', "type"],
    right_on=['model_name', 'type'],
)
df_merge['type'] = df_merge.groupby(['model_name', 'battery_id'])['type'].transform('first')

# Step-by-step run of the trendline pipeline, limited to the first model.
for model_car in df_merge['Modèle'].unique()[:1]:
    print(model_car)
    model_rows = df_merge[df_merge['Modèle'] == model_car]
    for type_car in model_rows['type'].unique():
        print(type_car)
        type_rows = model_rows[model_rows['type'] == type_car]
        df_clean = clean_battery_data(type_rows, "SoH", 'Odomètre (km)')
        x_data, y_data = prepare_data_for_fitting(df_clean)
        print(x_data)
        print(y_data)
        coef_mean, coef_lower, coef_upper, mean, upper_bound, lower_bound = compute_main_trendline(x_data, y_data)
        # Trace which of the two bounds gets recomputed.
        if coef_upper[0] >= 0:
            print('ici')
            upper_bound = compute_upper_bound(df_clean, mean, coef_mean)
        if coef_lower[0] >= 0:
            print('la')
            lower_bound = compute_lower_bound(df_clean, mean)
        # graph(df, mean, upper_bound, lower_bound, model)
        print(mean, upper_bound, lower_bound)

In [None]:
# List of type labels shown as the cell output; it is not bound to any name.
["edrive35", "73 kwh", "73 kwh dual motor", "sr awd", "performance awd", "wd"] 

In [None]:
# Type-label spelling variants and the label each one is mapped to
# (presumably the spelling used in the database — TODO confirm).
_TYPE_LABEL_PAIRS = (
    ("73 kwh dual motor", "73 kwh"),
    ("120ah", "120 ah"),
    ("94ah", "94 ah"),
    ("64kwh", "64 kwh"),
    ("electric 45", "electric"),
)
mapping = dict(_TYPE_LABEL_PAIRS)

In [None]:
def uniform_vehicules_type(type_car, oem_name, model_name, db_df, battery_capacity=None):
    """Map a vehicle type label onto the closest type stored in the database.

    Candidates are narrowed by exact OEM name, then by fuzzy match on the
    model name. When a battery capacity is usable, the rows with the closest
    capacity are tried first; otherwise, or when they yield no match, the
    type is matched among all rows of the model.

    Args:
        type_car (str): type label to normalise
        oem_name (str): OEM name
        model_name (str): model name
        db_df (pd.DataFrame): reference table with the columns model_name,
            id, type, oem_name and capacity (vehicle_model joined with OEM
            and battery)
        battery_capacity (str, optional): capacity such as '52 kWh'; ignored
            when missing or not parseable as a number

    Returns:
        str or None: matching 'type' value of ``db_df``, None when nothing matches
    """
    oem_name = oem_name.lower()
    model_name = model_name.lower()
    type_car = type_car.lower()

    # Keep only the models of this OEM; nothing to match if there are none.
    subset = db_df[db_df['oem_name'] == oem_name].copy()
    if subset.empty:
        return None

    # Closest model name (no score cutoff, so a non-empty subset always matches).
    match_model = process.extractOne(model_name, subset['model_name'], scorer=fuzz.token_sort_ratio)
    if not match_model:
        return None
    subset = subset[subset['model_name'] == match_model[0]]

    # Rows with the closest battery capacity, when the capacity is usable.
    # Only the errors of an absent or malformed capacity are tolerated.
    try:
        battery_target = float(battery_capacity.replace('kWh', '').replace('kwh', '').strip())
        distance = (subset["capacity"] - battery_target).abs()
        closest_rows = subset[distance == distance.min()]
    except (AttributeError, TypeError, ValueError):
        closest_rows = None

    # Most similar type, first among the closest-capacity rows, then among
    # all rows of the model.
    for candidates in (closest_rows, subset):
        if candidates is None or candidates.empty:
            continue
        match_type = process.extractOne(type_car, candidates['type'], scorer=fuzz.token_sort_ratio)
        if match_type is not None:
            return candidates.loc[match_type[2], "type"]
    return None

In [None]:


#### Matching types ########
# Reference table: vehicle models with their battery id, OEM name and battery capacity.
engine = get_sqlalchemy_engine()

# Query on the connection owned by the context manager so that it is released
# when the block exits (a second, never-closed connection used to be opened
# just for the query).
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT vm.model_name, vm.id, vm.type, vm.battery_id, o.oem_name, b.capacity  FROM vehicle_model vm
                                  join OEM o on vm.oem_id=o.id
                                  join battery b on b.id=vm.battery_id;"""), connection)

# Replace each sheet type by the matching database type.
df_sheet['type'] = df_sheet.apply(
    lambda sheet_row: uniform_vehicules_type(
        sheet_row['Type'], sheet_row['OEM'], str(sheet_row['Modèle']), dbeaver_df, sheet_row['battery_capacity']
    ),
    axis=1,
)
df_sheet['Modèle'] = df_sheet['Modèle'].map(lambda label: label.lower())

# Attach the battery id, then give every (model, battery) group a single
# type label: the first one met in the group.
df_merge = df_sheet.merge(
    dbeaver_df[['model_name', "type", 'battery_id']],
    left_on=['Modèle', "type"],
    right_on=['model_name', 'type'],
)
df_merge['type'] = df_merge.groupby(['model_name', 'battery_id'])['type'].transform('first')

# Print the trendlines of every (model, type) pair.
for model_car in df_merge['Modèle'].unique():
    model_rows = df_merge[df_merge['Modèle'] == model_car]
    for type_car in model_rows['type'].unique():
        print(generate_trendline_functions(model_rows[model_rows['type'] == type_car], "Odomètre (km)", "SoH"))
            
        
        



In [None]:
from core.gsheet_utils import *
from rapidfuzz import process, fuzz
from core.sql_utils import *
from load.trendline.trendline_utils import *
def generate_trendline_functions(df, odometer_column, soh_column):
    """Compute the mean, upper and lower trendlines of one data set.

    NOTE(review): the two column names are forwarded to clean_battery_data
    positionally as (odometer_column, soh_column), whereas the
    clean_battery_data defined earlier in this notebook takes
    (df, soh_colum, odometer_column). The star import just above this
    definition may rebind that helper (and compute_lower_bound, called here
    with three arguments) — TODO confirm which definitions are meant.

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe with SoH and Odometer column
    odometer_column : str
        Name of the column holding the odometer values
    soh_column : str
        Name of the column holding the SoH values

    Returns:
    --------
    tuple
        mean, upper-bound and lower-bound trendlines

    Raises:
    -------
    Exception
        When fewer than 20 rows remain after cleaning
    """
    cleaned = clean_battery_data(df, odometer_column, soh_column)
    if cleaned.shape[0] < 20:
        raise Exception("Not enough data to compute trendline")

    x_data, y_data = prepare_data_for_fitting(cleaned)
    fit = compute_main_trendline(x_data, y_data)
    coef_mean, coef_lower, coef_upper, mean, upper_bound, lower_bound = fit

    # A bound is recomputed when the first coefficient of its fit is >= 0.
    if coef_upper[0] >= 0:
        upper_bound = compute_upper_bound(cleaned, mean, coef_mean)
    if coef_lower[0] >= 0:
        lower_bound = compute_lower_bound(cleaned, mean, coef_mean)
    return mean, upper_bound, lower_bound

In [None]:
# Reload the worksheet (row 0 = headers, first eight columns only).
df = load_excel_data(
    get_gspread_client(),
    "202505 - Courbes SoH",
    "Courbes OS",
)
df_sheet = pd.DataFrame(data=df[1:, :8], columns=df[0, :8])

# Measurements as floats: SoH as a fraction, odometer without embedded spaces.
df_sheet["SoH"] = df_sheet["SoH"].map(lambda raw: raw.replace('%', '').strip()).astype(float) / 100
df_sheet["Odomètre (km)"] = df_sheet["Odomètre (km)"].map(lambda raw: str(raw).replace(' ', '').strip()).astype(float)
# Reference table: vehicle models with their battery id, OEM name and battery capacity.
engine = get_sqlalchemy_engine()

# Query on the connection owned by the context manager so that it is released
# when the block exits (a second, never-closed connection used to be opened
# just for the query).
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT vm.model_name, vm.id, vm.type, vm.battery_id, o.oem_name, b.capacity  FROM vehicle_model vm
                                join OEM o on vm.oem_id=o.id
                                join battery b on b.id=vm.battery_id;"""), connection)

### Type matching
# Replace each sheet type by the matching database type.
df_sheet['type'] = df_sheet.apply(
    lambda sheet_row: uniform_vehicules_type(
        sheet_row['Type'], sheet_row['OEM'], str(sheet_row['Modèle']), dbeaver_df, sheet_row['battery_capacity']
    ),
    axis=1,
)

df_sheet['Modèle'] = df_sheet['Modèle'].map(lambda label: label.lower())
# Attach the battery id, then give every (model, battery) group a single
# type label: the first one met in the group.
df_merge = df_sheet.merge(
    dbeaver_df[['model_name', "type", 'battery_id']],
    left_on=['Modèle', "type"],
    right_on=['model_name', 'type'],
)
df_merge['type'] = df_merge.groupby(['model_name', 'battery_id'])['type'].transform('first')
# Compute the trendlines of every (model, type) pair; a failure on one pair is
# logged and does not stop the others.
for model_car in df_merge['Modèle'].unique():
    model_rows = df_merge[df_merge['Modèle'] == model_car]
    for type_car in model_rows['type'].unique():
        if not type_car:
            # There is no exception to report here (the message used to
            # reference an undefined `e`).
            logging.error(f"Erreur mise à jour trendline {type_car}: type manquant")
            continue
        print(type_car)
        try:
            # NOTE(review): the column names are given as ("SoH", odometer)
            # although the parameters are named (odometer_column, soh_column);
            # generate_trendline_functions forwards them positionally, so the
            # order is left untouched — TODO confirm.
            mean_trend, upper_boun, lower_bound = generate_trendline_functions(model_rows[model_rows['type'] == type_car], "SoH", "Odomètre (km)")
        except Exception as e:
            logging.error(f"Erreur mise à jour trendline {type_car}: {e}")
            continue
        #update_database_trendlines("vehicle_model", "type", type_car, mean_trend, upper_boun, lower_bound, False)
        logging.info(f"Trendline mise à jour pour {type_car}")