# Trendlines Validation Method
Objective:
Find a reliable method to validate the calculated trend curves.
Current method in production:
- Validation based on a minimum of 20 different vehicles.

Current approach being explored:
- Use at least 50 different vehicles, including:
    - 20 vehicles with more than 100,000 km
    - 20 vehicles with less than 80,000 km


In [None]:
from core.gsheet_utils import *
from core.sql_utils import *
from core.sql_utils import get_connection, get_sqlalchemy_engine
import pandas as pd
from core.gsheet_utils import load_excel_data
from sqlalchemy import text
from results.trendline.trendline_utils import filtrer_trendlines
from results.trendline.main import generate_trendline_functions
from core.plt_utils import show_trendline
from core.sql_utils import *
from activation.config.mappings import mapping_vehicle_type

### Load data

In [None]:

# Build the SQLAlchemy engine used by the database queries in this notebook.
engine = get_sqlalchemy_engine()
# NOTE(review): this notebook-level connection is never closed. It is kept only
# because other cells may still reference `con`; prefer the scoped
# `with engine.connect()` pattern below and remove `con` once nothing uses it.
con = engine.connect()

# Reference table: every vehicle model joined with its make name and battery capacity.
# The query runs on the connection opened by the `with` block, so that connection
# is released as soon as the block exits.
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT vm.model_name, vm.id, vm.type, m.make_name, b.capacity FROM vehicle_model vm
                                  join make m on m.id=vm.make_id
                                  join battery b on b.id=vm.battery_id;"""), connection)


In [None]:

# Production data: vehicle_data rows joined with their vehicle (VIN) and vehicle
# model (name, type, version), keeping the recorded odometer and SoH.
# The query runs on the connection opened by this `with` block, so the connection
# is released when the block exits and the cell does not rely on a global `con`.
with engine.connect() as connection:
    prod_df = pd.read_sql(text("""SELECT v.vin, vm.model_name, vm.type, vd.odometer, vd.soh, vm.version  from vehicle_data vd
join vehicle v
on vd.vehicle_id=v.id
join vehicle_model vm
on v.vehicle_model_id=vm.id;"""), connection)


In [None]:
# Load the sheet content (presumably workbook "Courbes de tendance", tab "Courbes OS").
# The result is indexed like a 2-D array: row 0 supplies the column names and
# only the first 8 columns are kept.
df = load_excel_data("Courbes de tendance", "Courbes OS")
header_cells = df[0, :8]
data_cells = df[1:, :8]
df_sheet = pd.DataFrame(data=data_cells, columns=header_cells)

# 'SoH' is read as text containing a percent sign: remove the sign and surrounding
# whitespace, then convert to a float fraction by dividing by 100.
soh_text = df_sheet['SoH'].apply(lambda cell: cell.replace('%', '').strip())
df_sheet['SoH'] = soh_text.astype(float) / 100

In [None]:
def _lookup_model_id(sheet_row):
    """Return the value `mapping_vehicle_type` yields for one sheet row.

    The row's type, OEM, model name and battery capacity are passed together
    with the reference table `dbeaver_df`.
    """
    return mapping_vehicle_type(
        sheet_row['Type'],
        sheet_row['OEM'],
        sheet_row['Modèle'],
        dbeaver_df,
        sheet_row['battery_capacity'],
    )


# Row-wise lookup, then store the resulting ids as strings.
df_sheet['model_id'] = df_sheet.apply(_lookup_model_id, axis=1)
df_sheet['model_id'] = df_sheet['model_id'].astype(str)

In [None]:
# Odometer values are read as text; commas (presumably thousands separators,
# e.g. "12,345") are removed before converting each value to float.
odometer_text = df_sheet['Odomètre (km)']
df_sheet['Odomètre (km)'] = odometer_text.apply(lambda km: float(km.replace(',', '')))

## Check the validation conditions

In [None]:
# Eligibility check on the sheet data: list every (Modèle, model_id) pair that has
# enough distinct vehicles. A vehicle is identified by its 'lien' value.
SHEET_MIN_VEHICLES = 20      # minimum number of distinct vehicles per model
SHEET_SPLIT_KM = 50_000      # odometer value separating the low and high mileage bands
# Minimum number of distinct vehicles required in EACH mileage band.
# 0 disables the band requirement, so only the total count is checked.
# Set it to 20 to additionally require 20 vehicles under and 20 vehicles over
# SHEET_SPLIT_KM (the relaxed rule discussed in the notebook's conclusion).
SHEET_MIN_VEHICLES_PER_BAND = 0

resultats = []
for modele, group in df_sheet.groupby(["Modèle", 'model_id']):
    nb_total_vins = group['lien'].nunique()
    # Both bounds are inclusive: a reading of exactly SHEET_SPLIT_KM counts in both bands.
    nb_moins_50k = group.loc[group['Odomètre (km)'] <= SHEET_SPLIT_KM, 'lien'].nunique()
    nb_plus_50k = group.loc[group['Odomètre (km)'] >= SHEET_SPLIT_KM, 'lien'].nunique()

    if (
        nb_total_vins >= SHEET_MIN_VEHICLES
        and nb_moins_50k >= SHEET_MIN_VEHICLES_PER_BAND
        and nb_plus_50k >= SHEET_MIN_VEHICLES_PER_BAND
    ):
        resultats.append(modele)

# One eligible (Modèle, model_id) pair per line.
for selected in resultats:
    print(selected)

In [None]:
# Build the trendline from the rows of model '500' only, passing the odometer
# column name and the SoH column name.
is_model_500 = df_sheet['Modèle'] == '500'
trend = generate_trendline_functions(df_sheet[is_model_500], 'Odomètre (km)', "SoH")

In [None]:
# Show the value returned by generate_trendline_functions as the cell output.
trend

In [None]:
# Plot the rows of model '500' together with the first three items of `trend`
# (NOTE(review): their exact meaning is defined by generate_trendline_functions — confirm there).
model_label = "500"
model_rows = df_sheet[df_sheet['Modèle'] == model_label]
show_trendline(
    model_rows,
    trend[0],
    trend[1],
    trend[2],
    model_label,
    'Odomètre (km)',
    "SoH",
)

In [None]:
# Explored rule applied to production data: a (model_name, type, version) group
# qualifies when it has at least 50 distinct VINs overall, at least 20 distinct
# VINs with a reading at or below 80,000 km, and at least 20 distinct VINs with
# a reading at or above 100,000 km.
# NOTE(review): pandas groupby drops rows whose key is null by default
# (dropna=True), so models with a NULL `version` would be skipped — confirm
# whether that is intended.
def _has_enough_vins(rows):
    """Tell whether one model group satisfies the 50 / 20 / 20 distinct-VIN rule."""
    distinct_total = rows['vin'].nunique()
    distinct_low_km = rows.loc[rows['odometer'] <= 80_000, 'vin'].nunique()
    distinct_high_km = rows.loc[rows['odometer'] >= 100_000, 'vin'].nunique()
    return distinct_total >= 50 and distinct_low_km >= 20 and distinct_high_km >= 20


resultats = [
    group_key
    for group_key, rows in prod_df.groupby(["model_name", 'type', 'version'])
    if _has_enough_vins(rows)
]

# One qualifying (model_name, type, version) tuple per line.
for group_key in resultats:
    print(group_key)

Conclusion:

If we lower the threshold to 20 VINs under 50,000 km and 20 VINs over 50,000 km, we can include two model types from the scraping data (R110 and e-208 50kWh 1st gen), as well as Tesla and Zoe R135 from the bib SoH.

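However, if we keep the approach currently being explored (at least 50 vehicles, including 20 under 80,000 km and 20 over 100,000 km), only the Tesla data from the bib SoH meets the requirements.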

