In [None]:
import pandas as pd
# The star import keeps its original position so that the explicit imports
# below still take precedence if core.sql_utils exports a clashing name.
from core.sql_utils import *
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy.optimize import curve_fit
from datetime import datetime
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries


In [None]:
def plot_log(df, column, max_odometer=240000, n_points=500):
    """Plot one fitted logarithmic SoH-vs-odometer trend line per category.

    For every distinct value of ``df[column]`` the one-parameter model
    ``soh = 1 + a * log1p(odometer / 1000)`` is fitted with
    ``scipy.optimize.curve_fit`` and the fitted curve is added to the figure
    as a trace named ``"<value> trend"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'odometer'``, ``'soh'`` and ``column``.
        NOTE(review): the model is pinned to 1 at odometer 0, so ``soh`` is
        presumably a fraction (1.0 = 100 %) rather than a percentage -- confirm.
    column : str
        Name of the categorical column; one trend line is drawn per value.
    max_odometer : float, optional
        Upper bound of the odometer range over which the fitted curve is drawn.
    n_points : int, optional
        Number of points used to draw each fitted curve.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding one line per category that has at least one row with
        both ``odometer`` and ``soh`` present.
    """
    def log_function(x, a):
        return 1 + a * np.log1p(x / 1000)

    fig = go.Figure()
    # The evaluation grid is the same for every category, so build it once.
    x_vals = np.linspace(0.1, max_odometer, n_points)

    for value in df[column].unique():
        # curve_fit cannot handle NaN in either variable, so drop rows missing
        # the odometer as well as rows missing the SoH.
        points = (
            df.loc[df[column] == value, ['odometer', 'soh']]
            .dropna()
            .sort_values('odometer')
        )
        if points.empty:
            # Nothing to fit (e.g. a NaN category label, or no complete rows).
            continue

        # Fit the log function to this category's points.
        popt, _ = curve_fit(log_function, points['odometer'], points['soh'])

        # Evaluate the fitted curve on the shared grid and draw it.
        y_vals = log_function(x_vals, *popt)
        fig.add_trace(go.Scatter(x=x_vals, y=y_vals, name=f'{value} trend'))

    return fig

## Import data

In [None]:
# Processed Tesla time series; the cells below use it like a DataFrame with at
# least the columns 'vin', 'speed' and 'odometer'.
# NOTE(review): force_update=True presumably rebuilds the processed data instead
# of reusing a cached copy, which may make "Run All" slow -- confirm.
drive = TeslaProcessedTimeSeries("tesla", force_update=True)

In [None]:
engine = get_sqlalchemy_engine()

# Run the query on the context-managed connection so it is closed as soon as
# the data is loaded; no other connection is opened or left dangling.
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            WHERE vm.model_name like '%model%';"""), connection)

# One row per VIN with the last non-null 'soh' and 'odometer' in row order.
# NOTE(review): the query has no ORDER BY, so "last" depends on the order in
# which the database returns rows, and the two values may come from different
# rows -- confirm this is the intended "latest reading".
soh_df = dbeaver_df.groupby('vin', as_index=False, observed=True)[['soh', 'odometer']].last()

## Create dataframe

In [None]:
# Plain arithmetic mean of 'speed' per VIN.
# NOTE(review): avg_speed_df is reassigned by the distance/time aggregation in a
# later cell before anything reads this result, so this cell looks redundant.
avg_speed_df = (
    drive
    .groupby('vin', as_index=False, observed=True)
    .agg(avg_speed=pd.NamedAgg(column='speed', aggfunc='mean'))
)

In [None]:
# Odometer increase since the previous row of the same VIN. diff() keeps the
# index of `drive`, so the assignment lines up row by row; the first row of
# each VIN stays NaN. Dropping those NaNs and resetting the index before
# assigning would renumber the values and attach them to the wrong rows.
# NOTE(review): assumes rows are in chronological order within each VIN -- confirm.
drive['odometer_diff'] = drive.groupby('vin', observed=True)['odometer'].diff()


In [None]:
# Replace missing speeds with the mean of the strictly positive speeds of the
# same VIN. The per-VIN mean is broadcast over every row of `drive` (where()
# masks non-positive speeds instead of filtering rows out), so the fill values
# share the index of the NaN rows and fillna can actually use them.
drive['speed'] = drive['speed'].fillna(
    drive['speed']
    .where(drive['speed'] > 0)
    .groupby(drive['vin'], observed=True)
    .transform('mean')
)


In [None]:
# Keep only the rows where the odometer increased since the previous row.
drive_ride = drive[drive['odometer_diff'] > 0].copy()
# Time spent covering each increment: distance divided by speed.
drive_ride['time_speed'] = drive_ride['odometer_diff'].div(drive_ride['speed'])

# Per-VIN totals over the rows with a positive speed, then:
#   avg_speed           = total distance / total time
#   diff_total_odometer = highest odometer - lowest odometer
avg_speed_df = (
    drive_ride[drive_ride['speed'] > 0]
    .groupby('vin', as_index=False, observed=True)
    .agg(
        total_distance=('odometer_diff', 'sum'),
        total_time=('time_speed', 'sum'),
        odometer_start=('odometer', 'min'),
        odometer_end=('odometer', 'max'),
    )
    .assign(
        avg_speed=lambda trips: trips['total_distance'] / trips['total_time'],
        diff_total_odometer=lambda trips: trips['odometer_end'] - trips['odometer_start'],
    )
)

In [None]:
# Attach each vehicle's SoH / odometer reading from soh_df (inner join: VINs
# missing on either side are dropped). Columns that soh_df provides are removed
# first so that re-running this cell replaces them instead of producing
# suffixed duplicates such as soh_x / soh_y.
avg_speed_df = (
    avg_speed_df
    .drop(columns=soh_df.columns.difference(['vin']), errors='ignore')
    .merge(soh_df, on='vin', how='inner')
)

In [None]:
# Two speed categories: average speed of at least 50 versus everything else
# (a NaN average speed falls into the "< 50" category).
avg_speed_df['cat_speed'] = (
    avg_speed_df['avg_speed']
    .ge(50)
    .map({True: 'avg speed > 50', False: "avg speed < 50"})
)

## Graph and result

In [None]:
# Number of rows (one per VIN) in each speed category.
avg_speed_df['cat_speed'].value_counts()

In [None]:
# One fitted SoH trend line per speed category, with title and axis labels.
fig = (
    plot_log(avg_speed_df, 'cat_speed')
    .update_layout(title='Impact of the speed on the battery degradation')
    .update_xaxes(title='odometer')
    .update_yaxes(title='SoH')
)
fig