In [None]:
# The wildcard import stays first so the explicit imports below always take
# precedence over anything it re-exports.
from core.sql_utils import *

from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.optimize import curve_fit

In [None]:
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
from transform.raw_tss.tesla_raw_tss import get_raw_tss


## Plot helper and data loading

In [None]:
def plot_log(df, column):
    """Plot one fitted logarithmic SoH-vs-odometer trend per distinct value of ``column``.

    For every non-null value found in ``df[column]`` the model
    ``soh = 1 + a * log1p(odometer / 1000)`` is fitted with
    ``scipy.optimize.curve_fit`` and the fitted curve is drawn over a fixed
    odometer range (0.1 to 240000, 500 points).

    Args:
        df: DataFrame holding at least ``column``, ``'odometer'`` and ``'soh'``.
        column: Name of the column whose distinct values define the groups.

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``"<value> trend"`` line
        per group that has at least one usable (odometer, soh) pair.
    """
    def log_function(x, a):
        # The model is pinned to 1 at odometer 0, since log1p(0) == 0.
        return 1 + a * np.log1p(x / 1000)

    fig = go.Figure()
    # The evaluation grid does not depend on the group, so build it once.
    x_vals = np.linspace(0.1, 240000, 500)

    for value in df[column].dropna().unique():
        # curve_fit rejects NaN/inf inputs, so drop rows missing either
        # variable (the original only filtered on 'soh').
        points = df.loc[df[column] == value, ['odometer', 'soh']].dropna()
        if points.empty:
            # Nothing to fit for this group; skip it instead of crashing.
            continue

        popt, _ = curve_fit(log_function, points['odometer'], points['soh'])
        y_vals = log_function(x_vals, *popt)
        fig.add_trace(go.Scatter(x=x_vals, y=y_vals, name=f'{value} trend'))

    return fig

In [None]:
engine = get_sqlalchemy_engine()

# Read through the context-managed connection so it is closed when the block
# exits. The original opened a second, never-closed connection (`con`) and
# queried through that one while the managed connection went unused.
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            WHERE vm.model_name like '%model%';"""), connection)

# One row per vin with the last non-null 'soh' and 'odometer' in result order.
# NOTE(review): the query has no ORDER BY, so "last" depends on the order the
# database returns rows in - confirm this is the most recent measurement.
soh_df = dbeaver_df.groupby('vin', as_index=False, observed=True)[['soh', 'odometer']].last()

In [None]:
# Load the raw time series via get_raw_tss (imported from transform.raw_tss.tesla_raw_tss).
# The cells below read its 'timestamp', 'vin' and 'battery_level' columns.
df = get_raw_tss()

In [None]:
# Convert epoch-millisecond timestamps to naive UTC datetimes truncated to whole
# seconds. This is a vectorised replacement for the per-row
# datetime.utcfromtimestamp(...).strftime(...) round trip: utcfromtimestamp is
# deprecated since Python 3.12, and missing timestamps now become NaT instead
# of raising.
df['date'] = pd.to_datetime(df['timestamp'], unit='ms').dt.floor('s')

In [None]:
# Make sure 'date' is a datetime column, then order the samples
# chronologically within each vin (in place, original index labels are kept).
df['date'] = df['date'].pipe(pd.to_datetime)
df.sort_values(by=['vin', 'date'], inplace=True)

In [None]:
# Time elapsed since the previous sample of the same vin (NaT for the first
# sample of each vin). The grouped diff is already aligned with df's index.
# The original `.dropna().reset_index(drop=True)` renumbered the result, so the
# assignment re-aligned by label and attached durations to unrelated rows,
# including rows of other vehicles. The NaT rows are harmless: later sums skip them.
df['time_diff'] = df.groupby('vin', observed=True)['date'].diff()

In [None]:
# Replace the timedelta values with their length in seconds (float).
# Not idempotent: the column is overwritten, so re-running this cell without
# re-running the previous one fails because floats have no `.dt` accessor.
df['time_diff'] = df['time_diff'].dt.total_seconds()

In [None]:
# Weight each battery level by the duration attached to its row; this is the
# numerator of the time-weighted mean computed in the aggregation below.
df['time_at_soc'] = df['battery_level'].mul(df['time_diff'])

In [None]:
# Time-weighted mean battery level per vin:
#     soc_mean = sum(battery_level * time_diff) / sum(time_diff)
# Only rows where the product is defined are aggregated. Otherwise intervals
# with a missing battery_level would count in the denominator but not in the
# numerator, biasing the mean low. Vehicles with no elapsed time are dropped,
# because 0/0 would produce a NaN mean.
avg_soc = (
    df.dropna(subset=['time_at_soc'])
    .groupby('vin', observed=True, as_index=False)
    .agg(
        total_time_at_soc=('time_at_soc', 'sum'),
        total_time_diff=('time_diff', 'sum'),
    )
    .query('total_time_diff > 0')
    .eval('soc_mean = total_time_at_soc/total_time_diff')
)

In [None]:
# Attach each vin's 'soh' and 'odometer' from soh_df. This is the default inner
# join, so vins missing from either table are dropped. Re-running the cell
# merges again on top of the already merged frame.
avg_soc = pd.merge(avg_soc, soh_df, on='vin')

In [None]:
# Summary statistics of the numeric columns of the per-vin table.
avg_soc.describe()

In [None]:
def _soc_label(mean_soc):
    """Map a mean SoC value to its category label.

    ``<= 40`` is "low soc", strictly between 40 and 70 is "mid soc", and
    everything else is "high soc". A NaN mean fails both comparisons and
    therefore also ends up in "high soc".
    """
    # NOTE(review): thresholds presumably in percent - confirm the unit of battery_level.
    if mean_soc <= 40:
        return "low soc"
    if mean_soc < 70:
        return "mid soc"
    return "high soc"


avg_soc['soc_cat'] = avg_soc['soc_mean'].map(_soc_label)

## Graph and results

In [None]:
# Number of rows of avg_soc falling in each SoC category.
avg_soc['soc_cat'].value_counts()

In [None]:
# Fit and draw one SoH trend per SoC category, then title the figure and axes.
# The chained update calls return the figure, so it is rendered as the cell output.
fig = plot_log(avg_soc, 'soc_cat')
(
    fig.update_layout(title='Impact of the soc on the battery degradation')
    .update_xaxes(title='odometer')
    .update_yaxes(title='SoH')
)