# Construction et analyse des intervalles de confiance pour le SoH des Tesla


In [None]:
from transform.raw_results.config import *
from transform.processed_tss.ProcessedTimeSeries import TeslaProcessedTimeSeries
from core.pandas_utils import *
import plotly.graph_objects as go
import plotly.express as px
from transform.raw_results.renault_results import get_results 


Ce notebook permet de :
- Voir si on peut construire des intervalles de confiance (IC)
- Analyser la répartition de la taille des IC en fonction du nombre de charges

Conclusion :
- On a suffisamment de données de charge pour construire des IC
- 56 VIN sur 68 (82 %) ont un IC de moins de 5 points de largeur, soit un IC de ± 2,5 % ou moins
- Il n'y a que 3 VIN dont l'IC dépasse ± 5 %


## data import

In [None]:
from core.sql_utils import *
engine = get_sqlalchemy_engine()

# Load every vehicle_data row joined with its vehicle, vehicle_model and
# battery records, keeping only models whose name matches '%zoe%'.
# The query runs on the context-managed connection so that it is closed as
# soon as the result has been read; no other connection is left open.
with engine.connect() as connection:
    dbeaver_df = pd.read_sql(text("""SELECT * FROM vehicle_data vd
            join vehicle v
            on v.id = vd.vehicle_id
            join vehicle_model vm 
            on vm.id = v.vehicle_model_id
            join battery b 
            on b.id=vm.battery_id
            WHERE vm.model_name  like '%zoe%'"""), connection)

In [None]:
# Distinct VINs among the database rows that have a non-null 'soh'.
rows_with_soh = dbeaver_df.dropna(subset=['soh'])
rows_with_soh[['vin']].nunique()

In [None]:
# Fetch the results table and keep only the rows with a non-null 'soh'.
raw_results = get_results()
df = raw_results.dropna(subset=['soh'])

In [None]:
# Number of distinct VINs left in the results after dropping null 'soh' rows.
df['vin'].nunique()

In [None]:
# SoH against odometer reading, one colour per VIN.
px.scatter(
    data_frame=df,
    x='odometer',
    y='soh',
    color='vin',
)

## IC build

In [None]:
def ic_computation(df, column='soh', z=1.96):
    """Compute a normal-approximation confidence interval for a column's mean.

    Args:
        df: DataFrame containing the column to analyse.
        column: Name of the column holding the values. Defaults to ``'soh'``.
        z: Multiplier applied to the standard error of the mean. The default
            1.96 is the two-sided 95% quantile of the standard normal
            distribution.

    Returns:
        A tuple ``((lower, upper), n, median)``. The bounds are rounded to
        4 decimals, ``n`` is the number of non-null values used and
        ``median`` is the median of those values. The bounds are
        ``(nan, nan)`` when fewer than two values are available, because the
        sample standard deviation is undefined in that case.
    """
    # Null values are ignored by mean/std/median, so they must not be counted
    # in the sample size either; otherwise the standard error is understated.
    values = df[column].dropna()
    n = len(values)
    med = values.median()

    if n < 2:
        return (np.nan, np.nan), n, med

    m = values.mean()
    half_width = z * (values.std() / np.sqrt(n))
    return (round(m - half_width, 4), round(m + half_width, 4)), n, med

In [None]:
# Drop rows without a SoH value, group the remaining SoH values by VIN and
# apply ic_computation to each group; the result lands in column 0 of ic_df.
soh_by_vin = df.dropna(subset='soh').groupby(['vin'], observed=False)[['soh']]
ic_df = pd.DataFrame(soh_by_vin.apply(ic_computation))

In [None]:
# Each entry of column 0 is ((lower, upper), count, median): transpose the
# rows into parallel tuples, then split the bounds the same way.
l1, number_charges, median = zip(*ic_df[0].tolist())
lower, upper = zip(*l1)

In [None]:
# Attach the unpacked values to ic_df as regular columns, in this order.
for column_name, column_values in (
    ('upper', upper),
    ('lower', lower),
    ('number_charges', number_charges),
    ('soh_median', median),
):
    ic_df[column_name] = list(column_values)

In [None]:
# Width of each confidence interval: upper bound minus lower bound.
ic_df['ic_point_diff'] = ic_df['upper'].sub(ic_df['lower'])

## Graphs

In [None]:
# Bucket the interval widths into left-closed bins and count rows per bucket.
width_bins = [0, .02, .05, .1, .2, .3, .4, .5, 1]
hist_values, bin_edges = pd.cut(
    ic_df['ic_point_diff'], bins=width_bins, right=False, retbins=True
)
hist_counts = hist_values.value_counts().sort_index()

# One axis label per bucket, built from each pair of consecutive bin edges.
bucket_labels = [
    f"{round(left_edge, 3)} - {round(right_edge, 3)}"
    for left_edge, right_edge in zip(bin_edges[:-1], bin_edges[1:])
]

# Bar chart of the bucket counts, with the count printed on each bar.
bars = go.Bar(
    x=bucket_labels,
    y=hist_counts.values,
    marker={'color': 'blue'},
    text=hist_counts,
)
fig = go.Figure(data=[bars])
fig.update_layout(
    title="répartitions des tailles d'IC",
    xaxis_title="Intervale",
    yaxis_title="Frecuence",
)
fig.show()

In [None]:
# Interval width against the number of charges; both bounds shown on hover.
px.scatter(
    data_frame=ic_df,
    x='number_charges',
    y='ic_point_diff',
    hover_data=dict(lower=True, upper=True),
    title='taille IC vs Nombre de charges',
)

In [None]:
# Mean, median and maximum interval width for each distinct number of charges;
# groups whose statistics are null are dropped.
ic_width_stats = {
    'mean_ic_point': ('ic_point_diff', 'mean'),
    'median_ic_point': ('ic_point_diff', 'median'),
    'max_ic_point': ('ic_point_diff', 'max'),
}
per_charge_count = ic_df.groupby("number_charges", as_index=False)
charges_df = per_charge_count.agg(**ic_width_stats).dropna()