# BMW processed time series Exploratory Data Analysis
The goal of this notebook is to develop a method for calculating the state of health (SoH) of the BMW fleet.

## Setup

### Imports

In [None]:
from datetime import datetime as DT

import numpy as np
import pandas as pd
import plotly.express as px
import pytz
from pandas import DataFrame as DF

from core.s3_utils import S3_Bucket
from core.config import *
from core.pandas_utils import *
from transform.processed_tss.main import get_processed_tss


### Data extraction

In [None]:
# Load the processed BMW time series.
# NOTE(review): force_update=True presumably forces a recomputation instead of
# reusing cached data, which would make every "Run All" expensive — confirm.
tss = get_processed_tss("bmw", force_update=True)

# Add the cumulative sum of `in_charge` as `in_charge_perf_idx`.
# NOTE(review): the cumsum runs over the whole frame, not per VIN — verify this
# is the intended indexing when several vehicles are present.
tss = tss.eval("in_charge_perf_idx = in_charge.cumsum()")

# Kept as the last expression of the cell so the column list is actually rendered
# (a bare expression in the middle of a cell produces no output).
tss.columns


In [None]:
# Preview the first 10 rows of the loaded time series.
tss.head(10)

In [None]:
# Back-fill missing odometer readings within each vehicle (grouped by `vin`), so a
# gap takes the next known reading of the same VIN and never one from another VIN.
# GroupBy.bfill() returns a Series aligned on the original index; the previous
# groupby(...).apply(fillna(method='bfill')) relied on a deprecated fillna argument
# and, on pandas >= 2.0, prepends the group key to the index, breaking the assignment.
tss["odometer"] = tss.groupby("vin")["odometer"].bfill()


## Time series EDA

Let's list the variables and, for each one, the ratio of non-null values to the total number of rows.

In [None]:
# Share of non-null values per column (1.0 means the column has no missing value).
tss.notna().mean()


In [None]:
# Single vehicle used for the per-VIN plots.
VIN = "WBY1Z610407A12415"
# .copy() yields an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning.
tss_unique = tss.query("vin == @VIN").copy()

# Seeded generator so that Restart & Run All always selects the same vehicles.
SAMPLE_SEED = 42
N_SAMPLE_VINS = 5
all_vins = tss["vin"].dropna().unique()
rng = np.random.default_rng(SAMPLE_SEED)
# Cap the sample size: sampling without replacement raises when asked for more
# items than are available.
random_vins = rng.choice(
    all_vins,
    size=min(N_SAMPLE_VINS, len(all_vins)),
    replace=False,
)

# Rows belonging to the sampled vehicles, again as an independent copy.
tss_sample = tss[tss["vin"].isin(random_vins)].copy()

## First plots

In [None]:
# `estimated_range` against `soc` for every row of `tss`, one colour per `vin`.
px.scatter(data_frame=tss, x="soc", y="estimated_range", color="vin")

## SoH calculation

### First method on the estimated range


#### A few plots before the calculation


In [None]:
# `estimated_range` against `soc` for every row of `tss`, coloured by `vin`.
px.scatter(data_frame=tss, x="soc", y="estimated_range", color="vin")

In [None]:
# Same `soc` / `estimated_range` scatter, this time coloured by the `in_charge` flag.
px.scatter(data_frame=tss, x="soc", y="estimated_range", color="in_charge")


-> No correlation between in_charge and estimated_range

In [None]:
# `soc` over `date` for the single selected vehicle, coloured by `in_charge_perf_idx`.
px.scatter(data_frame=tss_unique, x="date", y="soc", color="in_charge_perf_idx")


### SoH calculation


In [None]:
def compute_soh(df):
    """Return the SoH estimate for each row of ``df``.

    The value is ``estimated_range / (soc * range) * 100``.

    Args:
        df: DataFrame with the columns ``estimated_range``, ``soc`` and ``range``.

    Returns:
        A Series aligned on ``df.index``. Rows whose denominator ``soc * range``
        is zero are set to NaN instead of +/-inf so they do not distort the plots.
    """
    denominator = df["soc"] * df["range"]
    soh = df["estimated_range"] / denominator * 100
    # NaN denominators compare unequal to 0, so those rows keep their (NaN) result.
    return soh.where(denominator != 0)


# One formula, applied to the full frame and to the two subsets.
tss["SoH"] = compute_soh(tss)
# .assign builds a new frame rather than writing into a slice of `tss`,
# which avoids SettingWithCopyWarning.
tss_unique = tss_unique.assign(SoH=compute_soh)
tss_sample = tss_sample.assign(SoH=compute_soh)


#### SoC / SoH


In [None]:
# `soc` against the computed `SoH` for every row of `tss`, coloured by `vin`.
px.scatter(data_frame=tss, x="SoH", y="soc", color="vin")

#### In charge / SoH

In [None]:
# `SoH` over `date` for the single selected vehicle, coloured by the `in_charge` flag.
px.scatter(data_frame=tss_unique, x="date", y="SoH", color="in_charge")

#### Charging 


In [None]:
# `SoH` over `date`, restricted to rows where `in_charge` is True,
# coloured by `charging_plug_connected`.
px.scatter(
    data_frame=tss.query("in_charge == True"),
    x="date", y="SoH", color="charging_plug_connected",
)

#### SoH / odometer


In [None]:
# `SoH` against `odometer` for every row of `tss`, one colour per `vin`.
px.scatter(data_frame=tss, x="odometer", y="SoH", color="vin")