In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

from mvf_bto.data_loading import load_data
from mvf_bto.constants import * 
from mvf_bto.models.baseline_lstm import BaselineLSTM
from mvf_bto.preprocessing import create_discharge_inputs
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import MeanSquaredError

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go

## Loading Data

In [3]:
# Location of the batch .mat file. The default below is specific to one
# machine; set the MVF_BTO_DATA_PATH environment variable to point at a local
# copy of the file instead of editing this cell.
data_path = os.environ.get(
    "MVF_BTO_DATA_PATH",
    "/Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto-backup/data/2017-05-12_batchdata_updated_struct_errorcorrect.mat",
)


In [4]:
# Read 8 cells from the batch file; the result is keyed by cell id.
data = load_data(
    file_path=data_path,
    num_cells=8,
)

100%|██████████| 8/8 [01:09<00:00,  8.70s/it]


## Preprocessing to create model inputs and targets

In [5]:
# Split fractions passed to the preprocessing step. Whatever remains,
# 1 - (train_split + test_split), is used as the validation split by default.
train_split, test_split = 0.7, 0.2

In [6]:
# Display the identifiers of the cells that were loaded.
data.keys()

dict_keys(['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4', 'b1c5', 'b1c6', 'b1c7'])

In [7]:
# Echo the capacity grid that is handed to the preprocessing step as `q_eval`.
print(REFERENCE_DISCHARGE_CAPACITIES)

# Build the model inputs/targets and record which cells ended up in each split.
datasets, train_cell, test_cell, validation_cell = create_discharge_inputs(
    data,
    train_split,
    test_split,
    forecast_horizon=3,
    history_window=4,
    q_eval=REFERENCE_DISCHARGE_CAPACITIES,
)

[0, 0.025, 0.075, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.85, 0.9, 0.95, 0.975, 0.98, 0.985, 0.99, 0.995, 0.998, 1.0]


100%|██████████| 1178/1178 [00:08<00:00, 131.67it/s]
100%|██████████| 1189/1189 [00:08<00:00, 137.64it/s]
100%|██████████| 1073/1073 [00:07<00:00, 142.52it/s]
100%|██████████| 635/635 [00:04<00:00, 133.79it/s]
100%|██████████| 869/869 [00:06<00:00, 144.31it/s]
100%|██████████| 1225/1225 [00:08<00:00, 137.92it/s]
100%|██████████| 1176/1176 [00:08<00:00, 134.62it/s]
100%|██████████| 1226/1226 [00:08<00:00, 137.52it/s]


In [8]:
# Show which cell ids were assigned to the train, test and validation splits.
print(train_cell, test_cell, validation_cell, sep=" ")

['b1c1', 'b1c0', 'b1c5', 'b1c6', 'b1c7'] ['b1c3'] ['b1c2', 'b1c4']


## Train Model

In [9]:
# Shapes needed to build and train the model, read off the prepared arrays.
window_length = datasets["X_train"].shape[1]
n_features = datasets["X_train"].shape[2]
batch_size = datasets["batch_size"]
batch_input_shape = (batch_size, window_length, n_features)
n_outputs = datasets["y_train"].shape[-1]
nf_steps = datasets["y_train"].shape[1]
print(window_length, n_features, batch_size, n_outputs, nf_steps)

# Per-sample loss weights: samples whose first-step voltage target lies below
# 2.9 V count double.
# The plotting cells recover volts from the targets with
# value * (VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN, i.e. the targets are
# min-max normalized. Comparing them with the raw value 2.9 flags every sample,
# which makes the weighting a no-op, so the cut-off is normalized first.
# NOTE(review): assumes y_train is scaled the same way as y_test - confirm.
LOW_VOLTAGE_CUTOFF = 2.9  # volts
low_voltage_threshold = (LOW_VOLTAGE_CUTOFF - VOLTAGE_MIN) / (VOLTAGE_MAX - VOLTAGE_MIN)

y = datasets["y_train"][:, 0, 0]
idx = y < low_voltage_threshold
weights = np.ones_like(y)
weights[idx] = 2



3 4 13 2 3


In [10]:
# Preview of ground-truth test voltage curves, one trace per sampled batch.
skip = 70  # plot only every `skip`-th batch to keep rendering fast
print(REFERENCE_DISCHARGE_CAPACITIES)

# Iterate over the array that is actually plotted (y_test). Walking X_train
# indexes past the end of y_test whenever the training set is longer, which
# adds empty traces to the figure.
n_test = len(datasets["y_test"])
pallete = plotly.colors.qualitative.Dark24 * max(1, n_test // batch_size)

fig = go.Figure()
for i in range(0, n_test, batch_size * skip):
    # First forecast step, output channel 0, for one batch of samples.
    true_voltage = datasets["y_test"][i : i + batch_size][:, 0, 0]
    fig.add_trace(
        go.Scatter(
            x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
            # Undo the min-max scaling to plot volts.
            y=true_voltage * (VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN,
            showlegend=True,
            mode="lines+markers",
            name=f"True Curve {i+1}",
            line_color=pallete[i // skip],
        )
    )

fig.update_yaxes(title="Voltage [V]")
fig.update_xaxes(title="State of Charge (Normalized Capacity)")
fig.update_layout(height=500)

[0, 0.025, 0.075, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.85, 0.9, 0.95, 0.975, 0.98, 0.985, 0.99, 0.995, 0.998, 1.0]


In [24]:
# Instantiate the baseline LSTM using the shapes derived from the training arrays.
model = BaselineLSTM(
    batch_input_shape=batch_input_shape,
    n_outputs=n_outputs,
    nf_steps=nf_steps,
)

In [25]:
# Early stopping on validation MSE: an epoch only counts as an improvement if
# it beats the best value by at least 5e-5; after 10 epochs without one,
# training stops and the best weights are restored.
es = EarlyStopping(
    restore_best_weights=True,
    mode="auto",
    verbose=1,
    patience=10,
    min_delta=5e-5,
    monitor="val_mean_squared_error",
)

# Adam optimizer on an MSE loss; MSE is also tracked as a metric, which is the
# quantity the callback above monitors on the validation data.
model.compile(
    loss="mse",
    optimizer="adam",
    metrics=[MeanSquaredError()],
)


In [26]:

# Train for at most 25 epochs in stored sample order (no shuffling), applying
# the per-sample `weights` and letting `es` end the run early.
history = model.fit(
    datasets["X_train"],
    datasets["y_train"],
    sample_weight=weights,
    validation_data=(datasets["X_val"], datasets["y_val"]),
    batch_size=datasets["batch_size"],
    epochs=25,
    shuffle=False,
    verbose=1,
    callbacks=[es],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 19: early stopping


In [27]:
# from tensorflow import keras
# model =  keras.models.load_model('/Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID20')

In [28]:
# Training-loss curve, one point per completed epoch.
train_loss = history.history["loss"]

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        # Epoch numbers 1..N matching the recorded losses. Early stopping can
        # end training before the configured epoch count, so N is taken from
        # the history rather than hard-coded (np.linspace(1, 50) always
        # produced 50 x-values regardless of how many epochs ran).
        x=np.arange(1, len(train_loss) + 1),
        y=train_loss,
        showlegend=False,
        mode="markers+lines",
    )
)
fig.update_xaxes(title="Epochs")
fig.update_yaxes(title="Loss (MSE)")
fig.update_layout(height=500)

## Parity Plot of Training Error

In [29]:
# Parity plot on the training set. Only every `skip`-th batch is drawn so the
# figure stays quick to render.
batch_size = datasets["batch_size"]
skip = 70

fig = go.Figure()
# Diagonal reference line: points on it are perfect predictions.
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], showlegend=False, mode="markers+lines"))
for start in range(0, len(datasets["X_train"]), batch_size * skip):
    stop = start + batch_size
    predicted = model.predict(datasets["X_train"][start:stop], verbose=0)
    target = datasets["y_train"][start:stop]

    # Output channel 0, first forecast step (index 1 on the middle axis would
    # give the next step).
    fig.add_trace(
        go.Scatter(
            x=predicted[:, 0, 0],
            y=target[:, 0, 0],
            showlegend=False,
            mode="markers+lines",
        )
    )

fig.update_yaxes(title="Normalized Voltage Target")
fig.update_xaxes(title="Normalized Voltage Prediction")
fig.update_layout(height=500)

In [30]:
# Parity plot on the training set for output channel 1, first forecast step.
# Reuses `batch_size` and `skip` as set by the preceding parity-plot cell.
fig = go.Figure()
# Diagonal reference line: points on it are perfect predictions.
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], showlegend=False, mode="markers+lines"))
for start in range(0, len(datasets["X_train"]), batch_size * skip):
    stop = start + batch_size
    predicted = model.predict(datasets["X_train"][start:stop], verbose=0)
    target = datasets["y_train"][start:stop]
    fig.add_trace(
        go.Scatter(
            x=predicted[:, 0, 1],
            y=target[:, 0, 1],
            showlegend=False,
            mode="markers+lines",
        )
    )

fig.update_yaxes(title="Normalized Temperature Target")
fig.update_xaxes(title="Normalized Temperature Prediction")
fig.update_layout(height=500)

In [31]:
# True vs. predicted voltage traces on the training set, one colour per
# sampled batch.
skip = 70  # plot only every `skip`-th batch

pallete = plotly.colors.qualitative.Dark24*(len(datasets["X_train"])//batch_size)

fig = go.Figure()
for i in range(0, len(datasets["X_train"]), batch_size * skip):
    # One forward pass per batch; every forecast step is read from this same
    # prediction instead of re-running the model once per step.
    batch_pred = model.predict(datasets["X_train"][i : i + batch_size], verbose=0)
    true_voltage = datasets["y_train"][i : i + batch_size][:, 0, 0]

    for j in range(nf_steps):
        fig.add_trace(
            go.Scatter(
                x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
                # Undo the min-max scaling to plot volts.
                y=batch_pred[:, j, 0]*(VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN,
                showlegend=True,
                mode="markers",
                # The trailing number is the last feature of the first time
                # step of sample i.
                name = f"Predicted Curve t %d {i+1}, %f"%(j, datasets["X_train"][i][0, -1]),
                marker_color=pallete[i//skip]
            )
        )

    fig.add_trace(
        go.Scatter(
            x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
            y=true_voltage*(VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN,
            showlegend=True,
            mode="lines+markers",
            name = f"True Curve {i+1}",
            line_color=pallete[i//skip]
        )
    )

fig.update_yaxes(title="Voltage [V]")
fig.update_xaxes(title="State of Charge (Normalized Capacity)")
fig.update_layout(height=500)

## Parity Plot of Test Error

In [18]:
# Parity plot on the test set for output channel 0, first forecast step.
# Every `skip`-th batch is drawn.
skip = 20

fig = go.Figure()
# Diagonal reference line: points on it are perfect predictions.
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], showlegend=False, mode="markers+lines"))
for start in range(0, len(datasets["X_test"]), batch_size * skip):
    stop = start + batch_size
    predicted = model.predict(datasets["X_test"][start:stop], verbose=0)
    target = datasets["y_test"][start:stop]
    fig.add_trace(
        go.Scatter(
            x=predicted[:, 0, 0],
            y=target[:, 0, 0],
            showlegend=False,
            mode="markers+lines",
        )
    )

fig.update_yaxes(title="Normalized Voltage Target")
fig.update_xaxes(title="Normalized Voltage Prediction")
fig.update_layout(height=500)

In [19]:
# Parity plot on the test set for output channel 1, first forecast step.
# Reuses `batch_size` and `skip` as set by earlier cells.
fig = go.Figure()
# Diagonal reference line: points on it are perfect predictions.
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], showlegend=False, mode="markers+lines"))
for start in range(0, len(datasets["X_test"]), batch_size * skip):
    stop = start + batch_size
    predicted = model.predict(datasets["X_test"][start:stop], verbose=0)
    target = datasets["y_test"][start:stop]
    fig.add_trace(
        go.Scatter(
            x=predicted[:, 0, 1],
            y=target[:, 0, 1],
            showlegend=False,
            mode="markers+lines",
        )
    )

fig.update_yaxes(title="Normalized Temperature Target")
fig.update_xaxes(title="Normalized Temperature Prediction")
fig.update_layout(height=500)

## True vs Predicted Traces (Test Set)

In [32]:
# True vs. predicted voltage traces for the first test batch.
skip = 20

pallete = plotly.colors.qualitative.Dark24*4

fig = go.Figure()
# The range's stop equals its step, so only i = 0 is visited.
for i in range(0, batch_size*skip, batch_size * skip):
    # One forward pass per batch; every forecast step is read from this same
    # prediction (the per-step re-prediction and the unused extra predict call
    # were dropped).
    batch_pred = model.predict(datasets["X_test"][i : i + batch_size], verbose=0)
    true_voltage = datasets["y_test"][i : i + batch_size][:, 0, 0]

    for j in range(nf_steps):
        fig.add_trace(
            go.Scatter(
                x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
                # Undo the min-max scaling to plot volts.
                y=batch_pred[:, j, 0]*(VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN,
                showlegend=True,
                mode="markers",
                name = f"Predicted Curve t %d {i+1}"%j,
                marker_color=pallete[j]
            )
        )
    fig.add_trace(
        go.Scatter(
            x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
            y=true_voltage*(VOLTAGE_MAX - VOLTAGE_MIN) + VOLTAGE_MIN,
            showlegend=True,
            mode="lines+markers",
            name = f"True Curve {i+1}",
            line_color=pallete[-1]
        )
    )

fig.update_yaxes(title="Voltage [V]")
fig.update_xaxes(title="State of Charge (Normalized Capacity)")
fig.update_layout(height=500)

In [21]:
# from tensorflow import keras
# model = keras.models.load_model("/Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID40")

In [33]:
# True vs. predicted temperature traces for the first test batch.
skip = 20

pallete = plotly.colors.qualitative.Dark24*4

fig = go.Figure()
# The range's stop equals its step, so only i = 0 is visited.
for i in range(0, 0+batch_size*skip, batch_size * skip):
    # One forward pass per batch; every forecast step is read from this same
    # prediction (the per-step re-prediction and the unused extra predict call
    # were dropped).
    batch_pred = model.predict(datasets["X_test"][i : i + batch_size], verbose=0)
    true_temperature = datasets["y_test"][i : i + batch_size][:, 0, 1]

    for j in range(nf_steps):
        fig.add_trace(
            go.Scatter(
                x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
                # Undo the min-max scaling to plot temperature.
                y=batch_pred[:, j, 1]*(TEMPERATURE_MAX - TEMPERATURE_MIN) + TEMPERATURE_MIN,
                showlegend=True,
                mode="markers",
                name = f"Predicted Curve t %d {i+1}"%j,
                marker_color=pallete[j]
            )
        )
    fig.add_trace(
        go.Scatter(
            x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
            y=true_temperature*(TEMPERATURE_MAX-TEMPERATURE_MIN) + TEMPERATURE_MIN,
            showlegend=True,
            mode="lines+markers",
            name = f"True Curve {i+1}",
            line_color=pallete[-1]
        )
    )

# This figure shows the temperature channel, so the y-axis is labelled as
# temperature (it previously read "Voltage [V]").
fig.update_yaxes(title="Temperature [°C]")
fig.update_xaxes(title="State of Charge (Normalized Capacity)")
fig.update_layout(height=500)

In [34]:
# True vs. predicted temperature traces across the test set, one colour per
# sampled batch.
skip = 10
print(batch_size*skip)
print(len(datasets["X_test"])//skip)
pallete = plotly.colors.qualitative.Dark24*(len(datasets["X_test"])//skip)
fig = go.Figure()
for i in range(0, len(datasets["X_test"]), batch_size * skip):
    # One forward pass per batch; every forecast step is read from this same
    # prediction instead of re-running the model once per step.
    batch_pred = model.predict(datasets["X_test"][i : i + batch_size], verbose=0)
    true_temperature = datasets["y_test"][i : i + batch_size][:, 0, 1]
    batch_color = pallete[i//skip]

    for j in range(nf_steps):
        fig.add_trace(
            go.Scatter(
                x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
                # Undo the min-max scaling to plot temperature.
                y=batch_pred[:, j, 1]*(TEMPERATURE_MAX - TEMPERATURE_MIN) + TEMPERATURE_MIN,
                showlegend=True,
                mode="markers",
                name = f"Predicted Curve t%d {i+1}"%j,
                marker_color=batch_color
            )
        )

    fig.add_trace(
        go.Scatter(
            x=REFERENCE_DISCHARGE_CAPACITIES[window_length:-nf_steps],
            y=true_temperature*(TEMPERATURE_MAX - TEMPERATURE_MIN) + TEMPERATURE_MIN,
            showlegend=True,
            mode="lines+markers",
            name = f"True Curve {i+1}",
            line_color=batch_color
        )
    )

fig.update_yaxes(title="Temperature [°C]")
fig.update_xaxes(title="State of Charge (Normalized Capacity)")
fig.update_layout(height=500)

130
1589


In [35]:
# Checkpoint naming scheme (fields are concatenated in this order):
#   D / C  - discharge or charge
#   h<k>   - history window of k, e.g. h6 is a history window of 6
#   p<k>   - predicts k time steps into the future, e.g. p3
#   ID<k>  - discharge interpolation with k points, e.g. ID20
#   e<k>   - epochs trained, e.g. e50; when absent, 250 + early stopping
# NOTE(review): the cells above use history_window=4 and epochs=25, which does
# not match the h6/e20 encoded in this name - confirm before reusing it.
model.save(
    "/Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID20e20"
)



INFO:tensorflow:Assets written to: /Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID20e20/assets


INFO:tensorflow:Assets written to: /Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID20e20/assets


In [33]:
# model.save("/Users/mac/Desktop/CMU/10701MachineLearning/project/10701-mvf-bto/pre_train/midterm/Dh6p3ID20/Dh6p3ID20.h5")