<a name='0'></a>
## Import

In [None]:
# Mount Google Drive at /content/drive (Colab only) so the files copied in the
# next cell are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy two helper modules and sim_data.csv from Drive into the current working directory.
# NOTE(review): `global_dataclass` and `battery_transformer` are imported by the import
# cell below but are not copied here — confirm they are already on the import path.
!cp /content/drive/MyDrive/transformer_soc/rolling_and_plot_dc.py .
!cp /content/drive/MyDrive/transformer_soc/sim_data.csv .
!cp /content/drive/MyDrive/transformer_soc/transformer_helper_dc.py .

In [None]:
# from os import environ
# environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
# # Suppresses TensorFlow warnings triggered by TensorFlow's incompatibility with the Apple M1 chip.
# # Not needed on non-Apple-Silicon devices, e.g. Google Colab.

In [None]:
import numpy as np
import pandas as pd
# !pip install jupyterplot
from jupyterplot import ProgressPlot as PP

from global_dataclass import G
from battery_transformer import *
from transformer_helper_dc import *
from rolling_and_plot_dc import data_plot, rolling_split, normalize, validate

%reload_ext autoreload
%autoreload 2

In [None]:
# Report how many GPUs TensorFlow can see in this runtime.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

## Table of Contents

- [Import](#0)
- [Preprocessing](#win)
- [Model](#model)
- [Learning Rate Scheduler](#loss)
- [Training](#train)
- [Validate](#val)

**Literature:**


According to [A Transformer-based Framework for Multivariate Time Series Representation Learning](https://dl.acm.org/doi/abs/10.1145/3447548.3467401):
Using **Batch Normalization is significantly more effective** for multivariate time-series than using the traditional Layer Normalization method found in NLP.

In addition, according to [Deep learning approach towards accurate state of charge estimation for lithium-ion batteries using self-supervised transformer model](https://www.nature.com/articles/s41598-021-98915-8#Sec9):
Using a transformer network while **forgoing the Decoder Layer** is more effective for the application of State-of-Charge estimation.

**Self-Attention**
$$
\text { Attention }(Q, K, V)=\operatorname{softmax}\left(\frac{Q K^{T}}{\sqrt{d_{k}}}+{M}\right) V
$$


**INPUT:** Voltage, Current, SOC at times:
$$t - window\_size - 1 \rightarrow t - 1 $$

**Note**

Embedding layers cannot be used with battery data: they look up rows by non-negative integer index, whereas the measurements are floating-point values that can also be negative.

<a id="win"></a>
# Preprocessing

In [None]:
# from google.colab import files
# Load the raw dataset in this cell so that `file` exists after a fresh
# "Restart & Run All", and so the in-place SOC scaling below always starts from
# unscaled data (re-running this cell no longer multiplies by 100 twice).
file = pd.read_csv("/content/sim_data.csv")
# if using sim_data.csv: rescale the "soc" column by 100
# (presumably fraction -> percent — confirm against the data file).
file["soc"] *= 100.0

In [None]:
# Plot SOC against test time for the raw data.
# The title now matches the plotted axes (it previously read "OCV v SOC",
# although no voltage is plotted here).
data_plot(data = [file],
          title="SOC v Test Time",
          x = ["test time (sec)"],
          y = ["soc"],
          markers = "lines",
          color = "darkorchid",
          x_title = "Test Time (sec)",
          y_title = "SOC"
         )

In [None]:
# Keep only the model features, take every G.slicing-th row, then normalise.
# NOTE: this rebinds `file`, so re-running the cell slices and normalises again.
feature_columns = ["current", "voltage", "soc"]
downsampled = file.loc[:, feature_columns].iloc[::G.slicing]
file = normalize(downsampled, G.capacity)
#uses sklearn.preprocessing

In [None]:
# Window the frame and split it into train/test inputs and targets.
#uses sklearn.model_selection
splits = rolling_split(file, G.window_size, G.tgt_len, train=True)
x_train, x_test, y_train, y_test = splits
print(*(part.shape for part in splits))

# Batch both splits; incomplete final batches are dropped.
train_dataloader = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .batch(G.batch_size, drop_remainder=True)
)
test_dataloader = (
    tf.data.Dataset
    .from_tensor_slices((x_test, y_test))
    .batch(G.batch_size, drop_remainder=True)
)

In [None]:
# Peek at the first training batch only, to sanity-check shapes and dtype.
for x, y in train_dataloader.take(1):
    print(f"Shape of X [window, features]: {x.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

<a id ="model"></a>
# Model

Build Model

In [None]:
# Start from a clean Keras session, then instantiate and build the model so
# that its summary can be printed.
tf.keras.backend.clear_session()

input_shape = (G.batch_size, G.window_size, G.num_features)
model = Transformer()
model.build(input_shape)
model.summary(expand_nested=True)

**Loading Already Saved Progress**

In [None]:
# Restore previously saved weights from Drive into the freshly built model.
# NOTE(review): requires Drive to be mounted and the checkpoint to exist — skip
# this cell when training from scratch.
model.load_weights("/content/drive/MyDrive/transformer_soc/decoder/model_weights.tf")

# scheduler_state = np.load("/content/drive/MyDrive/transformer_soc/decoder/scheduler_state.npy")
# print(f"Saved learning_rate, T_cur, and T_i: {scheduler_state}")

# try:
#     G.learning_rate, G.T_cur, G.T_i = scheduler_state
# except NameError:
#     "global_dataclass.py has not been imported"

<a id = "loss"></a>
# LR Scheduler

**Learning Rate Scheduler**

Cosine Annealing with Warm Restarts, proposed by Loshchilov and Hutter in [SGDR: Stochastic Gradient Descent with Warm Restarts](https://doi.org/10.48550/arXiv.1608.03983)

$$\mu_t = \mu_{min} + \frac{1}{2}(\mu_{max} - \mu_{min})\cdot (1 + \cos (\frac{T_{cur}}{T_i}\pi))$$

Where:
 - $\mu$ is the learning_rate, subscript $t$ is for time = $t$
 - $T_{cur}$ is the number of epochs since the last restart
 - $T_i$ is the number of epochs between two restarts

Note:
 - When $T_{cur} = T_i \rightarrow \mu_t = \mu_{min}$
 - When $T_{cur} = 0 \rightarrow \mu_t = \mu_{max}$

In [None]:
def schedule(batch, logs=None):
    '''
    Per-batch cosine annealing with warm restarts (SGDR).

    Intended to be used as ``on_batch_end`` of a ``LambdaCallback``. On every
    call it:
      1. computes the learning rate for the current cycle position
         ``G.T_cur / G.T_i`` between ``G.min_learning_rate`` and
         ``G.learning_rate``;
      2. advances ``G.T_cur`` by the fraction of an epoch covered by one batch;
      3. restarts the cycle (``G.T_cur = 0`` and ``G.T_i *= G.T_mult``) once the
         end of the cycle has been reached;
      4. writes the computed rate into ``model.optimizer.learning_rate``.

    Args:
        batch: batch index supplied by Keras (unused).
        logs: metrics dict supplied by Keras (unused).

    Returns:
        None. The schedule state lives in the global ``G`` and the rate is set
        on the global ``model``.
    '''
    mu_i = G.min_learning_rate + 0.5 * (
        G.learning_rate - G.min_learning_rate) * (
            1 + np.cos(np.pi * G.T_cur / G.T_i))

    # One batch covers batch_size / len(x_train) of an epoch.
    G.T_cur += G.batch_size / len(x_train)

    # Restart once the cycle end is reached OR passed. T_cur is accumulated in
    # floating-point steps that generally do not land on T_i (e.g. whenever
    # len(x_train) is not a multiple of the batch size), so a pure equality
    # test would skip the restart forever. The isclose term keeps the previous
    # behaviour for values that end up a rounding error below T_i.
    if G.T_cur >= G.T_i or np.isclose(G.T_cur, G.T_i):
        G.T_i *= G.T_mult
        G.T_cur = 0.0
    K.set_value(model.optimizer.learning_rate, mu_i)

### Save Model Progress Callbacks

In [None]:
# class SaveModel(tf.keras.callbacks.Callback):
#     def on_epoch_end(self, epoch, logs = None):
#         if epoch != 0 and epoch % 15 == 0:
#             self.model.save_weights("/content/drive/MyDrive/transformer_soc/model_weights.h5")

# model_save = SaveModel() #This is optional

### Early Stopping and Saving Best Model checkpoint Callbacks

In [None]:
# model_options = tf.saved_model.SaveOptions(experimental_io_device="/job:localhost")
# earlystopping = EarlyStopping(monitor='val_mean_absolute_percentage_error',
#                               patience=150,
#                               verbose=0,
#                               mode='min')
# mcp_save = ModelCheckpoint('/content/drive/MyDrive/transformer_soc/decoder/model_weights',
#                            save_format = "tf",
#                            save_best_only=True,
#                            monitor='val_mean_absolute_percentage_error',
#                            mode='min')
# #                            options = model_options)

**ProgressPlot Callback**

In [None]:
class ProgressCallback(tf.keras.callbacks.Callback):
    """Keras callback that feeds per-epoch training metrics to the global ProgressPlot ``pp``."""

    def on_epoch_end(self, epoch, logs = None):
        """Push this epoch's loss and (100 - MAPE) to ``pp``, one value per plot."""
        epoch_loss = logs["loss"]
        # "Accuracy" here is simply 100 minus the mean absolute percentage error.
        epoch_accuracy = 100.0 - logs["mean_absolute_percentage_error"]
        pp.update([[epoch_loss], [epoch_accuracy]])

<a id ="train"></a>

# Training 
reset the cos_anneal scheduler $\downarrow$

In [None]:
# Put the warm-restart schedule back at the start of its first cycle.
G.T_cur = 0.0   # epochs elapsed since the last restart
G.T_i = 1       # length of the current cycle, in epochs
G.T_mult = 2    # factor applied to T_i at every restart

`model.fit()` and `model.compile()` asset declaration $\downarrow$

In [None]:
# Log-cosh loss used for training.
loss_object = tf.keras.losses.LogCosh()

# Adam, starting at G.learning_rate; the cos_anneal callback overwrites the
# rate after every batch.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=G.learning_rate,
    beta_1=0.9,
    beta_2=0.999,
)

# Callbacks for the model.fit() call:
#   cos_anneal - runs `schedule` at the end of every batch
#   pp_update  - updates the progress plot at the end of every epoch
cos_anneal = tf.keras.callbacks.LambdaCallback(on_batch_end=schedule)
pp_update = ProgressCallback()

**Note:** `model_save` or `mcp_save` can be added to the `callbacks` argument of `model.fit()` (their cells above are commented out and must be uncommented first).
They save the model weights, or save best-model checkpoints to Google Drive, respectively.
There is also an `earlystopping` callback; it is optional and can be ignored.

In [None]:
# Run this cell only once per model: the notebook will crash if you compile an
# already compiled model.
model.compile(
    optimizer=optimizer,
    loss=loss_object,
    metrics=["mean_absolute_percentage_error"],
)

In [None]:
# Live plot updated by ProgressCallback: one line ("Train Loop") in each of the two plots.
pp = PP(plot_names = ["Mean Log Loss", "% Accuracy"],
        line_names = ["Train Loop"],
        x_label = "epochs"
       )
# Dont compile after training, it causes issues.
try:
    history = model.fit(train_dataloader,
                        epochs = 4,
                        verbose = 1,
                        callbacks = [cos_anneal, pp_update]
                        )
finally:
    # Turn the live plot into a static figure, even if training is interrupted.
    pp.finalize()

In [None]:
# Evaluate the trained model on the held-out test batches; the returned values
# (loss plus the compiled MAPE metric) are shown as the cell output.
model.evaluate(test_dataloader,verbose = 1)

<a id = "val"></a>
# Validate

**Dev Set**

In [None]:
# Run the project's `validate` helper on the test (dev) batches.
# NOTE(review): what `validate` returns and plots is defined in rolling_and_plot_dc.py — confirm there.
visualize_dev = validate(model, test_dataloader, dev = True)

**Entire Dataset**

In [None]:
# Window the whole series; with train=False only inputs and targets are
# returned, with no train/test split.
x_set, y_set = rolling_split(file, G.window_size, G.tgt_len, train = False)

set_dataloader = (
    tf.data.Dataset
    .from_tensor_slices((x_set, y_set))
    .batch(G.batch_size, drop_remainder=True)
)

# Sanity-check the shapes of the first batch only.
for x, y in set_dataloader.take(1):
    print(x.shape, y.shape)

visualize = validate(model, set_dataloader, dev = False)