# Testing Branch

In [None]:
# Mount Google Drive at /content/drive so the project files stored there can be
# copied into the runtime. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cp /content/drive/MyDrive/pytorch_colab/rolling_and_plot.py .
!cp /content/drive/MyDrive/pytorch_colab/sim_data.csv .

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader #, Dataset
# from torch.nn.modules.activation import Sigmoid

# from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
# from sklearn.model_selection import train_test_split

from dataclasses import dataclass

In [None]:
import numpy as np
import pandas as pd
# jupyterplot is installed at runtime; it provides the live ProgressPlot used in the training cell.
!pip install jupyterplot
from jupyterplot import ProgressPlot as PP

# Project-local helpers (copied from Drive in an earlier cell).
from rolling_and_plot import data_plot, normalize, rolling_split, validate
# NOTE(review): the star import presumably supplies BatterySet, LSTMNetwork and
# LogCoshLoss, which are used below but not defined in this notebook - confirm.
from lstm_classes import *

# Re-import edited local modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2

In [None]:
# Training is pinned to the CPU.
# NOTE: train_loop/test_loop never move the batches to `device`, so selecting a
# GPU here would also require moving x and y onto it inside those loops.
device = torch.device("cpu")
print(f"Using {device} device")

# TOC

* [Preprocessing](#pre)

* [Data Loading](#dload)

* [Models](#model)

* [Training](#train)

* [Validate](#val)

<a id="pre"></a>
# PreProcessing 

## <a id="g">G class</a>

In [None]:
@dataclass
class G:
    """Global hyperparameters and preprocessing constants for the notebook.

    The values are read as class attributes (e.g. `G.batch_size`). Every
    annotated attribute is also a real dataclass field, so `G(epochs=4)` builds
    an overridden copy without touching the class-level defaults.
    """
    capacity: float = 20 # Ampere hours
    num_features: int = 3 # current, voltage, soc
    lstm_nodes: int = 256
    window_time: int = 64 # seconds
    window_size: int = 16
    # Row stride used when down-sampling the raw data. Deliberately left
    # un-annotated: it is derived from the class-level defaults above and is
    # therefore a plain class attribute, not an overridable field.
    slicing = window_time // window_size
    batch_size: int = 16
    # Use a power of `T_mult` when training with cosine annealing with warm
    # restarts, because the cycles restart on a power of `T_mult`.
    epochs: int = 128
    learning_rate: float = 0.0035
    weight_decay: float = 0 # Do not implement weight decay alongside batch normalization

In [None]:
# Load the simulated dataset copied into the runtime earlier.
file = pd.read_csv("/content/sim_data.csv")
# Scale the `soc` column by 100 (presumably fraction -> percent - confirm against the data).
file["soc"] *= 100 #if sim_data.csv only

In [None]:
# Plot state of charge against test time. The title now matches the axes
# (it previously read "OCV v SOC" although no voltage is plotted).
data_plot(data = [file],
          title="SOC v Test Time",
          x = ["test time (sec)"],
          y = ["soc"],
          markers = "lines",
          color = "darkorchid",
          x_title = "Test Time (sec)",
          y_title = "SOC"
         )

In [None]:
# Keep only the three model features, take every `G.slicing`-th row, and normalize.
# WARNING: this rebinds `file`, so the cell is not idempotent - re-running it
# without re-running the CSV-loading cell slices and normalizes the data again.
file = normalize(file.loc[:,["current","voltage","soc"]].iloc[::G.slicing], G.capacity)

In [None]:
# Build rolling windows of length `G.window_size` and split them into train/test parts.
x_train, x_test, y_train, y_test = rolling_split(file, G.window_size)
# Display the four shapes as a sanity check (last expression is the cell output).
x_train.shape, x_test.shape, y_train.shape, y_test.shape

# Data Loader <a id="dload"></a>

In [None]:
# Wrap the windows in datasets first, then batch them. Both loaders keep the
# original sample order and drop the final incomplete batch.
loader_options = dict(batch_size=G.batch_size, shuffle=False, drop_last=True)

train_set = BatterySet(x_train, y_train)
test_set = BatterySet(x_test, y_test)

train_dataloader = DataLoader(train_set, **loader_options)
test_dataloader = DataLoader(test_set, **loader_options)

In [None]:
# Peek at the first training batch only (the loop breaks after one iteration)
# to check tensor shapes and the target dtype.
for X,y in train_dataloader:
    print(f"Shape of X [window, features]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

In [None]:
# for batch, (x,y) in enumerate(test_dataloader.dataset):
#     print(batch,x,y)
#     break

# Creating Models <a id="model"></a>

Go to [G class](#g)

A pretrained model can be loaded with the cell that follows the optimizer cell.<br>Run the cell directly below first, because it creates the model that the weights are loaded into.

In [None]:
# Instantiate the network on the selected device.
model = LSTMNetwork().to(device)
# Compiled wrapper around `model`, used for training and evaluation below.
# The uncompiled `model` is kept because the checkpoint-loading cell loads weights into it.
compiled_model = torch.compile(model)

`torch.compile` first appeared in the nightly builds and has been part of the stable release since PyTorch 2.0; it makes the model much more efficient. Its `mode` argument can either reduce overhead at the cost of higher memory usage (`"reduce-overhead"`) or make the model as efficient as possible at the cost of a much longer compile time (`"max-autotune"`).

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, epoch, scheduler=None):
    """Train `model` for one epoch and report the mean loss and accuracy.

    Args:
        dataloader: Iterable of `(x, y)` batches that supports `len()`.
        model: Module to optimise; it is switched to training mode here.
        loss_fn: Callable `loss_fn(predict, y)`; its result is reduced with `.mean(0)`.
        optimizer: Optimizer stepped once per batch.
        epoch: Current epoch number. Not used with OneCycleLR; only needed for
            the CosineAnnealingWarmRestarts variant described in the loop.
        scheduler: Optional learning-rate scheduler stepped once per batch.
            When omitted, the notebook-level `scheduler` variable is used if it
            exists; if there is none, no scheduler is stepped.

    Returns:
        `(train_loss, accuracy)`: the loss summed over batches divided by the
        number of batches (a float, NaN if a NaN loss was hit), and
        `100 - mean percentage error` (a 0-dim tensor once at least one batch
        was processed).
    """
    if scheduler is None:
        # Backward compatibility: earlier versions read the global directly.
        scheduler = globals().get("scheduler")

    size = len(dataloader)
    # `size // 3` is 0 for fewer than three batches, which used to raise
    # ZeroDivisionError in the modulo below.
    log_every = max(1, size // 3)
    train_loss, perc_error = 0.0, 0.0
    model.train()
    for batch, (x, y) in enumerate(dataloader):
        optimizer.zero_grad() # clear gradients left over from the previous batch

        # forward (reshaped to the target's shape, exactly as test_loop does)
        predict = model(x).reshape(y.shape)
        loss = loss_fn(predict, y).mean(0)

        train_loss += loss.item()

        # Bail out BEFORE the backward pass and optimizer step, so a NaN loss
        # cannot overwrite the weights with NaNs. train_loss is already NaN at
        # this point, which signals the failure to the caller.
        if loss.isnan():
            print("loss was NaN")
            break

        # backward
        loss.backward()
        optimizer.step()

        # OneCycleLR must be stepped after every batch.
        # For CosineAnnealingWarmRestarts pass the fractional epoch instead,
        # e.g. scheduler.step(epoch + batch / size) - true division is needed,
        # floor division would only ever produce whole epochs.
        if scheduler is not None:
            scheduler.step()

        if batch % log_every == 0:
            print(f"batch mean loss: {loss.item():>7f}  [{batch:4d}/{size:4d}]")

        with torch.no_grad(): # tracked to compare bias/variance against the test set
            perc_error += torch.mean(torch.abs(predict - y) / (y + 1e-2) * 100, (0, 1))

    train_loss /= size
    perc_error /= size
    print(f"Train Error: \nAverage Accuracy: {100 - perc_error}%, Avg Loss: {train_loss:>8f}\n")
    return train_loss, 100.0 - perc_error

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader)
    test_loss, perc_error = 0.0, 0.0
    model.eval()
    with torch.no_grad(): #doesnt update parameters (we are testing not training)
        for counter, (x,y) in enumerate(dataloader):
            predict = model(x).reshape(y.shape)
            test_loss += loss_fn(predict, y).mean(0).item()
            perc_error += torch.mean(torch.abs(predict - y) / (y+ 1e-2) * 100, (0,1))
           
            counter += 1
            if counter % (size // 2) == 0:
                print(f"{counter} / {size} tested")

    test_loss /= size
    perc_error /= size
    print(f"Test Error: \nAverage Accuracy: {100 - perc_error}%, Avg Loss: {test_loss:>8f}\n")
    return test_loss, 100.0 - perc_error

**Literature**

*Loss*<br>
LogCoshLoss is the loss function used by Hannan et al. in their article in the journal *Scientific Reports* (published by Nature Portfolio): [Deep learning approach towards accurate state of charge estimation for lithium-ion batteries using self-supervised transformer model](https://www.nature.com/articles/s41598-021-98915-8).

However, they used a Transformer network, whereas this notebook trains an LSTM.

*Learning Rate*<br>
The OneCycle learning rate scheduler with cosine annealing, introduced by Leslie N. Smith in his paper [A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch size, momentum, and weight decay](https://doi.org/10.48550/arXiv.1803.09820), seems to be the best scheduler according to Fast.AI.

 - The original 3-phase approach seems to work significantly better than the 2-phase method by Fast.AI

Cosine annealing with warm restarts, proposed by Loshchilov and Hutter in [SGDR: Stochastic Gradient Descent with Warm Restarts](https://doi.org/10.48550/arXiv.1608.03983), is the alternative scheduler (left commented out in the cell below).

In [None]:
# loss_fn = nn.HuberLoss()
loss_fn = LogCoshLoss()

# The compiled wrapper exposes the parameters of the underlying `model`.
# The `lr` given here is only a placeholder: OneCycleLR below overwrites it
# with its own schedule (starting at max_lr / div_factor).
optimizer = torch.optim.Adam(compiled_model.parameters(),
                             lr = G.learning_rate,
                             weight_decay = G.weight_decay
                            )

#OneCycle scheduler needs step() to be called after every batch.
# Its schedule is fixed to epochs * steps_per_epoch steps in total, so re-create
# it (re-run this cell) before starting another training run.
# `verbose` is no longer passed: False was already the default and the argument
# is deprecated in recent PyTorch releases.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
                                                max_lr = G.learning_rate,
                                                epochs = G.epochs,
                                                steps_per_epoch = len(train_dataloader),
                                                anneal_strategy = "cos", #cosine annealing
                                                div_factor = 35, # initial lr = max_lr / 35
                                                three_phase = True
                                                )

#CosineAnnealing with WarmRestarts: step() can be called after every batch, but it then depends on the (fractional) epoch:
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
#                                                                  T_0 = 1,
#                                                                  T_mult = 4,
#                                                                  eta_min = 7e-11,
#                                                                  verbose = False)

In [None]:
# Load pretrained weights into the uncompiled module (the compiled wrapper
# shares its parameters, so it picks them up as well).
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict contains.
checkpoint = torch.load("drive/MyDrive/pytorch_colab/sim_model_state_dict.pth",
                        map_location = device,
                        weights_only = True)

# A state dict saved from a torch.compile wrapper prefixes every key with
# "_orig_mod.", which the plain module rejects. Strip the prefix so both kinds
# of checkpoint load.
compile_prefix = "_orig_mod."
checkpoint = {
    (key[len(compile_prefix):] if key.startswith(compile_prefix) else key): value
    for key, value in checkpoint.items()
}

model.load_state_dict(checkpoint)
model.train()

# Training <a id="train"></a>

In [None]:
# Live plot of loss and accuracy for the train and test loops, one point per epoch.
pp = PP(plot_names = ["Mean Log Loss", "% Accuracy"],
        line_names = ["Train Loop", "Test Loop"],
        x_label = "epochs"
       )

for epoch in range(1, G.epochs + 1):
    print(f"Epoch {epoch}/{G.epochs}\n----------------------------------------")
    train_loss, train_acc = train_loop(train_dataloader, compiled_model, loss_fn, optimizer, epoch)
    if np.isnan(train_loss):
        # train_loop abandons the epoch when the loss becomes NaN; running
        # further epochs cannot recover, so stop instead of burning compute.
        print("Stopping early: training loss is NaN")
        break
    test_loss, test_acc = test_loop(test_dataloader, compiled_model, loss_fn)

    # The accuracies come back as 0-dim tensors; hand plain floats to the plot.
    pp.update([[train_loss, test_loss], [float(train_acc), float(test_acc)]])

    # if (epoch != 0) and (epoch % 50 == 0):
    #     torch.save(model.state_dict(), "drive/MyDrive/pytorch_colab/model.pth")
    #     print("Saved the model parameters\n")
else:
    # Only reached when every epoch ran (no early stop).
    print("Completed")

# Freeze the live plot into static output so it survives in the saved notebook.
pp.finalize()

In [None]:
# Save the weights of the underlying module, not of the torch.compile wrapper:
# the wrapper's state dict prefixes every key with "_orig_mod.", which
# `model.load_state_dict` in the loading cell does not accept. Both objects
# share the same parameters, so the trained weights are identical.
torch.save(model.state_dict(), "drive/MyDrive/pytorch_colab/sim_model_state_dict.pth")

# Validation <a id="val"></a>

**Dev Set**

In [None]:
# Run the project's `validate` helper on the held-out (dev) loader; the result is kept for inspection.
visualize_dev = validate(compiled_model, test_dataloader, dev = True)

**Entire Dataset**

In [None]:
# Build rolling windows over the whole dataset; with train = False the helper
# returns a single (x, y) pair instead of a train/test split.
x_set, y_set = rolling_split(file, G.window_size, train = False)

# The name is first bound to the dataset and then rebound to its DataLoader.
# NOTE(review): drop_last = True discards the final incomplete batch, so the
# last few windows are not validated - confirm this is intended.
set_dataloader = BatterySet(x_set, y_set)
set_dataloader = DataLoader(set_dataloader, batch_size=G.batch_size, shuffle=False, drop_last = True)

visualize = validate(compiled_model, set_dataloader, dev = False)