In [None]:
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor, Lambda

In [None]:
import path_helper
import numpy as np
import pandas as pd
from battery_plot import *

In [None]:
# Get cpu or gpu device for training.
# Prefer Apple's Metal backend (the original target), then CUDA, and finally
# fall back to the CPU so the notebook still runs on machines without a GPU.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

# TOC

* [Data Loader](#dload)

* [Creating Models](#model)

In [None]:
@dataclass
class G:
    """Notebook-wide hyperparameters.

    The values are read as class attributes (``G.window_size`` etc.). Each
    attribute is annotated so that ``@dataclass`` registers it as a field;
    without annotations the decorator generates no fields at all. Annotated
    defaults remain available as class attributes, so ``G.<name>`` access is
    unchanged, and ``G(epochs=5)`` can now build an overridden copy.
    """

    split_time: Any = None  # for now, might be useless
    sequence_length: int = 5  # not referenced elsewhere in this notebook
    num_features: int = 3  # delta_t, current, voltage
    window_size: int = 300  # rows per rolling window built by BatterySet
    batch_size: int = 64  # also reused as the LSTM hidden width in LSTMNetwork
    epochs: int = 2

# Data Loader <a id="dload"></a>

In [None]:
# Location of the SOC/OCV test log. The default is the original author's local
# copy; set the BATTERY_CSV environment variable to run the notebook elsewhere.
DATA_PATH = Path(
    os.environ.get(
        "BATTERY_CSV",
        "/Users/attar/Desktop/Python/coop/ocv/JMFM_17_SOC_OCV_94plus5_Test_220624_soc.csv",
    )
)
file = pd.read_csv(DATA_PATH)

In [None]:
# Hand the raw frame to the project plotting helper: "soc" against "test time (sec)".
# NOTE(review): the title says "OCV v SOC" while the plotted axes are test time
# vs soc - confirm which one is intended.
soc_trace_options = {
    "data": [file],
    "title": "OCV v SOC",
    "x": ["test time (sec)"],
    "y": ["soc"],
    "markers": "lines",
}
data_plot(**soc_trace_options)

In [None]:
# Time elapsed between consecutive samples. diff() leaves the first entry as
# NaN, so it is overwritten with 1.0; the last entry is forced to 1.0 as well.
mask = file["test time (sec)"].diff()
mask.iloc[[0, -1]] = 1.0
file["delta t"] = mask

In [None]:
# Three model inputs followed by the regression target.
model_columns = ["delta t", "current", "voltage", "soc"]
# Index label of the last training row; .loc slicing is inclusive on both ends.
last_train_label = 111498

train = file.loc[:last_train_label, model_columns]

test = file.loc[last_train_label + 1:, model_columns]

In [None]:
# np.array_split(train,len(train)//6) not applicable here

In [None]:
class BatterySet(Dataset):
    """Rolling-window dataset over a battery test log.

    Each sample pairs ``window_size`` consecutive rows of the input columns
    ("delta t", "current", "voltage") with the "soc" values of the same rows.
    All windows are built once in ``__init__`` and moved to the notebook-level
    ``device``.

    Args:
        dataframe: frame holding the "delta t", "current", "voltage" and
            "soc" columns.
        window_size: number of rows per sample. Defaults to ``G.window_size``.
    """

    FEATURE_COLUMNS = ["delta t", "current", "voltage"]
    TARGET_COLUMN = "soc"

    def __init__(self, dataframe, window_size=None):
        if window_size is None:
            window_size = G.window_size

        x, y = self.rolling(dataframe, window_size)

        # Despite its name, `logits` stores the model inputs; the attribute
        # name is kept so existing code that reads it keeps working.
        self.logits = torch.from_numpy(x).to(device)
        self.labels = torch.from_numpy(y).to(device)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return (self.logits[idx], self.labels[idx])

    def rolling(self, df, window_size):
        """Cut ``df`` into overlapping windows that advance one row at a time.

        Only complete windows are produced, so ``len(df) - window_size + 1``
        samples come back. The previous implementation sliced the per-row
        window list with ``[window_size:]``, which also discarded the first
        complete window (it sits at position ``window_size - 1``).

        Args:
            df: frame with the feature columns and the "soc" column.
            window_size: number of consecutive rows per window (>= 1).

        Returns:
            ``(x, y)`` float32 arrays shaped ``(n_windows, window_size, 3)``
            and ``(n_windows, window_size)``. When ``df`` has fewer rows than
            ``window_size`` both arrays have ``n_windows == 0``.

        Raises:
            ValueError: if ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")

        features = df[self.FEATURE_COLUMNS].to_numpy(dtype="float32")
        targets = df[self.TARGET_COLUMN].to_numpy(dtype="float32")

        if len(features) < window_size:
            # Not enough rows for even one full window: return empty arrays
            # that still carry the per-sample shape.
            empty_x = np.empty((0, window_size, features.shape[1]), dtype="float32")
            empty_y = np.empty((0, window_size), dtype="float32")
            return empty_x, empty_y

        sliding_window_view = np.lib.stride_tricks.sliding_window_view

        # The view has shape (n_windows, n_features, window_size); swap the
        # last two axes so each sample reads (time step, feature).
        x_view = sliding_window_view(features, window_size, axis=0).transpose(0, 2, 1)
        y_view = sliding_window_view(targets, window_size)

        # The views are read-only and overlap in memory; copy them into
        # ordinary contiguous arrays before handing them to torch.
        return np.ascontiguousarray(x_view), np.ascontiguousarray(y_view)


In [None]:
# Build the windowed datasets once, under their own names, then wrap them in
# loaders. Keeping dataset and loader names distinct makes this cell safe to
# re-run: nothing is rebound from a Dataset to a DataLoader.
train_dataset = BatterySet(train)
test_dataset = BatterySet(test)

# Batch size comes from the shared configuration (G.batch_size is 64).
train_dataloader = DataLoader(train_dataset, batch_size=G.batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=G.batch_size)

In [None]:
# IPython-only help lookup (trailing "?") on the dataset wrapped by the test
# loader. Leftover from interactive exploration; not valid plain Python.
test_dataloader.dataset?

In [None]:
# Pull a single batch from the loader to sanity-check tensor shapes. Going
# through the loader (not its .dataset) shows the batched layout.
X, y = next(iter(train_dataloader))
print(f"Shape of X [batch, window, features]: {X.shape}")
print(f"Shape of y [batch, window]: {y.shape} {y.dtype}")

In [None]:
# Show only the first (index, inputs, targets) triple of the test dataset;
# zip() with a one-element range stops after that first sample.
for batch, (x, y) in zip(range(1), test_dataloader.dataset):
    print(batch, x, y)

# Creating Models <a id="model"></a>

In [None]:
# IPython-only help lookup for nn.LSTM; leftover from interactive exploration.
nn.LSTM?

In [None]:
# IPython-only help lookup for nn.BatchNorm1d; leftover from interactive exploration.
nn.BatchNorm1d?

In [None]:
# Define model
class LSTMNetwork(nn.Module):
    """Single-layer LSTM followed by a small fully connected head.

    The head is applied to the LSTM output at every time step, so the model
    emits one value per time step with a trailing dimension of size 1.

    Args:
        num_features: size of each input time step. Defaults to
            ``G.num_features``.
        hidden_size: width of the LSTM hidden state. Defaults to
            ``G.batch_size``, the value the notebook has always used for the
            hidden width; pass it explicitly to decouple it from the batch size.
    """

    def __init__(self, num_features=None, hidden_size=None):
        super().__init__()

        if num_features is None:
            num_features = G.num_features
        if hidden_size is None:
            hidden_size = G.batch_size

        self.lstm1 = nn.LSTM(num_features, hidden_size, 1, batch_first=True)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.ReLU(),
            nn.Linear(hidden_size // 4, 1),
        )

    def forward(self, x):
        # The LSTM also returns its final (hidden, cell) state; only the
        # per-step outputs feed the head.
        x_out, _ = self.lstm1(x)
        logits = self.linear_relu_stack(x_out)
        return logits

model = LSTMNetwork().to(device)
print(model)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, scheduler=None):
    """Train ``model`` for one epoch over the batches of ``dataloader``.

    Args:
        dataloader: loader yielding ``(x, y)`` batches.
        model: module called as ``model(x)``; its output is reshaped to
            ``y.shape`` before the loss is computed.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        scheduler: optional learning-rate scheduler, stepped once at the end
            of the epoch. When omitted, a module-level ``scheduler`` variable
            is used if one exists, so four-argument calls keep their schedule.
    """
    if scheduler is None:
        scheduler = globals().get("scheduler")

    size = len(dataloader.dataset)
    model.train()

    # Iterate the loader itself (not dataloader.dataset) so that batching and
    # shuffling actually take effect.
    for batch, (x, y) in enumerate(dataloader):
        # forward
        predict = model(x)
        loss = loss_fn(predict.reshape(y.shape), y)

        # backward
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            current = batch * len(x)
            print(f"loss: {loss.item():>7f}  [{current:5d}/{size:5d}]")

    # Step the schedule per epoch. Stepping after every optimizer update made
    # StepLR shrink the learning rate to effectively zero almost immediately.
    if scheduler is not None:
        scheduler.step()

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader.dataset)
    test_loss, correct = 0,0
    
    with torch.no_grad(): #doesnt update parameters (we are testing not training)
        for x,y in dataloader.dataset:
            predict = model(x)
            test_loss += loss_fn(predict.reshape(predict.shape[:2]), y).item()
            correct+= (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \nAccuracy: {100*correct:>0.1f}%, Avg Loss: {test_loss:>8f}\n")

In [None]:
loss_fn = nn.HuberLoss()
# NOTE(review): lr=0.2 is far above Adam's documented default of 1e-3 - confirm
# this is intentional.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=0.2,
                             weight_decay=1e-5)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor = 0.2, patience = 7, cooldown = 1)
# Multiply the learning rate by 0.2 after every 3 scheduler steps. The
# `last_epoch=-1` and `verbose=False` arguments were the defaults; `verbose`
# is deprecated in recent PyTorch releases, so both are omitted.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.2)

In [None]:
# Alternate one training pass and one evaluation pass for G.epochs rounds.
for epoch_number in range(1, G.epochs + 1):
    print(f"Epoch {epoch_number}\n----------------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Completed")