In [1]:
import os


import torch

from datasets.LGDataset import LGDataset, get_dataloader

In [2]:
# --- Paths and batch size ---------------------------------------------------
# os.path.abspath('') resolves to the current working directory, so the
# project root is taken to be the parent of the directory the notebook runs in.
PROJECT_DIR = os.path.dirname(os.path.abspath(''))
DATA_DIR = 'data'
BATCH_SIZE = 2048

# --- Datasets -----------------------------------------------------------------
train_csv = os.path.join(PROJECT_DIR, DATA_DIR, 'train.csv')
val_csv = os.path.join(PROJECT_DIR, DATA_DIR, "valid.csv")
train_dataset = LGDataset(train_csv)
val_dataset = LGDataset(val_csv)

# --- Loaders: only the training loader is shuffled ----------------------------
train_loader = get_dataloader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = get_dataloader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Pull a single training batch to inspect the tensor shapes.
first_batch = next(iter(train_loader))
x, y = first_batch
print(x.shape, y.shape)

torch.Size([2048, 5]) torch.Size([2048])


In [3]:
from models.soc_estimator import SoCEstimator

# Constructor arguments collected in one place; the input width is read from
# the second dimension of the sample batch `x`.
estimator_kwargs = dict(
    input_size=x.shape[1],
    num_layers=5,
    hidden_size=8,
    is_residual=True,
)
model = SoCEstimator(**estimator_kwargs)

In [4]:
# Fit the model with PyTorch Lightning.
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

# Experiment tracking: TensorBoard logs are written under logs/soc-estimator.
logger = TensorBoardLogger('logs', name='soc-estimator')

# Stop training once val_loss has gone 10 checks without a new minimum.
early_stop_callback = pl.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=10, verbose=True
)

trainer = pl.Trainer(
    max_epochs=100,
    accelerator="auto",
    logger=logger,
    callbacks=[early_stop_callback],
)
trainer.fit(model, train_loader, val_loader)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params | Mode 
----------------------------------------------
0 | model     | Model   | 1.7 K  | train
1 | criterion | MSELoss | 0      | train
----------------------------------------------
1.7 K     Trainable params
0         Non-trainable params
1.7 K     Total params
0.007     Total estimated model params size

Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 12.45it/s]

/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


                                                                           

/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 328/328 [00:05<00:00, 61.55it/s, v_num=6]

Metric val_loss improved. New best score: 0.001


Epoch 1: 100%|██████████| 328/328 [00:05<00:00, 60.47it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 3: 100%|██████████| 328/328 [00:05<00:00, 58.84it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 4: 100%|██████████| 328/328 [00:05<00:00, 61.49it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 5: 100%|██████████| 328/328 [00:05<00:00, 60.73it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 6: 100%|██████████| 328/328 [00:05<00:00, 61.56it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 11: 100%|██████████| 328/328 [00:05<00:00, 59.67it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 13: 100%|██████████| 328/328 [00:05<00:00, 57.57it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 18: 100%|██████████| 328/328 [00:05<00:00, 56.40it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 25: 100%|██████████| 328/328 [00:05<00:00, 58.02it/s, v_num=6]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 35: 100%|██████████| 328/328 [00:05<00:00, 58.13it/s, v_num=6]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.000. Signaling Trainer to stop.


Epoch 35: 100%|██████████| 328/328 [00:05<00:00, 58.04it/s, v_num=6]


In [15]:
# Restore the estimator from a Lightning checkpoint written by an earlier run.
# The path is built relative to the working directory -- the same base the
# TensorBoardLogger('logs', name='soc-estimator') cell writes to -- rather than
# an absolute path inside one user's home directory, so the notebook is not
# tied to a single machine.
# NOTE(review): assumes the notebook is run from the directory that contains
# `logs/`; adjust the version / file name to the run that should be restored.
CKPT_PATH = os.path.join(
    "logs",
    "soc-estimator",
    "version_5",
    "checkpoints",
    "epoch=36-step=12136.ckpt",
)

model = SoCEstimator.load_from_checkpoint(
    CKPT_PATH,
    input_size=x.shape[1],
    num_layers=5,
    hidden_size=8,
    is_residual=True,
)

In [19]:
# Replace the weights of the wrapped network (model.model) with the state dict
# stored in ../checkpoint.pt; the load result is shown as the cell output.
state_dict = torch.load("../checkpoint.pt")
model.model.load_state_dict(state_dict)

<All keys matched successfully>

In [20]:
# Run Lightning's test loop on the validation loader.
trainer.test(model, val_loader)

# Then recompute RMSE / MSE / MAE / max error by hand with scikit-learn.
import numpy as np
from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error

model.eval()
y_pred, y_true = [], []
# A single no_grad context around the whole loop: no autograd graph is needed
# for plain inference.
with torch.no_grad():
    for x, y in val_loader:
        y_pred.append(model(x).numpy())
        y_true.append(y.numpy())

y_pred = np.concatenate(y_pred)
y_true = np.concatenate(y_true)

# Every metric is reported multiplied by 100.
mse_raw = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse_raw)*100
mse = mse_raw*100
mae = mean_absolute_error(y_true, y_pred)*100
maxe = max_error(y_true, y_pred)*100

print(f'RMSE: {rmse}, MSE: {mse}, MAE: {mae}, MAXE: {maxe}')
# # save the model
# torch.save(model.state_dict(), 'soc-estimator.pth')

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 20/20 [00:00<00:00, 105.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss         0.00011690160317812115
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
RMSE: 1.081211355975217, MSE: 0.011690179962897673, MAE: 0.7731814403086901, MAXE: 5.402797698974609


In [21]:
from utils import count_parameters

# Call the project helper on the model; its return value is the cell output
# (presumably the number of model parameters -- confirm against utils).
count_parameters(model)

1713

In [22]:
from torch import nn

# Evaluate the model on the 25 degC test file.
model.to("cuda")
test_loss = 0

test_csv = os.path.join(
    PROJECT_DIR, DATA_DIR, "04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv"
)
test_dataset = LGDataset(test_csv)
test_loader = get_dataloader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Lightning test loop; the list of metric dicts it returns is the cell output.
trainer.test(model, test_loader)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0:   0%|          | 0/24 [00:00<?, ?it/s]

Testing DataLoader 0: 100%|██████████| 24/24 [00:00<00:00, 73.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss         0.00012307759607210755
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.00012307759607210755}]

In [23]:
from typing import List
def test_model_on_multiple_temps(model, batch_size, temp_files:List[str])->None:

    # predefine the collective preds and labels placeholder
    y_preds_all = []
    y_trues_all = []

    model.cuda()

    # evaluate each file
    for file in temp_files:
        test_dataset = LGDataset(os.path.join(
        os.path.join(PROJECT_DIR, DATA_DIR), 
            "04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv"
         ))
        test_loader = get_dataloader(test_dataset, batch_size=batch_size, shuffle=False)

        y_preds = []
        y_trues = []
        # evaluate preds
        for x, y in test_loader:
            y_preds.append(model(x.to("cuda")).detach().cpu().numpy())
            y_trues.append(y.numpy())

        # concatenate all batches
        y_preds = np.concatenate(y_preds)
        y_trues = np.concatenate(y_trues)

        y_preds_all+=y_preds.tolist()
        y_trues_all+=y_trues.tolist()

        # evaluate model performance
        mse = np.mean((y_preds - y_trues) ** 2)
        rmse = np.sqrt(mse)
        mae = np.mean(np.abs(y_preds - y_trues))
        maxe = np.max(np.abs(y_preds - y_trues))
        print('File:', file)
        print(
            f"MSE%: {mse*100:.3f}, RMSE%: {rmse*100:.3f}, MAE%: {mae*100:.3f}, MAXE%: {maxe*100:.3f}"
        )

    print('Overall--')
    # evaluate model performance
    mse = np.mean((y_preds_all - y_trues_all) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(y_preds_all - y_trues_all))
    maxe = np.max(np.abs(y_preds_all - y_trues_all))
    print('File:', file)
    print(
        f"MSE%: {mse*100:.3f}, RMSE%: {rmse*100:.3f}, MAE%: {mae*100:.3f}, MAXE%: {maxe*100:.3f}"
    )

In [24]:
from typing import List


def test_model_on_multiple_temps(model, batch_size, temp_files: List[str]) -> None:

    # predefine the collective preds and labels placeholder
    y_preds_all = []
    y_trues_all = []

    model.cuda()

    # evaluate each file
    for file in temp_files:
        test_dataset = LGDataset(
            os.path.join(
                os.path.join(PROJECT_DIR, DATA_DIR),
                file,
            )
        )
        test_loader = get_dataloader(test_dataset, batch_size=batch_size, shuffle=False)

        y_preds = []
        y_trues = []
        # evaluate preds
        for x, y in test_loader:
            y_preds.append(model(x.to("cuda")).detach().cpu().numpy())
            y_trues.append(y.numpy())

        # concatenate all batches
        y_preds = np.concatenate(y_preds)
        y_trues = np.concatenate(y_trues)

        y_preds_all += y_preds.tolist()
        y_trues_all += y_trues.tolist()

        # evaluate model performance
        mse = np.mean((y_preds - y_trues) ** 2)
        rmse = np.sqrt(mse)
        mae = np.mean(np.abs(y_preds - y_trues))
        maxe = np.max(np.abs(y_preds - y_trues))
        print("File:", file)
        print(
            f"-> MSE%: {mse*100:.3f}, RMSE%: {rmse*100:.3f}, MAE%: {mae*100:.3f}, MAXE%: {maxe*100:.3f}"
        )
        print()
    
    # Evaluate overall performance
    print("Overall--")
    # evaluate model performance
    y_preds_all = np.array(y_preds_all)
    y_trues_all = np.array(y_trues_all)

    mse = np.mean((y_preds_all - y_trues_all) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(y_preds_all - y_trues_all))
    maxe = np.max(np.abs(y_preds_all - y_trues_all))
    print("File:", file)
    print(
        f"MSE%: {mse*100:.3f}, RMSE%: {rmse*100:.3f}, MAE%: {mae*100:.3f}, MAXE%: {maxe*100:.3f}"
    )

In [25]:
test_model_on_multiple_temps(
    model,
    batch_size=BATCH_SIZE,
    temp_files=[
        "01_TEST_LGHG2@n10degC_Norm_(05_Inputs).csv",
        "02_TEST_LGHG2@0degC_Norm_(05_Inputs).csv",
        "03_TEST_LGHG2@10degC_Norm_(05_Inputs).csv",
        "04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv",
    ],
)

File: 01_TEST_LGHG2@n10degC_Norm_(05_Inputs).csv
-> MSE%: 0.012, RMSE%: 1.081, MAE%: 0.773, MAXE%: 5.403

File: 02_TEST_LGHG2@0degC_Norm_(05_Inputs).csv
-> MSE%: 0.015, RMSE%: 1.226, MAE%: 0.788, MAXE%: 7.316

File: 03_TEST_LGHG2@10degC_Norm_(05_Inputs).csv
-> MSE%: 0.022, RMSE%: 1.472, MAE%: 0.947, MAXE%: 7.085

File: 04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv
-> MSE%: 0.012, RMSE%: 1.109, MAE%: 0.683, MAXE%: 9.750

Overall--
File: 04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv
MSE%: 0.015, RMSE%: 1.234, MAE%: 0.796, MAXE%: 9.750


In [None]:
#@TODO Structure the files
#@TODO Create scripts for downloading the dataset
#@TODO Create a guide for running on Arduino
#@TODO Dockerize
#@TODO Push to Git