In [1]:
import os


import torch

from datasets.LGDataset import LGDataset, get_dataloader

In [2]:
import yaml as yml

import utils

# Project root = parent of the current working directory
# (os.path.abspath("") expands to the directory the kernel runs in).
PROJECT_DIR = os.path.abspath(os.path.dirname(os.path.abspath("")))

config_path = os.path.join(PROJECT_DIR, "src", "config.yml")

# Explicit encoding so the YAML is decoded the same way on every platform.
with open(config_path, "r", encoding="utf-8") as f:
    config = yml.safe_load(f)

# Fail fast with a readable message: an empty YAML file makes safe_load return
# None, and a missing key would otherwise surface as an opaque TypeError from
# os.path.join(PROJECT_DIR, None).
if not isinstance(config, dict) or config.get("lg_dataset_path") is None:
    raise KeyError(f"'lg_dataset_path' is not defined in {config_path}")

# The configured dataset location is joined onto the project root.
dataset_path = os.path.join(PROJECT_DIR, config["lg_dataset_path"])

In [3]:
# Number of samples per batch, shared by every loader in this notebook.
BATCH_SIZE = 2048

# One dataset per split, built from the files found for that split.
train_files = utils.fetch_data_files(dataset_path, "train")
train_dataset = LGDataset(train_files)
valid_files = utils.fetch_data_files(dataset_path, "valid")
val_dataset = LGDataset(valid_files)

# Only the training loader is shuffled.
train_loader = get_dataloader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = get_dataloader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Pull a single batch to inspect the tensor shapes; `x` is kept at notebook
# level because the model cell reads x.shape[1] as its input size.
first_batch = next(iter(train_loader))
x, y = first_batch
print(x.shape, y.shape)

torch.Size([2048, 5]) torch.Size([2048])


In [4]:
# List the files that make up the "test" split (displayed as the cell output).
test_files = utils.fetch_data_files(dataset_path, "test")
test_files

['/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/02_TEST_LGHG2@0degC_Norm_(05_Inputs).csv',
 '/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/01_TEST_LGHG2@n10degC_Norm_(05_Inputs).csv',
 '/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv',
 '/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/03_TEST_LGHG2@10degC_Norm_(05_Inputs).csv']

In [5]:
from models.soc_estimator import SoCEstimator

# The input width is taken from the second dimension of the sample batch `x`.
n_features = x.shape[1]

model = SoCEstimator(input_size=n_features, num_layers=5, hidden_size=8, is_residual=True)

In [6]:
# Train the model with PyTorch Lightning.
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

# Experiment tracking: TensorBoard logger rooted at 'logs', run name 'soc-estimator'.
logger = TensorBoardLogger('logs', name='soc-estimator')

# Early stopping watches 'val_loss' (lower is better) with a patience of 10.
early_stop_options = dict(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=True,
)
early_stop_callback = pl.callbacks.EarlyStopping(**early_stop_options)

trainer = pl.Trainer(
    max_epochs=100,
    accelerator="auto",
    logger=logger,
    callbacks=[early_stop_callback],
)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params | Mode 
----------------------------------------------
0 | model     | Model   | 1.7 K  | train
1 | criterion | MSELoss | 0      | train
----------------------------------------------
1.7 K     Trainable params
0         Non-trainable params
1.7 K     Total params
0.007     Total estimated model params size

Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 12.51it/s]

/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


                                                                           

/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 328/328 [00:05<00:00, 60.04it/s, v_num=9]

Metric val_loss improved. New best score: 0.002


Epoch 1: 100%|██████████| 328/328 [00:05<00:00, 63.13it/s, v_num=9]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.001


Epoch 2: 100%|██████████| 328/328 [00:05<00:00, 59.75it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 3: 100%|██████████| 328/328 [00:05<00:00, 60.79it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 5: 100%|██████████| 328/328 [00:05<00:00, 60.98it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 6: 100%|██████████| 328/328 [00:05<00:00, 61.42it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 7: 100%|██████████| 328/328 [00:05<00:00, 60.80it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.001


Epoch 8: 100%|██████████| 328/328 [00:05<00:00, 60.21it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 10: 100%|██████████| 328/328 [00:05<00:00, 60.93it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 11: 100%|██████████| 328/328 [00:05<00:00, 60.04it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 13: 100%|██████████| 328/328 [00:05<00:00, 62.82it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 14: 100%|██████████| 328/328 [00:05<00:00, 59.67it/s, v_num=9]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.000


Epoch 24: 100%|██████████| 328/328 [00:05<00:00, 62.82it/s, v_num=9]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.000. Signaling Trainer to stop.


Epoch 24: 100%|██████████| 328/328 [00:05<00:00, 62.74it/s, v_num=9]


In [7]:
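# Disabled alternative: restore the model from a Lightning checkpoint instead of
# using the instance built above. Uncomment and adjust the path to use it.
# model = SoCEstimator.load_from_checkpoint(
#     "/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/src/logs/soc-estimator/version_5/checkpoints/epoch=36-step=12136.ckpt",
#     input_size=x.shape[1],
#     num_layers=5,
#     hidden_size=8,
#     is_residual=True,
# )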

In [8]:
# NOTE: this replaces the weights of the inner network (model.model) with the
# ones stored in "soc-estimator.pt", overriding whatever training produced.
# map_location="cpu" lets the file load on machines without the device it was
# saved from; weights_only=True restricts unpickling to tensors/plain containers.
state_dict = torch.load("soc-estimator.pt", map_location="cpu", weights_only=True)
model.model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
# Evaluate the model on val_loader: first through Lightning's test loop, then
# with explicit regression metrics (RMSE, MSE, MAE, max error), each scaled by 100.
from sklearn.metrics import mean_squared_error, mean_absolute_error, max_error
import numpy as np

trainer.test(model, val_loader)

model.eval()
y_pred = []
y_true = []
# A single no_grad context covers the whole pass. The loop uses its own batch
# names so the notebook-level sample batch `x`, `y` (read by the model cell via
# x.shape[1]) is not overwritten by running this cell.
with torch.no_grad():
    for batch_x, batch_y in val_loader:
        # Inputs follow the model's device; outputs come back to the CPU so
        # .numpy() works whether or not the model currently sits on a GPU.
        batch_out = model(batch_x.to(model.device))
        y_pred.append(batch_out.cpu().numpy())
        y_true.append(batch_y.cpu().numpy())

y_pred = np.concatenate(y_pred)
y_true = np.concatenate(y_true)

# Compute the raw MSE once and derive both reported values from it.
mse_raw = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse_raw)*100
mse = mse_raw*100
mae = mean_absolute_error(y_true, y_pred)*100
maxe = max_error(y_true, y_pred)*100

print(f'RMSE: {rmse}, MSE: {mse}, MAE: {mae}, MAXE: {maxe}')
# # save the model
# torch.save(model.state_dict(), 'soc-estimator.pth')

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 20/20 [00:00<00:00, 110.11it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss         0.00011690160317812115
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
RMSE: 1.081211355975217, MSE: 0.011690179962897673, MAE: 0.7731814403086901, MAXE: 5.402797698974609


In [10]:
# Parameter count of the model as reported by utils.count_parameters
# (displayed as the cell output).
n_params = utils.count_parameters(model)
n_params

1713

In [11]:
# Run the per-file evaluation helper over every file of the "test" split.
held_out_files = utils.fetch_data_files(dataset_path, "test")
utils.test_model_on_multiple_temps(model, batch_size=BATCH_SIZE, test_files=held_out_files)

File: /home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/02_TEST_LGHG2@0degC_Norm_(05_Inputs).csv
	-MSE%: 0.015, RMSE%: 1.226, MAE%: 0.788, MAXE%: 7.316

File: /home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/01_TEST_LGHG2@n10degC_Norm_(05_Inputs).csv
	-MSE%: 0.012, RMSE%: 1.081, MAE%: 0.773, MAXE%: 5.403

File: /home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv
	-MSE%: 0.012, RMSE%: 1.109, MAE%: 0.683, MAXE%: 9.750

File: /home/ahmedamaksoud/Desktop/projects/ITEC-EFADS/data/lg/test/03_TEST_LGHG2@10degC_Norm_(05_Inputs).csv
	-MSE%: 0.022, RMSE%: 1.472, MAE%: 0.947, MAXE%: 7.085

<Overall>
MSE%: 0.015, RMSE%: 1.234, MAE%: 0.796, MAXE%: 9.750


In [12]:
#@TODO Structure the files
#@TODO Create scripts for downloading the dataset
#@TODO Create a guide for running on arduino
#@TODO Dockerize
#@TODO Push to git