In [1]:
import sys
import copy
import pickle
import numpy as np
import pandas as pd

from tqdm import tqdm
from pathlib import Path
from collections import defaultdict
from IPython.display import display

sys.path.append(str(Path.cwd().parent))
from src.data import DataBundle, BatteryData
from src.builders import TRAIN_TEST_SPLITTERS
from src.utils.config import import_config

HOME = Path.cwd().parent

In [2]:
# Name of the dataset sub-folder that every workspace path below is built from.
dataset = 'mix_20'
# Workspace of one baseline run; the next cell looks for its config*.yaml to
# recover the train/test split.
folder = HOME / 'workspaces/baselines/sklearn/discharge_model/' / dataset

In [3]:
# Switch the working directory to HOME before building the split.
# NOTE(review): presumably needed so relative paths inside the config resolve — confirm.
%cd $HOME
# Take the first config*.yaml in the run folder (raises StopIteration if there is none)
# and keep only its `train_test_split` section.
config_file = next(folder.glob('config*.yaml'))
config = import_config(config_file, ['train_test_split'])['train_test_split']
train_cell_files, test_cell_files = TRAIN_TEST_SPLITTERS.build(config).split()

# Load every cell file of both splits.
train_cells = [BatteryData.load(c) for c in tqdm(train_cell_files, desc='Loading train cells')]
test_cells = [BatteryData.load(c) for c in tqdm(test_cell_files, desc='Loading test cells')]

# cathode material -> positions of the matching cells within `test_cells`.
# NOTE(review): these positions are later used to index saved label/prediction
# arrays, which assumes those arrays are in the same order as `test_cells` — confirm.
material_map = defaultdict(list)
for indx, cell in enumerate(test_cells):
    material_map[cell.cathode_material].append(indx)

/workspace/battery/nmi_configs


Loading train cells:   0%|          | 0/256 [00:00<?, ?it/s]

Loading train cells: 100%|██████████| 256/256 [01:26<00:00,  2.96it/s]
Loading test cells: 100%|██████████| 147/147 [01:02<00:00,  2.35it/s]


Calculate prediction errors separately for each cathode material in the `MIX-20` dataset.

In [4]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean of the absolute differences ``|y_true - y_pred|``."""
    abs_residuals = np.abs(y_true - y_pred)
    return np.mean(abs_residuals)

def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences ``(y_true - y_pred) ** 2``."""
    residuals = y_true - y_pred
    return np.mean(residuals ** 2)

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

def mean_squared_logarithmic_error(y_true, y_pred):
    """Return the mean squared difference between ``log1p(y_true)`` and ``log1p(y_pred)``."""
    log_true = np.log1p(y_true)
    log_pred = np.log1p(y_pred)
    return np.mean((log_true - log_pred) ** 2)

def root_mean_squared_logarithmic_error(y_true, y_pred):
    """Return the square root of ``mean_squared_logarithmic_error(y_true, y_pred)``."""
    msle = mean_squared_logarithmic_error(y_true, y_pred)
    return np.sqrt(msle)

def r_squared(y_true, y_pred):
    """Return ``1 - SS_res / SS_tot``.

    ``SS_res`` is the sum of squared residuals and ``SS_tot`` the sum of
    squared deviations of ``y_true`` from its own mean.
    """
    centered = y_true - np.mean(y_true)
    residuals = y_true - y_pred
    residual_ss = np.sum(residuals ** 2)
    total_ss = np.sum(centered ** 2)
    return 1 - residual_ss / total_ss

def adjusted_r_squared(y_true, y_pred, n=None, p=1):
    """Return ``1 - (1 - R^2) * (n - 1) / (n - p - 1)``.

    Args:
        y_true, y_pred: Inputs forwarded to ``r_squared``.
        n: Sample count; any falsy value (``None`` or ``0``) falls back to
            ``len(y_true)``.
        p: Value subtracted in the denominator (conventionally the number of
            predictors).
    """
    sample_count = n if n else len(y_true)
    unexplained = 1 - r_squared(y_true, y_pred)
    return 1 - (unexplained * (sample_count - 1) / (sample_count - p - 1))

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean of ``|(y_true - y_pred) / y_true|``, multiplied by 100."""
    relative_error = (y_true - y_pred) / y_true
    return np.mean(np.abs(relative_error)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return 100 times the mean of ``|y_true - y_pred|`` over the average of ``|y_true|`` and ``|y_pred|``."""
    abs_error = np.abs(y_true - y_pred)
    mean_magnitude = (np.abs(y_true) + np.abs(y_pred)) / 2
    return 100 * np.mean(abs_error / mean_magnitude)

def median_absolute_error(y_true, y_pred):
    """Return the median of the absolute differences ``|y_true - y_pred|``."""
    abs_residuals = np.abs(y_true - y_pred)
    return np.median(abs_residuals)

def explained_variance_score(y_true, y_pred):
    """Return ``1 - Var(y_true - y_pred) / Var(y_true)``."""
    residual_var = np.var(y_true - y_pred)
    target_var = np.var(y_true)
    return 1 - (residual_var / target_var)

def normalized_mean_absolute_error(y_true, y_pred):
    """Return the MAE divided by the range (max - min) of ``y_true``, multiplied by 100."""
    target_range = np.max(y_true) - np.min(y_true)
    mae = mean_absolute_error(y_true, y_pred)
    return mae / target_range * 100

def normalized_mean_squared_error(y_true, y_pred):
    """Return the MSE divided by the range (max - min) of ``y_true``.

    Unlike the normalized MAE / RMSE helpers, the result is not multiplied
    by 100.
    """
    target_range = np.max(y_true) - np.min(y_true)
    mse = mean_squared_error(y_true, y_pred)
    return mse / target_range

def normalized_root_mean_squared_error(y_true, y_pred):
    """Return the RMSE divided by the range (max - min) of ``y_true``, multiplied by 100."""
    target_range = np.max(y_true) - np.min(y_true)
    rmse = root_mean_squared_error(y_true, y_pred)
    return rmse / target_range * 100

# Metrics reported in the per-material tables: display name -> func(y_true, y_pred).
# Insertion order matters: the table rows are later relabelled positionally with
# `metric_abbr`, so both must list the metrics in the same order.
metrics = {
    "Root Mean Squared Error": root_mean_squared_error,
    "Mean Absolute Error": mean_absolute_error,
    "Median Absolute Error": median_absolute_error,
    "Mean Absolute Percentage Error (%)": mean_absolute_percentage_error,
    "Symmetric Mean Absolute Percentage Error (%)": symmetric_mean_absolute_percentage_error,
    "Normalized Mean Absolute Error (%)": normalized_mean_absolute_error,
    "Normalized Root Mean Squared Error (%)": normalized_root_mean_squared_error
}

In [5]:
def format_scores(scores):
    """Collapse each list of scores into a ``'mean±std'`` string.

    Works on a deep copy, so the caller's mapping is left untouched. Both
    numbers are rounded to zero decimals.
    """
    formatted = copy.deepcopy(scores)
    for metric_name in list(formatted):
        values = formatted[metric_name]
        formatted[metric_name] = f'{np.mean(values):.0f}±{np.std(values):.0f}'
    return formatted
def calc_scores(folder: 'str | Path', metrics: dict, indeces: list) -> dict:
    """Score every saved prediction run in ``folder`` on a subset of test samples.

    Args:
        folder: Directory containing ``predictions*`` pickle files. It is
            joined onto ``HOME``, so an absolute path is used as-is.
        metrics: Mapping of display name -> ``func(y_true, y_pred)``.
        indeces: Positions of the test samples to evaluate; used to index
            the restored label and prediction arrays.

    Returns:
        Mapping of metric name -> ``'mean±std'`` string over all runs, as
        produced by ``format_scores``.
    """
    # Load predictions. Sorted so runs are processed in a reproducible order
    # (glob order is arbitrary).
    # NOTE(review): pickle.load can execute arbitrary code — only point this
    # at prediction files you produced yourself.
    run_dir = HOME / folder
    predictions = []
    for pred_file in sorted(run_dir.glob('predictions*')):
        with open(pred_file, 'rb') as f:
            predictions.append(pickle.load(f))

    if not predictions:
        # Without this the result is a silent column of 'nan±nan'.
        import warnings
        warnings.warn(f'No prediction files found in {run_dir}')

    # Restore y_true and y_pred in the original label space
    targets = []
    for pred in predictions:
        bundle: DataBundle = pred['data']
        transform = bundle.label_transformation
        if transform is not None:
            # NOTE(review): labels are moved to CPU after the inverse transform,
            # predictions before it — confirm both orders are intended.
            y_true = transform.inverse_transform(bundle.test_data.label).cpu().numpy()
            y_pred = transform.inverse_transform(pred['prediction'].cpu()).numpy()
        else:
            y_true = bundle.test_data.label.cpu().numpy()
            y_pred = pred['prediction'].cpu().numpy()
        targets.append((y_true[indeces], y_pred[indeces]))

    # One score per run for every metric
    scores = {
        name: [func(y_true, y_pred) for y_true, y_pred in targets]
        for name, func in metrics.items()
    }
    return format_scores(scores)


# Result-folder name -> column header for the scikit-learn baselines
# (headers use LaTeX-style ``...'' quoting).
sklearn_baselines = {
    'dummy': 'Training Mean',
    'variance_model': '``Variance\'\' Model',
    'discharge_model': '``Discharge\'\' Model',
    'full_model': '``Full\'\' Model',
    'ridge': 'Ridge Regression',
    'pcr': 'PCR',
    'plsr': 'PLSR',
    'svm': 'SVM',
    'rf': 'Random Forest'
}
# Result-folder name -> column header for the neural-network baselines.
nn_baselines = {
    'cnn': 'CNN',
    'mlp': 'MLP',
    'lstm': 'LSTM'
}

# scores[material][method header] -> {metric name: 'mean±std'}
# The per-method directory gets its own name (`result_dir`) so the notebook-level
# `folder` that the split-loading cell reads is not overwritten by this cell.
scores = defaultdict(dict)
for material, test_indeces in material_map.items():
    # sklearn baselines
    for method, method_name in tqdm(sklearn_baselines.items(), desc='sklearn baselines'):
        result_dir = HOME / 'workspaces/baselines/sklearn' / method / dataset
        scores[material][method_name] = calc_scores(result_dir, metrics, test_indeces)

    # nn baselines
    for method, method_name in tqdm(nn_baselines.items(), desc='nn baselines'):
        result_dir = HOME / 'workspaces/baselines/nn_models' / method / dataset
        scores[material][method_name] = calc_scores(result_dir, metrics, test_indeces)

    # Ours
    result_dir = HOME / 'workspaces/ablation/feature_spaces/all_features' / dataset
    scores[material]['BatLiNet'] = calc_scores(result_dir, metrics, test_indeces)

sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 30.58it/s]
nn baselines: 100%|██████████| 3/3 [00:01<00:00,  2.83it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 480.96it/s]
nn baselines: 100%|██████████| 3/3 [00:00<00:00, 13.61it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 474.99it/s]
nn baselines: 100%|██████████| 3/3 [00:00<00:00, 12.82it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 419.58it/s]
nn baselines: 100%|██████████| 3/3 [00:00<00:00, 12.60it/s]


In [7]:
# Function to extract the mean from a 'mean±std' string
def extract_mean(value):
    """Return the number before ``'±'`` in ``value`` as a float.

    Values that cannot be parsed (non-strings, non-numeric text) yield
    ``np.inf`` so they never win a minimum comparison.
    """
    try:
        return float(value.split('±')[0])
    except (AttributeError, TypeError, ValueError):
        # AttributeError: no .split (e.g. None or a float);
        # TypeError: .split rejects a str separator (e.g. bytes);
        # ValueError: text before '±' is not a number.
        # Deliberately not a bare `except`, which would also swallow
        # KeyboardInterrupt / SystemExit.
        return np.inf

# Styler callback: mark the entry with the lowest mean in one row
def highlight_min(s):
    """Return a CSS string per entry of ``s``, highlighting the smallest mean.

    The mean of each entry is parsed with ``extract_mean``. The result has
    the same index as ``s``: the winning label gets
    ``'background-color: black'`` and every other label an empty string.
    """
    best_label = s.apply(extract_mean).idxmin()
    css = [
        'background-color: black' if label == best_label else ''
        for label in s.index
    ]
    return pd.Series(css, index=s.index)

# Short row labels for the tables. They replace the DataFrame index positionally,
# so they must be listed in the same order as the entries of `metrics`.
metric_abbr = [
    'RMSE', 'MAE', 'MAD',
    'MAPE', 'sMAPE', 'NMAE', 'NRMSE'
]
# One table per material: columns are methods, rows are metrics.
for material, data_scores in scores.items():
    print(material)
    # TODO: fill in NE results
    data_scores = pd.DataFrame(data_scores)
    # data_scores.to_excel(f'{material}.xlsx')
    data_scores.index = metric_abbr
    # Written relative to the current working directory.
    data_scores.to_latex(f'{material}.tex')
    # Highlight, per row (axis=1), the method with the lowest mean.
    display(data_scores.style.apply(highlight_min, axis=1))


LCO


Unnamed: 0,Training Mean,``Variance'' Model,``Discharge'' Model,``Full'' Model,Ridge Regression,PCR,PLSR,SVM,Random Forest,CNN,MLP,LSTM,BatLiNet
RMSE,392±0,269±0,304±0,329±0,890±0,575±0,144±0,105±0,146±0,93±30,386±95,75±35,47±8
MAE,388±0,263±0,246±0,310±0,744±0,540±0,135±0,80±0,84±0,74±17,231±50,55±22,36±6
MAD,399±0,242±0,261±0,344±0,645±0,498±0,145±0,59±0,43±0,61±13,70±20,38±11,25±6
MAPE,213±0,141±0,131±0,164±0,379±0,197±0,73±0,41±0,34±0,39±9,138±30,28±12,18±3
sMAPE,100±0,81±0,67±0,86±0,131±0,82±0,71±0,36±0,26±0,35±4,52±5,23±7,17±2
NMAE,201±0,136±0,128±0,161±0,386±0,80±0,70±0,41±0,44±0,38±9,120±26,29±11,19±3
NRMSE,203±0,140±0,158±0,170±0,461±0,86±0,75±0,54±0,75±0,48±16,200±49,39±18,25±4


NMC


Unnamed: 0,Training Mean,``Variance'' Model,``Discharge'' Model,``Full'' Model,Ridge Regression,PCR,PLSR,SVM,Random Forest,CNN,MLP,LSTM,BatLiNet
RMSE,451±0,439±0,409±0,105±0,416±0,372±0,285±0,120±0,90±0,41±4,119±42,131±112,23±3
MAE,450±0,435±0,406±0,102±0,390±0,367±0,279±0,113±0,55±0,30±2,89±33,57±26,17±2
MAD,456±0,445±0,408±0,106±0,435±0,359±0,285±0,117±0,26±0,25±3,57±31,24±4,14±2
MAPE,336±0,322±0,303±0,78±0,293±0,57±0,211±0,87±0,43±0,23±2,68±26,46±25,12±1
sMAPE,124±0,122±0,119±0,55±0,113±0,44±0,100±0,58±0,29±0,20±1,43±12,26±3,12±2
NMAE,433±0,419±0,390±0,98±0,375±0,153±0,269±0,108±0,53±0,29±2,85±32,55±25,17±2
NRMSE,433±0,422±0,394±0,101±0,400±0,155±0,274±0,116±0,86±0,39±4,115±41,126±108,23±3


LFP


Unnamed: 0,Training Mean,``Variance'' Model,``Discharge'' Model,``Full'' Model,Ridge Regression,PCR,PLSR,SVM,Random Forest,CNN,MLP,LSTM,BatLiNet
RMSE,640±0,656±0,1137735±0,491±0,760±0,778±0,537±0,523±0,300±0,7080±12299,569±31,396±55,229±14
MAE,473±0,475±0,108421±0,355±0,512±0,583±0,416±0,380±0,206±0,1061±1164,377±29,262±29,158±10
MAD,282±0,271±0,262±0,258±0,354±0,479±0,310±0,258±0,138±0,187±14,211±31,154±16,101±9
MAPE,44±0,45±0,7807±0,41±0,60±0,50±0,47±0,41±0,24±0,92±92,42±3,27±2,17±1
sMAPE,52±0,52±0,50±0,38±0,55±0,47±0,47±0,44±0,23±0,35±2,43±3,29±3,16±1
NMAE,24±0,24±0,5557±0,18±0,26±0,18±0,21±0,19±0,11±0,54±60,19±1,13±1,8±0
NRMSE,33±0,34±0,58315±0,25±0,39±0,23±0,28±0,27±0,15±0,363±630,29±2,20±3,12±1


NCA


Unnamed: 0,Training Mean,``Variance'' Model,``Discharge'' Model,``Full'' Model,Ridge Regression,PCR,PLSR,SVM,Random Forest,CNN,MLP,LSTM,BatLiNet
RMSE,291±0,211±0,149±0,205±0,2387±0,333±0,179±0,300±0,633±0,135±43,632±45,715±215,211±219
MAE,228±0,167±0,115±0,181±0,1707±0,307±0,163±0,254±0,490±0,120±43,443±42,442±162,141±112
MAD,166±0,165±0,91±0,157±0,475±0,303±0,112±0,276±0,395±0,112±43,176±105,170±170,86±36
MAPE,138±0,100±0,28±0,60±0,1132±0,73±0,65±0,138±0,116±0,41±18,93±8,98±34,73±94
sMAPE,58±0,49±0,31±0,100±0,109±0,47±0,73±0,66±0,66±0,40±13,54±4,55±13,37±11
NMAE,47±0,35±0,24±0,38±0,354±0,56±0,34±0,53±0,102±0,25±9,92±9,92±34,29±23
NRMSE,60±0,44±0,31±0,43±0,495±0,60±0,37±0,62±0,131±0,28±9,131±9,148±45,44±45
