In [1]:
import sys
import copy
import pickle
import numpy as np
import pandas as pd

from tqdm import tqdm
from pathlib import Path
from collections import defaultdict
from IPython.display import display

sys.path.append(str(Path.cwd().parent))
from src.data import DataBundle, BatteryData
from src.builders import TRAIN_TEST_SPLITTERS
from src.utils.config import import_config

HOME = Path.cwd().parent

In [2]:
dataset = 'mix_20'
folder = HOME / 'workspaces/baselines/sklearn/discharge_model/' / dataset

In [3]:
%cd $HOME
config_file = next(folder.glob('config*.yaml'))
config = import_config(config_file, ['train_test_split'])['train_test_split']
train_cell_files, test_cell_files = TRAIN_TEST_SPLITTERS.build(config).split()

train_cells = [BatteryData.load(c) for c in tqdm(train_cell_files, desc='Loading train cells')]
test_cells = [BatteryData.load(c) for c in tqdm(test_cell_files, desc='Loading test cells')]

material_map = defaultdict(list)
for indx, cell in enumerate(test_cells):
    material_map[cell.cathode_material].append(indx)

/workspace/battery/nmi_configs


Loading train cells:   0%|          | 0/256 [00:00<?, ?it/s]

Loading train cells:  77%|███████▋  | 197/256 [01:04<00:16,  3.62it/s]

Calculate errors for different materials in `MIX-20` dataset.

In [None]:
def mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def mean_squared_logarithmic_error(y_true, y_pred):
    return np.mean((np.log1p(y_true) - np.log1p(y_pred)) ** 2)

def root_mean_squared_logarithmic_error(y_true, y_pred):
    return np.sqrt(mean_squared_logarithmic_error(y_true, y_pred))

def r_squared(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - ss_res / ss_tot

def adjusted_r_squared(y_true, y_pred, n=None, p=1):
    r2 = r_squared(y_true, y_pred)
    n = n or len(y_true)
    return 1 - ((1 - r2) * (n - 1) / (n - p - 1))

def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    return 100 * np.mean(np.abs(y_true - y_pred) / ((np.abs(y_true) + np.abs(y_pred)) / 2))

def median_absolute_error(y_true, y_pred):
    return np.median(np.abs(y_true - y_pred))

def explained_variance_score(y_true, y_pred):
    var_y_true = np.var(y_true)
    return 1 - (np.var(y_true - y_pred) / var_y_true)

def normalized_mean_absolute_error(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred) / (np.max(y_true) - np.min(y_true)) * 100

def normalized_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred) / (np.max(y_true) - np.min(y_true))

metrics = {
    "Mean Absolute Error": mean_absolute_error,
    # "Mean Squared Error": mean_squared_error,
    "Root Mean Squared Error": root_mean_squared_error,
    # "Mean Squared Logarithmic Error": mean_squared_logarithmic_error,
    # "Root Mean Squared Logarithmic Error": root_mean_squared_logarithmic_error,
    # "R-squared": r_squared,
    # "Adjusted R-squared": adjusted_r_squared,
    "Mean Absolute Percentage Error": mean_absolute_percentage_error,
    "Symmetric Mean Absolute Percentage Error": symmetric_mean_absolute_percentage_error,
    "Median Absolute Error": median_absolute_error,
    # "Explained Variance Score": explained_variance_score,
    "Normalized Mean Absolute Error": normalized_mean_absolute_error,
    "Normalized Mean Squared Error": normalized_mean_squared_error
}

In [None]:
def format_scores(scores):
    scores = copy.deepcopy(scores)
    for name, s in scores.items():
        mean, std = np.mean(s), np.std(s)
        scores[name] = f'{mean:.0f}±{std:.0f}'
    return scores
def calc_scores(folder: str, metrics: dict, indeces: list) -> dict:
    # Load predictions
    predictions = []
    for data in (HOME / folder).glob('predictions*'):
        with open(data, 'rb') as f:
            predictions.append(pickle.load(f))

    # Restore the y_true and y_pred
    targets = []
    for pred  in predictions:
        data: DataBundle = pred['data']
        if data.label_transformation is not None:
            y_true = data.label_transformation.inverse_transform(data.test_data.label).cpu().numpy()
            y_pred = data.label_transformation.inverse_transform(pred['prediction'].cpu()).numpy()
        else:
            y_true = data.test_data.label.cpu().numpy()
            y_pred = pred['prediction'].cpu().numpy()
        targets.append((y_true[indeces], y_pred[indeces]))

    # Get scores
    scores = {}
    for name, func in metrics.items():
        scores[name] = []
        for y_true, y_pred in targets:
            scores[name].append(func(y_true, y_pred))
    
    scores = format_scores(scores)
    return scores


sklearn_baselines = {
    'dummy': 'Training Mean',
    'variance_model': '``Variance\'\' Model',
    'discharge_model': '``Discharge\'\' Model',
    'full_model': '``Full\'\' Model',
    'ridge': 'Ridge Regression',
    'pcr': 'PCR',
    'plsr': 'PLSR',
    'svm': 'SVM',
    'rf': 'Random Forest'
}
nn_baselines = {
    'cnn': 'CNN',
    'mlp': 'MLP',
    'lstm': 'LSTM'
}

scores = defaultdict(dict)
for material, test_indeces in material_map.items():
    # sklearn baselines
    for method, method_name in tqdm(sklearn_baselines.items(), desc='sklearn baselines'):
        folder = HOME / 'workspaces/baselines/sklearn' / method / dataset
        scores[material][method_name] = calc_scores(folder, metrics, test_indeces)

    # nn baselines
    for method, method_name in tqdm(nn_baselines.items(), desc='nn baselines'):
        folder = HOME / 'workspaces/baselines/nn_models' / method / dataset
        scores[material][method_name] = calc_scores(folder, metrics, test_indeces)

    # Ours
    folder = HOME / 'workspaces/ablation/feature_spaces/all_features' / dataset
    scores[material]['BatLiNet'] = calc_scores(folder, metrics, test_indeces)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 2137.65it/s]


workspaces/baselines/sklearn/dummy/mix_20
workspaces/baselines/sklearn/variance_model/mix_20
workspaces/baselines/sklearn/discharge_model/mix_20
workspaces/baselines/sklearn/full_model/mix_20
workspaces/baselines/sklearn/ridge/mix_20
workspaces/baselines/sklearn/pcr/mix_20
workspaces/baselines/sklearn/plsr/mix_20
workspaces/baselines/sklearn/svm/mix_20
workspaces/baselines/sklearn/rf/mix_20


nn baselines: 100%|██████████| 3/3 [00:00<00:00, 1732.23it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 2438.86it/s]


workspaces/baselines/sklearn/dummy/mix_20
workspaces/baselines/sklearn/variance_model/mix_20
workspaces/baselines/sklearn/discharge_model/mix_20
workspaces/baselines/sklearn/full_model/mix_20
workspaces/baselines/sklearn/ridge/mix_20
workspaces/baselines/sklearn/pcr/mix_20
workspaces/baselines/sklearn/plsr/mix_20
workspaces/baselines/sklearn/svm/mix_20
workspaces/baselines/sklearn/rf/mix_20


nn baselines: 100%|██████████| 3/3 [00:00<00:00, 2472.57it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 2687.51it/s]


workspaces/baselines/sklearn/dummy/mix_20
workspaces/baselines/sklearn/variance_model/mix_20
workspaces/baselines/sklearn/discharge_model/mix_20
workspaces/baselines/sklearn/full_model/mix_20
workspaces/baselines/sklearn/ridge/mix_20
workspaces/baselines/sklearn/pcr/mix_20
workspaces/baselines/sklearn/plsr/mix_20
workspaces/baselines/sklearn/svm/mix_20
workspaces/baselines/sklearn/rf/mix_20


nn baselines: 100%|██████████| 3/3 [00:00<00:00, 2415.15it/s]
sklearn baselines: 100%|██████████| 9/9 [00:00<00:00, 2621.44it/s]


workspaces/baselines/sklearn/dummy/mix_20
workspaces/baselines/sklearn/variance_model/mix_20
workspaces/baselines/sklearn/discharge_model/mix_20
workspaces/baselines/sklearn/full_model/mix_20
workspaces/baselines/sklearn/ridge/mix_20
workspaces/baselines/sklearn/pcr/mix_20
workspaces/baselines/sklearn/plsr/mix_20
workspaces/baselines/sklearn/svm/mix_20
workspaces/baselines/sklearn/rf/mix_20


nn baselines: 100%|██████████| 3/3 [00:00<00:00, 2660.24it/s]


In [21]:
dataset

'mix_20'

In [17]:
# Function to extract the mean from the string
def extract_mean(value):
    try:
        return float(value.split('±')[0])
    except:
        return np.inf

# Function to highlight the minimum mean value in each row
def highlight_min(s):
    # Extract means for the row
    means = s.apply(extract_mean)
    # Get the index of the minimum mean
    min_index = means.idxmin()
    # Create a series to apply the highlighting
    is_min = pd.Series([''] * len(s), index=s.index)
    is_min[min_index] = 'background-color: black'
    return is_min

for material, data_scores in scores.items():
    print(material)
    # TODO: fill in NE results
    data_scores = pd.DataFrame(data_scores)
    display(data_scores.style.apply(highlight_min, axis=1))


LCO


  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f944ff20a60>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


ValueError: Function <function highlight_min at 0x7f84532de050> created invalid columns labels.
Usually, this is the result of the function returning a Series which contains invalid labels, or returning an incorrectly shaped, list-like object which cannot be mapped to labels, possibly due to applying the function along the wrong axis.
Result columns has shape: (14,)
Expected columns shape:   (13,)

<pandas.io.formats.style.Styler at 0x7f92b3f4fee0>

NMC


  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()


ValueError: Function <function highlight_min at 0x7f84532de050> created invalid columns labels.
Usually, this is the result of the function returning a Series which contains invalid labels, or returning an incorrectly shaped, list-like object which cannot be mapped to labels, possibly due to applying the function along the wrong axis.
Result columns has shape: (14,)
Expected columns shape:   (13,)

<pandas.io.formats.style.Styler at 0x7f83e3032e30>

LFP


  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()


ValueError: Function <function highlight_min at 0x7f84532de050> created invalid columns labels.
Usually, this is the result of the function returning a Series which contains invalid labels, or returning an incorrectly shaped, list-like object which cannot be mapped to labels, possibly due to applying the function along the wrong axis.
Result columns has shape: (14,)
Expected columns shape:   (13,)

<pandas.io.formats.style.Styler at 0x7f83c025bdc0>

NCA


  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()
  min_index = means.idxmin()


ValueError: Function <function highlight_min at 0x7f84532de050> created invalid columns labels.
Usually, this is the result of the function returning a Series which contains invalid labels, or returning an incorrectly shaped, list-like object which cannot be mapped to labels, possibly due to applying the function along the wrong axis.
Result columns has shape: (14,)
Expected columns shape:   (13,)

<pandas.io.formats.style.Styler at 0x7f838444fbb0>