# Init

In [None]:
import sys
import os

# Add Codes/src to the Python path. The membership check keeps the cell
# idempotent: re-running it no longer piles up duplicate sys.path entries.
_src_dir = os.path.abspath(os.path.join(os.getcwd(), '../../src'))
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from ingestion_utils import nasa_process_all_files_parallel,isu_process_all_files_parallel, oxford_process_all_files_parallel, compute_gradient_features_parallel, interpolate_qv_parallel, v_range_config

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def plot_raw_signals(df_interp, source, cell_id, cycle_index):
    """Plot the raw and interpolated signals of one (source, cell, cycle) entry.

    Produces a 2x2 figure:
      * top-left:  charge vs voltage (raw curve sorted by voltage + interpolated overlay)
      * top-right: current vs voltage (raw curve sorted by voltage + interpolated overlay)
      * bottom-left:  charge vs timestamp
      * bottom-right: current vs timestamp

    Parameters
    ----------
    df_interp : pandas.DataFrame
        Frame holding ``source``, ``cell_id``, ``cycle_index``, the per-cycle
        sequences ``voltage``, ``charge``, ``current``, ``timestamp`` and the
        ``q_interp_<k>`` / ``i_interp_<k>`` columns.
    source : str
        Dataset key; also used to look up ``(vmin, vmax)`` in ``v_range_config``.
    cell_id, cycle_index
        Values matched against the ``cell_id`` and ``cycle_index`` columns.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If no row matches, a message
        is printed and nothing is plotted. Only the first matching row is used.
    """
    selector = (
        (df_interp["source"] == source)
        & (df_interp["cell_id"] == cell_id)
        & (df_interp["cycle_index"] == cycle_index)
    )
    matches = df_interp[selector]
    if matches.empty:
        print("❌ No matching entry found.")
        return

    entry = matches.iloc[0]

    voltage = np.array(entry["voltage"])
    charge = np.array(entry["charge"])
    current = np.array(entry["current"])
    timestamps = np.array(entry["timestamp"])

    # Raw curves are drawn against voltage, so order every signal by voltage.
    by_voltage = np.argsort(voltage)
    voltage_sorted = voltage[by_voltage]
    charge_sorted = charge[by_voltage]
    current_sorted = current[by_voltage]

    def _collect(prefix):
        # Gather "<prefix>_0", "<prefix>_1", ... in index order, keeping only
        # the names that actually exist in this row.
        candidates = (f"{prefix}_{k}" for k in range(len(entry)))
        return np.array([entry[name] for name in candidates if name in entry])

    q_interp = _collect("q_interp")
    i_interp = _collect("i_interp")

    # Evenly spaced voltage grid spanning the configured range for this source.
    v_low, v_high = v_range_config[source]
    voltage_grid = np.linspace(v_low, v_high, len(q_interp))

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    fig.suptitle(f"Raw Signals - Source: {source}, Cell: {cell_id}, Cycle: {cycle_index}", fontsize=14)
    (ax_qv, ax_iv), (ax_qt, ax_it) = axes

    def _label(ax, xlabel, ylabel, title):
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)

    # Q vs V
    ax_qv.plot(voltage_sorted, charge_sorted, label="Raw Q–V", linewidth=2)
    ax_qv.plot(voltage_grid, q_interp, label="Interpolated Q–V", linestyle="--", alpha=0.8)
    _label(ax_qv, "Voltage (V)", "Charge (Ah)", "Q vs V")
    ax_qv.legend()

    # I vs V
    ax_iv.plot(voltage_sorted, current_sorted, label="Raw I–V")
    ax_iv.plot(voltage_grid, i_interp, label="Interpolated I–V", linestyle="--", alpha=0.8)
    _label(ax_iv, "Voltage (V)", "Current (A)", "I vs V")
    ax_iv.legend()

    # Q vs Time
    ax_qt.plot(timestamps, charge)
    _label(ax_qt, "Time (s)", "Charge (Ah)", "Q vs Time")

    # I vs Time
    ax_it.plot(timestamps, current)
    _label(ax_it, "Time (s)", "Current (A)", "I vs Time")

    for panel in axes.flat:
        panel.grid(True)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

# Ingest

In [None]:
# Ingest the NASA dataset, cache the raw frame as a pickle, then preview it.
nasa_source_dir = "../../../Datasets/5.+Battery+Data+Set/5. Battery Data Set"
nasa_pickle_path = "../../../Datasets/nasa_raw.pkl"

df_nasa_raw = nasa_process_all_files_parallel(
    base_folder=nasa_source_dir, rated_capacity=2.0, max_workers=10
)
df_nasa_raw.to_pickle(nasa_pickle_path)

df_nasa_raw.head()

In [None]:
# Ingest the ISU dataset, cache the raw frame as a pickle, then preview it.
isu_source_dir = "../../../Datasets/RPT_json/"
isu_pickle_path = "../../../Datasets/isu_raw.pkl"

df_isu_raw = isu_process_all_files_parallel(
    base_path=isu_source_dir,
    valid_cells_path="Valid_cells.csv",
    rated_capacity=0.25,
    max_workers=10,
)
df_isu_raw.to_pickle(isu_pickle_path)

df_isu_raw.head()


In [None]:
# Ingest the Oxford dataset, cache the raw frame as a pickle, then preview it.
oxford_source_dir = "../../../Datasets/Oxford Battery Degradation Dataset 1"
oxford_pickle_path = "../../../Datasets/oxford_raw.pkl"

df_oxford_raw = oxford_process_all_files_parallel(base_folder=oxford_source_dir, max_workers=10)
df_oxford_raw.to_pickle(oxford_pickle_path)

df_oxford_raw.head()


# Load raw pickles and merge

In [None]:
# Read the three cached raw frames (NASA, Oxford, ISU — in that order),
# stack them into one frame with a fresh index, and cache the result.
raw_pickle_paths = (
    "../../../Datasets/nasa_raw.pkl",
    "../../../Datasets/oxford_raw.pkl",
    "../../../Datasets/isu_raw.pkl",
)
df_nasa, df_oxford, df_isu = [pd.read_pickle(path) for path in raw_pickle_paths]

df_merged = pd.concat([df_nasa, df_oxford, df_isu], ignore_index=True)

df_merged.to_pickle("../../../Datasets/raw_merged_all.pkl")

# Load merged pickle

In [None]:
# Reload the merged raw frame from its cached pickle.
merged_pickle_path = "../../../Datasets/raw_merged_all.pkl"
df_merged = pd.read_pickle(merged_pickle_path)

In [None]:
# Print the distinct cell ids of each dataset, one line per source.
for source_name in ("nasa", "isu", "oxford"):
    from_source = df_merged["source"] == source_name
    print(df_merged[from_source]["cell_id"].unique())

In [None]:
# Print the cycle indices recorded for NASA cell B0046.
is_nasa_b0046 = (df_merged["source"] == "nasa") & (df_merged["cell_id"] == "B0046")
print(df_merged[is_nasa_b0046]["cycle_index"].unique())

In [None]:
# Run the gradient-feature step from ingestion_utils on the merged frame
# (max_workers=10). NOTE(review): presumably adds per-cycle derivative
# summary columns — confirm in ingestion_utils.
df_merged_with_grad = compute_gradient_features_parallel(df_merged, max_workers=10)

In [None]:
# Run the Q–V interpolation step from ingestion_utils on the gradient frame
# (max_workers=10). plot_raw_signals reads q_interp_* / i_interp_* columns
# from this result.
df_grad_interp = interpolate_qv_parallel(df_merged_with_grad, max_workers=10)

In [None]:
# Cache the merged frame after the gradient and interpolation steps.
df_grad_interp.to_pickle("../../../Datasets/grad_interp_merged_all.pkl")

In [None]:
# Sanity check: list the distinct values of the `source` column.
df_grad_interp["source"].unique()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts, memory usage).
df_grad_interp.info()

In [None]:
# Visual check on one NASA entry: cell B0005, cycle 3.
plot_raw_signals(df_grad_interp, source="nasa", cell_id="B0005", cycle_index=3)

In [None]:
# Visual check on one ISU entry: cell G1C1, cycle 1.
plot_raw_signals(df_grad_interp, source="isu", cell_id="G1C1", cycle_index=1)

In [None]:
# Visual check on one Oxford entry: cell Cell2, cycle 200.
plot_raw_signals(df_grad_interp, source="oxford", cell_id="Cell2", cycle_index=200)

# Pack

In [6]:
# Reload the raw ISU frame and split each cell_id of the form "G<group>C<cell>"
# into integer `group` and `cell` columns.
df_isu_raw = pd.read_pickle("../../../Datasets/isu_raw.pkl")

# One regex pass with two capture groups (columns 0 and 1 of the result)
# instead of running two separate extractions over the same column.
isu_id_parts = df_isu_raw["cell_id"].str.extract(r"G(\d+)C(\d+)")

# Fail with a readable message if an id does not follow the expected pattern;
# otherwise astype(int) would raise an opaque NaN-conversion error.
unparsed_ids = df_isu_raw.loc[isu_id_parts.isna().any(axis=1), "cell_id"].unique()
if len(unparsed_ids) > 0:
    raise ValueError(
        f"cell_id values not matching 'G<group>C<cell>': {unparsed_ids.tolist()}"
    )

df_isu_raw["group"] = isu_id_parts[0].astype(int)
df_isu_raw["cell"] = isu_id_parts[1].astype(int)
df_isu_raw.head()

Unnamed: 0,source,cell_id,cycle_index,timestamp,voltage,current,charge,temperature,energy,capacity,rated_capacity,soh,group,cell
0,isu,G3C2,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[4.1858, 4.1843, 4.183, 4.1824, 4.1818, 4.1812...","[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0...","[0.0, 8.333333333333334e-06, 2.222222222222222...","[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3...","[0.0, 3.486111111111111e-05, 9.297222222222222...",0.279864,0.25,1.119455,3,2
1,isu,G3C2,1,"[0.0, 3.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, ...","[4.1877, 4.1827, 4.1821, 4.1803, 4.1784, 4.176...","[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0...","[0.0, 5.277777777777778e-05, 6.944444444444444...","[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3...","[0.0, 0.00022083333333333333, 0.00029052777777...",0.275718,0.25,1.102872,3,2
2,isu,G3C2,2,"[0.0, 3.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, ...","[4.188, 4.183, 4.1818, 4.1796, 4.1781, 4.1759,...","[0.0499, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0...","[0.0, 4.025e-05, 6.941666666666667e-05, 0.0001...","[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3...","[0.0, 0.0001685, 0.00029049999999999996, 0.000...",0.271475,0.25,1.085899,3,2
3,isu,G3C2,3,"[0.0, 3.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, ...","[4.1874, 4.1824, 4.1815, 4.179, 4.1772, 4.1753...","[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0...","[0.0, 4.4444444444444447e-05, 6.94444444444444...","[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3...","[0.0, 0.00018594444444444444, 0.00029049999999...",0.267508,0.25,1.070033,3,2
4,isu,G3C2,4,"[0.0, 2.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, ...","[4.1877, 4.1827, 4.1809, 4.1781, 4.1759, 4.174...","[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0...","[0.0, 3.0555555555555554e-05, 6.94444444444444...","[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3...","[0.0, 0.00012786111111111112, 0.00029047222222...",0.264197,0.25,1.056787,3,2


In [7]:
# Number of distinct cells in each ISU group, ordered by group number.
cells_by_group = df_isu_raw.groupby("group")["cell"]
cell_count_per_group = cells_by_group.nunique().sort_index()

cell_count_per_group

group
1     4
2     4
3     4
4     4
5     4
     ..
60    4
61    3
62    4
63    4
64    4
Name: cell, Length: 63, dtype: int64

In [None]:
# Run the gradient-feature step on the ISU-only frame, which now carries the
# extra `group` / `cell` columns (max_workers=10).
df_isu_grad = compute_gradient_features_parallel(df_isu_raw, max_workers=10)

In [None]:
# Run the Q–V interpolation step on the ISU gradient frame, then preview it.
df_isu_grad_interp = interpolate_qv_parallel(df_isu_grad, max_workers=10)
df_isu_grad_interp.head()

In [None]:
# Cache the processed ISU frame; the "Pack from pickle" section reads this file.
df_isu_grad_interp.to_pickle("../../../Datasets/grad_interp_isu.pkl")

# Pack from pickle

In [1]:
import pandas as pd

import sys
import os
# Make ../../src importable. The membership check keeps the cell idempotent:
# re-running it does not add duplicate sys.path entries.
_src_dir = os.path.abspath(os.path.join(os.getcwd(), '../../src'))
if _src_dir not in sys.path:
    sys.path.append(_src_dir)
from pack_synth import synthesize_pack_dataset_advanced

In [2]:
# Load the processed ISU frame (gradient + interpolation features) from its pickle.
df_isu_grad_interp = pd.read_pickle("../../../Datasets/grad_interp_isu.pkl")

In [3]:
# Show every column name as a plain Python list.
df_isu_grad_interp.columns.tolist()

['source',
 'cell_id',
 'cycle_index',
 'timestamp',
 'voltage',
 'current',
 'charge',
 'temperature',
 'energy',
 'capacity',
 'rated_capacity',
 'soh',
 'group',
 'cell',
 'dqdt_min',
 'dqdt_max',
 'dqdt_mean',
 'dqdt_std',
 'dvdt_min',
 'dvdt_max',
 'dvdt_mean',
 'dvdt_std',
 'didt_min',
 'didt_max',
 'q_interp_0',
 'i_interp_0',
 'q_interp_1',
 'i_interp_1',
 'q_interp_2',
 'i_interp_2',
 'q_interp_3',
 'i_interp_3',
 'q_interp_4',
 'i_interp_4',
 'q_interp_5',
 'i_interp_5',
 'q_interp_6',
 'i_interp_6',
 'q_interp_7',
 'i_interp_7',
 'q_interp_8',
 'i_interp_8',
 'q_interp_9',
 'i_interp_9',
 'q_interp_10',
 'i_interp_10',
 'q_interp_11',
 'i_interp_11',
 'q_interp_12',
 'i_interp_12',
 'q_interp_13',
 'i_interp_13',
 'q_interp_14',
 'i_interp_14',
 'q_interp_15',
 'i_interp_15',
 'q_interp_16',
 'i_interp_16',
 'q_interp_17',
 'i_interp_17',
 'q_interp_18',
 'i_interp_18',
 'q_interp_19',
 'i_interp_19',
 'q_interp_20',
 'i_interp_20',
 'q_interp_21',
 'i_interp_21',
 'q_interp

In [4]:
# Build the pack dataset from the ISU cell-level frame.
# NOTE(review): the synthesis logic lives in pack_synth — confirm there how
# cells are combined into packs.
pack_df = synthesize_pack_dataset_advanced(df_isu_grad_interp)

100%|██████████| 1709/1709 [00:37<00:00, 45.26it/s]


In [5]:
# Cache the synthesized pack dataset.
pack_df.to_pickle("../../../Datasets/pack_df_isu.pkl")