In [None]:
# ===============================
# 1. LOAD THE .MAT FILE
# ===============================
# Entries that loadmat adds next to the actual variables; they are filtered
# out so that only the per-cell data remains in `ox`.
_MAT_METADATA_KEYS = ("__header__", "__version__", "__globals__")

ox = {
    name: content
    for name, content in scipy.io.loadmat(
        'datasets/battery.mat', simplify_cells=True
    ).items()
    if name not in _MAT_METADATA_KEYS
}

# ox is now {"Cell1": {...}, "Cell2": {...}, ...}


# ===============================
# 2. EXTRACT THE CAPACITY OF EACH CYCLE (C1ch)
# ===============================
# capacities["CellX"] = [cap_cycle0, cap_cycle1, ...]
#
# For every cycle, the last sample of the C1ch charge trace `q` is taken as
# that cycle's capacity (the original note gives the unit as mAh).
capacities = {
    cell_name: [
        cycle_data['C1ch']['q'][-1]
        for cycle_data in cell_data.values()
    ]
    for cell_name, cell_data in ox.items()
}


# ===============================
# 3. COMPUTE SOH FOR EACH CYCLE
# ===============================
# soh["CellX"] = [SoH_cycle0, SoH_cycle1, ...]
#
# SoH of cycle k is its capacity relative to the first recorded cycle of the
# same cell: SoH_k = q_k / q_0.
soh = {
    cell: [cycle_cap / cycle_caps[0] for cycle_cap in cycle_caps]
    for cell, cycle_caps in capacities.items()
}


# ===============================
# 4. BUILD THE FULL DATAFRAME
# ===============================
# Each row holds:
# - battery (cell) name & cycle name
# - voltage time series (v)
# - temperature time series (T)
# - the cycle capacity and the SoH label


records = {}

for cell_name, cell_data in ox.items():
    # Per-cell lists computed earlier; position i matches the i-th cycle in
    # iteration order of cell_data.
    cell_caps = capacities[cell_name]
    cell_soh = soh[cell_name]

    for cycle_idx, (cycle_name, cycle_data) in enumerate(cell_data.items()):
        charge = cycle_data['C1ch']

        # One record per (cycle, cell) pair, keyed "<cycle>_<cell>".
        records[f"{cycle_name}_{cell_name}"] = {
            "cell": cell_name,
            "cycle": cycle_name,
            "voltage": pd.Series(charge['v']),
            "temperature": pd.Series(charge['T']),
            "capacity": cell_caps[cycle_idx],
            "SoH": cell_soh[cycle_idx],
        }


# Record keys become the DataFrame index; the dict fields become columns.
df = pd.DataFrame.from_dict(records, orient="index")
print(df.head())
print("\nNUMERO CAMPIONI TOTALI =", len(df))

In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error
import matplotlib.pyplot as plt
from sktime.transformations.panel.padder import PaddingTransformer
from sktime.classification.feature_based import RandomIntervalClassifier
from sktime.performance_metrics.forecasting import mean_absolute_error,mean_absolute_percentage_error

# ===========================
# TRAIN/TEST SPLIT
# ===========================

# Row-wise random split: each sample goes to the training set when its uniform
# draw is below 0.85. The fixed global seed keeps the mask reproducible.
# NOTE(review): rows are (cell, cycle) pairs, so cycles of the same cell can
# land on both sides of the split — confirm this is intended.
np.random.seed(2)
msk = np.random.rand(len(df)) < 0.85

train_df = df[msk]
test_df  = df[~msk]

# y = SoH as an integer percentage (nominally 0-100).
# Round BEFORE casting: astype(int) alone truncates toward zero, so float
# artefacts such as 0.57 * 100 == 56.99999999999999 would become 56 and every
# label would be biased downwards by up to one percentage point.
y_train = (train_df['SoH'] * 100).round().astype(int)
y_test  = (test_df['SoH'] * 100).round().astype(int)

# X = only the time-series columns
x_train = train_df[['voltage', 'temperature']].copy()
x_test  = test_df[['voltage', 'temperature']].copy()

print("Train shape:", x_train.shape, y_train.shape)
print("Test shape:",  x_test.shape,  y_test.shape)

# ===========================
# CLASSIFIER
# ===========================

# Two-step pipeline composed with `*`: the padding transformer runs first
# (presumably to bring the series to a common length — confirm against the
# sktime docs), then the interval-based classifier is fitted on its output.
padding_step = PaddingTransformer()
interval_clf = RandomIntervalClassifier(n_intervals=5, random_state=1)
padded_clf = padding_step * interval_clf

padded_clf.fit(x_train, y_train)

# Predicted integer-percent labels for the held-out rows
y_pred = padded_clf.predict(x_test)

# Bring labels and predictions back from integer percent to fractions
y_test_norm = y_test / 100
y_pred_norm = y_pred / 100

# NOTE: both metric names are imported twice in this cell (sklearn first,
# sktime second); the later import is the one bound here.
mape_value = mean_absolute_percentage_error(y_test_norm, y_pred_norm)
print("MAE% =", mape_value)

mae_value = mean_absolute_error(y_test_norm, y_pred_norm)
print("MAE = ", mae_value)

squared_residuals = (y_test_norm - y_pred_norm) ** 2
rmse_value = np.sqrt(np.mean(squared_residuals))
print("RMSE =", rmse_value)

# ===========================
# PLOT: real vs pred
# ===========================

# Both curves are drawn in integer-percent units, in test-set row order.
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("SoH real vs pred")
ax.plot(y_test.values, label="real")
ax.plot(y_pred, label="pred")
ax.legend()
plt.show()
