In [None]:
import pandas as pd
import datetime
from pathlib import Path
from scipy.io import loadmat
from typing import Literal



In [None]:
def load_data(path: Path | str, target_operation: Literal["charge", "discharge"]) -> pd.DataFrame:
    path = Path(path)
    mat = loadmat(path)  # load mat-file
    data = mat[path.stem]["cycle"][0, 0]

    parsed_data = []
    for cycle in data:
        for operation_id, operation in enumerate(cycle):
            operation_type = operation["type"][0]
            if operation_type != target_operation:
                continue

            for fields in operation["data"]:
                if operation_type == "charge":
                    operation_data = pd.DataFrame({
                        k: fields[k][0][0]
                        for k in fields.dtype.fields
                    })
                elif operation_type == "discharge":
                    operation_data = pd.DataFrame({
                        k: fields[k][0][0]
                        for k in fields.dtype.fields
                        if k != "Capacity"
                    })
                    operation_data["Capacity"] = fields["Capacity"][0][0, 0]      

            operation_data["operation_id"] = operation_id
            operation_data["temperature"] = operation["ambient_temperature"][0, 0]
            operation_data["type"] = operation_type
            operation_data["start_time"] = datetime.datetime(*(int(t) for t in operation["time"][0]))

            parsed_data.append(operation_data)
    
    return pd.concat(parsed_data, ignore_index=True)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to every figure drawn from here on.
sns.set_theme()

In [None]:
# Every .mat file of the data set, in a stable (sorted) order. Later cells
# iterate over `paths` too.
paths = sorted(Path("5. Battery Data Set/1. BatteryAgingARC-FY08Q4").glob("*.mat"))

for p in paths:
    # A loop-local name keeps this cell from overwriting the notebook-wide
    # `data` frame when it is re-run after that frame has been built.
    discharge = load_data(p, "discharge")

    # Capacity and start_time are constant within an operation, so one row per
    # operation draws the same picture without thousands of overlapping markers.
    per_operation = discharge.drop_duplicates("operation_id")

    fig, ax = plt.subplots()
    sns.scatterplot(data=per_operation, x="start_time", y="Capacity", ax=ax)
    ax.set_title(p.stem)
    plt.show()

In [None]:
# One figure per (file, feature): the feature against Time, coloured by capacity.
for p in paths:
    # A loop-local name keeps this cell from overwriting the notebook-wide
    # `data` frame when it is re-run after that frame has been built.
    discharge = load_data(p, "discharge")

    # NOTE(review): the slice relies on load_data's column order; the first five
    # columns are presumably the measured signals - confirm against the files.
    for feature in discharge.columns[:5]:
        fig, ax = plt.subplots()
        sns.scatterplot(data=discharge, x="Time", y=feature, hue="Capacity", alpha=0.2, ax=ax)
        ax.set_title(f"{p.stem}: {feature}")
        plt.show()



In [None]:
# Stack the discharge tables of all files into one frame. The dict keys (file
# stems) become the outer index level, the per-file row number the inner one.
data = pd.concat(
    {Path(p).stem: load_data(p, "discharge") for p in paths}
).rename_axis(index=["file", "index"])

In [None]:
# Collapse each (file, operation) to a single row: the largest Time value seen
# in the operation and the Capacity taken from its first row.
grouped_data = data.groupby(["file", "operation_id"]).agg(
    Time=("Time", "max"),
    Capacity=("Capacity", "first"),
)

In [None]:
# Largest Time value of each operation against its capacity, coloured by file.
# legend=False removes the hue legend outright; the earlier plt.legend([])
# call swapped it for an empty legend instead.
sns.scatterplot(data=grouped_data, x="Time", y="Capacity", hue="file", legend=False)
plt.show()

In [None]:
import scipy.integrate

In [None]:
# Per (file, operation) summary built from trapezoidal integrals over Time.
# Despite the column names, "total_power" is the time-integral of
# voltage * current and "total_current" is the time-integral of current.
cycle_records = []
for (file_name, operation_id), samples in data.groupby(["file", "operation_id"]):
    elapsed = samples["Time"]
    current = samples["Current_measured"]
    cycle_records.append({
        "file": file_name,
        "operation_id": operation_id,
        "total_power": scipy.integrate.trapezoid(samples["Voltage_measured"] * current, elapsed),
        "total_current": scipy.integrate.trapezoid(current, elapsed),
        # Capacity is identical on every row of an operation; take the first.
        "capacity": samples["Capacity"].iloc[0],
    })

cycle_statistics = pd.DataFrame(cycle_records)

In [None]:
import scipy.stats

In [None]:
# Integrated voltage * current of each operation against its capacity, by file.
# legend=False removes the hue legend outright; the earlier plt.legend([])
# call swapped it for an empty legend instead.
sns.scatterplot(data=cycle_statistics, x="total_power", y="capacity", hue="file", legend=False)
plt.show()
# Kept as the last expression so the notebook displays the fit result.
scipy.stats.linregress(cycle_statistics["total_power"], cycle_statistics["capacity"])

In [None]:
# Integrated current of each operation against its capacity, coloured by file.
# legend=False removes the hue legend outright; the earlier plt.legend([])
# call swapped it for an empty legend instead.
sns.scatterplot(data=cycle_statistics, x="total_current", y="capacity", hue="file", legend=False)
plt.show()
# Kept as the last expression so the notebook displays the fit result.
scipy.stats.linregress(cycle_statistics["total_current"], cycle_statistics["capacity"])