In [57]:
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat

def process_mat_file_to_dataframe(path):
    """
    Load one battery .mat file and flatten it into a long-format DataFrame.

    The dataset key looked up inside the file is derived from the file name
    (e.g. ``.../B0005.mat`` -> ``'B0005'``). Every sample of every cycle
    becomes one row; the cycle-level metadata (type, ambient temperature,
    start-time vector, capacity) is repeated on each row of that cycle.

    Args:
        path (str): Path to the .mat file. Its base name without extension
            must match the top-level variable stored in the file.

    Returns:
        pd.DataFrame: One row per sample with the columns ``Batt_name``,
        ``cycle`` (1-based), ``type``, ``ambient_temperature``, ``time``,
        ``Voltage_measured``, ``Current_measured``, ``Temperature_measured``,
        ``Current_charge``, ``Voltage_charge``, ``Time`` and ``Capacity``.
        ``Capacity`` holds the raw array read from the file, or ``None``
        when the cycle record has no seventh field or that field is empty.

    Raises:
        ValueError: If the key derived from the file name is not present in
            the loaded file.
    """

    dataset_name = os.path.splitext(os.path.basename(path))[0]

    mat_data = loadmat(path)

    if dataset_name not in mat_data:
        raise ValueError(f"Dataset name '{dataset_name}' not found in the file. Available keys: {list(mat_data.keys())}")

    battery_data = mat_data[dataset_name]
    data_rows = []

    for cycle, cycle_data in enumerate(battery_data[0][0]['cycle'][0], start=1):
        cycle_type = cycle_data[0][0]
        ambient_temperature = cycle_data[1][0][0]
        time_metadata = cycle_data[2][0]

        # NOTE(review): the measurement record is read by field POSITION, not
        # by name, so one code path serves records whose field names differ.
        # Presumably positions 0-5 are voltage / current / temperature /
        # charger-or-load current / charger-or-load voltage / time for every
        # cycle type present -- confirm against the dataset description,
        # since records with a different layout would yield meaningless rows.
        measurements = cycle_data[3][0][0]

        voltage_measured = measurements[0]
        current_measured = measurements[1]
        temperature_measured = measurements[2]
        current_charge = measurements[3]
        voltage_charge = measurements[4]
        time_data = measurements[5]

        # Decide whether a capacity value is present by element count rather
        # than by truthiness: bool() of a NumPy array is ambiguous for
        # multi-element arrays, is rejected for empty arrays by recent NumPy
        # releases, and would silently drop a legitimate reading of zero.
        capacity = None
        if len(measurements) > 6 and np.size(measurements[6]) > 0:
            capacity = measurements[6]

        # The sixth field drives the number of rows emitted for this cycle.
        num_entries = len(time_data[0])

        for idx in range(num_entries):
            data_rows.append({
                'Batt_name': dataset_name,
                'cycle': cycle,
                'type': cycle_type,
                'ambient_temperature': ambient_temperature,
                'time': time_metadata,
                # .real / np.abs reduce values to plain real numbers even if
                # the file stores them with a complex dtype.
                'Voltage_measured': voltage_measured[0][idx].real.astype(float),
                'Current_measured': np.abs(current_measured[0][idx]),
                'Temperature_measured': temperature_measured[0][idx].real.astype(float),
                'Current_charge': np.abs(current_charge[0][idx]),
                'Voltage_charge': voltage_charge[0][idx].real.astype(float),
                'Time': time_data[0][idx],
                'Capacity': capacity
            })

    df = pd.DataFrame(data_rows)
    return df


In [58]:
# Smoke test: convert a single battery file and preview the first rows.
path = "/home/workspace/data/B0005.mat"

try:
    df = process_mat_file_to_dataframe(path)
    print(df.head())
except ValueError as e:
    # ValueError is what the converter raises when the key derived from the
    # file name is missing from the .mat file; the message is printed instead
    # of aborting the cell, so `df` is NOT (re)assigned on this path.
    print(e)

  Batt_name  cycle    type  ambient_temperature  \
0     B0005      1  charge                   24   
1     B0005      1  charge                   24   
2     B0005      1  charge                   24   
3     B0005      1  charge                   24   
4     B0005      1  charge                   24   

                                    time  Voltage_measured  Current_measured  \
0  [2008.0, 4.0, 2.0, 13.0, 8.0, 17.921]          3.873017          0.001201   
1  [2008.0, 4.0, 2.0, 13.0, 8.0, 17.921]          3.479394          4.030268   
2  [2008.0, 4.0, 2.0, 13.0, 8.0, 17.921]          4.000588          1.512731   
3  [2008.0, 4.0, 2.0, 13.0, 8.0, 17.921]          4.012395          1.509063   
4  [2008.0, 4.0, 2.0, 13.0, 8.0, 17.921]          4.019708          1.511318   

   Temperature_measured  Current_charge  Voltage_charge    Time Capacity  
0             24.655358           0.000           0.003   0.000     None  
1             24.666480           4.036           1.570   2.53

In [59]:
def process_multiple_files_and_save(file_paths, output_path_csv, output_path_mat):
    """
    Convert several battery .mat files and write them out as one CSV.

    Each file is converted with ``process_mat_file_to_dataframe``; the
    resulting frames are stacked in the order given, with a fresh 0..n-1
    index, and saved without the index column.

    Args:
        file_paths (list): Paths of the .mat files to convert, in output order.
        output_path_csv (str): Destination of the combined CSV file.
        output_path_mat (str): Intended destination for a .mat export.
            Currently unused: no .mat file is written by this function.
    """
    per_file_frames = [process_mat_file_to_dataframe(mat_path) for mat_path in file_paths]

    # A single concat over the collected frames; raises if the list is empty.
    combined = pd.concat(per_file_frames, ignore_index=True)
    combined.to_csv(output_path_csv, index=False)


In [60]:
# Battery files to merge; they are processed in the order listed.
file_paths = [
    "/home/workspace/data/B0005.mat",
    "/home/workspace/data/B0006.mat",
    "/home/workspace/data/B0007.mat",
    "/home/workspace/data/B0025.mat",
    "/home/workspace/data/B0026.mat",
    "/home/workspace/data/B0027.mat",
    "/home/workspace/data/B0028.mat"
]

output_path_csv = "/home/workspace/data/combined_data.csv"
# Placeholder path: the .mat export inside process_multiple_files_and_save is
# commented out, so nothing is written to this location.
output_path_mat = "/path/to/output/combined_data.mat"

# Converts every file above and writes the combined CSV.
process_multiple_files_and_save(file_paths, output_path_csv, output_path_mat)

In [61]:
import ast

df = pd.read_csv('/home/workspace/data/combined_data.csv')

# Take an explicit copy of the discharge rows: assigning into a boolean-mask
# slice of `df` is ambiguous (view vs. copy) and triggers SettingWithCopyWarning.
discharge = df[df['type'] == 'discharge'].copy()

# Capacity was written to the CSV as the text form of a nested array
# (e.g. "[[1.85]]"); parse such strings back to their single number and leave
# non-string values (e.g. NaN) untouched. The column is REPLACED rather than
# set through `.loc[:, ...]`, because an in-place `.loc` assignment keeps the
# existing object dtype and would discard the float conversion.
discharge['Capacity'] = discharge['Capacity'].apply(
    lambda x: ast.literal_eval(x)[0][0] if isinstance(x, str) else x).astype(float)

discharge.head(1)

Unnamed: 0,Batt_name,cycle,type,ambient_temperature,time,Voltage_measured,Current_measured,Temperature_measured,Current_charge,Voltage_charge,Time,Capacity
789,B0005,2,discharge,24,[2.0080e+03 4.0000e+00 2.0000e+00 1.5000e+01 2...,4.191492,0.004902,24.330034,0.0006,0.0,0.0,1.856487


In [62]:
# Persist the discharge-only rows (with the parsed Capacity column) for later use; the index is not written.
discharge.to_csv('/home/workspace/data/combined_discharge.csv', index=False)