In [5]:
import numpy as np

def extract_features_from_sample_battery(file_path):
    """
    Reads a sample battery file where each line is a comma-separated q_d_n value 
    (e.g., "0.8855053186416626,") and extracts features from these values.
    
    Extracted features include:
      - slope_last_k_cycles: Slope over the last k cycles for each k in the list.
      - mean_grad_last_k_cycles: Mean gradient (via np.gradient) over the last k cycles.
      - trimmed_q_d_n_avg: Average value of the trimmed q_d_n array.
      - total_cycles: Total number of cycles, computed from the length of the trimmed q_d_n array.
    
    Parameters:
        file_path (str): Path to the comma-separated text file containing q_d_n values.
        
    Returns:
        dict: A dictionary containing the extracted features.
    """
    # Load the q_d_n values from file, handling comma separation per line.
    q_d_n_values = []
    with open(file_path, 'r') as f:
        for line in f:
            # Remove leading/trailing whitespace and trailing commas
            value_str = line.strip().rstrip(',')
            if value_str:
                q_d_n_values.append(float(value_str))
    
    # Convert the list to a numpy array
    q_d_n_array = np.array(q_d_n_values)
    
    # Trim trailing zeros from the q_d_n array (assumes zeros at the end indicate no data)
    trimmed_q_d_n = np.trim_zeros(q_d_n_array, 'b')
    
    # Compute total cycles as the length of the trimmed array
    total_cycles = len(trimmed_q_d_n)
    
    # Compute the average of the trimmed q_d_n values (if available)
    trimmed_q_d_n_avg = float(np.mean(trimmed_q_d_n)) if total_cycles > 0 else np.nan

    # Define k values for which features are computed
    k_values = [10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    
    # Initialize the dictionary to hold features
    features = {}
    
    # For each k, compute the slope over the last k cycles and the mean gradient
    for k in k_values:
        if total_cycles > k:
            # Slope: difference between the last value and the value k cycles ago divided by k
            slope = (trimmed_q_d_n[-1] - trimmed_q_d_n[-k]) / k
            
            # Mean gradient over the last k cycles using numpy.gradient
            grad = np.gradient(trimmed_q_d_n[-k:], 1)
            mean_grad = float(np.mean(grad))
        else:
            slope = np.nan
            mean_grad = np.nan
        
        features[f'slope_last_{k}_cycles'] = slope
        features[f'mean_grad_last_{k}_cycles'] = mean_grad

    # Add average of trimmed_q_d_n and total cycles to the feature set
    features['trimmed_q_d_n_avg'] = trimmed_q_d_n_avg
    features['total_cycles'] = total_cycles

    return features


In [7]:
# Example usage:
features = extract_features_from_sample_battery("/home/jaf/battery-lifespan-kg/b3c3.txt")
print(features)


{'slope_last_10_cycles': -0.0004921257495880127, 'mean_grad_last_10_cycles': -0.0005299955606460572, 'slope_last_50_cycles': -0.0007295429706573487, 'mean_grad_last_50_cycles': -0.0007365322113037109, 'slope_last_100_cycles': -0.000748140811920166, 'mean_grad_last_100_cycles': -0.0007513692975044251, 'slope_last_200_cycles': -0.0006169137358665466, 'mean_grad_last_200_cycles': -0.0006169727444648742, 'slope_last_300_cycles': -0.00047541677951812745, 'mean_grad_last_300_cycles': -0.0004754801591237386, 'slope_last_400_cycles': -0.0003808070719242096, 'mean_grad_last_400_cycles': -0.0003807681798934936, 'slope_last_500_cycles': -0.00031944239139556886, 'mean_grad_last_500_cycles': -0.000319430947303772, 'slope_last_600_cycles': -0.0002761176228523254, 'mean_grad_last_600_cycles': -0.0002763838569323222, 'slope_last_700_cycles': -0.00024357242243630545, 'mean_grad_last_700_cycles': -0.00024350694247654507, 'slope_last_800_cycles': -0.00021820046007633208, 'mean_grad_last_800_cycles': -0.0