# Battery Degradation Analysis of NASA Li-On Batteries

In [44]:
import pandas as pd
import os

# Path to your data directory and metadata file
data_directory = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/data'
metadata_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata.csv'

# Load the metadata CSV
metadata = pd.read_csv(metadata_path)

# Filter out the discharge cycles and drop rows where 'Capacity' might be NaN or not applicable
discharge_metadata = metadata[(metadata['type'] == 'discharge') & metadata['Capacity'].notna()]

# Convert 'Capacity' to float where possible and drop rows where conversion failed
discharge_metadata['Capacity'] = pd.to_numeric(discharge_metadata['Capacity'], errors='coerce').dropna()

# Define the end-of-life threshold for capacity
eol_threshold = 1.4

# Initialize an empty DataFrame to store RUL data for all batteries
rul_data = pd.DataFrame()

# Iterate over each unique battery_id in discharge_metadata
for battery_id in discharge_metadata['battery_id'].unique():
    battery_data = discharge_metadata[discharge_metadata['battery_id'] == battery_id]
    
    # Find the EOL cycle number for this battery
    eol_cycle = battery_data[battery_data['Capacity'] <= eol_threshold].index.min()
    
    # Calculate RUL for each cycle and store the results
    battery_data['RUL'] = eol_cycle - battery_data.index
    battery_data['RUL'] = battery_data['RUL'].clip(lower=0)
    
    # Store the RUL data for this battery in the rul_data DataFrame
    rul_data = pd.concat([rul_data, battery_data[['battery_id', 'test_id', 'RUL']]], ignore_index=True)

# Merge the RUL data back into the original metadata DataFrame
metadata_with_rul = pd.merge(metadata, rul_data, how='left', on=['battery_id', 'test_id'])

# Save the updated metadata with RUL data to a new CSV file
new_metadata_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_with_rul.csv'
metadata_with_rul.to_csv(new_metadata_path, index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  discharge_metadata['Capacity'] = pd.to_numeric(discharge_metadata['Capacity'], errors='coerce').dropna()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  battery_data['RUL'] = eol_cycle - battery_data.index
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  battery_data['RUL'] = battery_data['RUL'].clip(

In [45]:
import pandas as pd
import os

# Paths
data_directory = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/data'
metadata_file = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_with_rul.csv'

# Read metadata
metadata = pd.read_csv(metadata_file)

# Filter discharge files
discharge_files = metadata[metadata['type'] == 'discharge']['filename']
charge_files = metadata[metadata['type'] == 'charge']['filename']
impedance_files = metadata[metadata['type'] == 'impedance']['filename']

# Initialize DataFrame for aggregated features
aggregated_features = pd.DataFrame()

# Process each discharge file
for filename in discharge_files:
    file_path = os.path.join(data_directory, filename)
    discharge_data = pd.read_csv(file_path)
    
    # Calculate statistical features for the charge dataset
    features = discharge_data.agg({
        'Voltage_load': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]],
        'Current_load': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]],
        'Temperature_measured': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]]
    }).rename(index={'<lambda_0>': 'delta'}).transpose()
    
    features['filename'] = filename
    aggregated_features = pd.concat([aggregated_features, features.reset_index().melt(id_vars='index', value_vars=['mean', 'std', 'delta'])], ignore_index=True)

for filename in charge_files:
    file_path = os.path.join(data_directory, filename)
    charge_data = pd.read_csv(file_path)
    
    # Calculate statistical features for the charge dataset
    features = charge_data.agg({
        'Voltage_charge': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]],
        'Current_charge': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]],
        'Temperature_measured': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]]
    }).rename(index={'<lambda_0>': 'delta'}).transpose()
    
    features['filename'] = filename
    aggregated_features = pd.concat([aggregated_features, features.reset_index().melt(id_vars='index', value_vars=['mean', 'std', 'delta'])], ignore_index=True)

for filename in impedance_files:
    file_path = os.path.join(data_directory, filename)
    impedance_data = pd.read_csv(file_path)
    
    # Calculate statistical features for the impedance dataset
    features = impedance_data.agg({
        'Battery_impedance': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]],
        'Rectified_Impedance': ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]]
    }).rename(index={'<lambda_0>': 'delta'}).transpose()
    
    features['filename'] = filename
    aggregated_features = pd.concat([aggregated_features, features.reset_index().melt(id_vars='index', value_vars=['mean', 'std', 'delta'])], ignore_index=True)

# Pivot aggregated_features to wide format
wide_features = aggregated_features.pivot_table(index='filename', columns=['Parameter', 'Statistic'], values='Value').reset_index()

# Merge the features with metadata
metadata_enriched = pd.merge(metadata, wide_features, on='filename', how='left')

# Save the enriched metadata to a new CSV
metadata_enriched.to_csv('/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_rul_stats.csv', index=False)


KeyError: "The following 'value_vars' are not present in the DataFrame: ['delta']"

In [46]:
import pandas as pd
import os

def calculate_features(filenames, data_directory, params, is_complex=False):
    aggregated_features = []
    for filename in filenames:
        file_path = os.path.join(data_directory, filename)
        data = pd.read_csv(file_path)

        if is_complex:
            for param in params:
                data[param] = data[param].apply(lambda x: complex(x.replace('i', 'j')) if isinstance(x, str) else x)
                # Assuming you want the magnitude for complex numbers
                data[param] = data[param].apply(abs)

        # Perform calculations
        calculations = {param: ['mean', 'std', lambda x: x.iloc[-1] - x.iloc[0]] for param in params}
        feature = data.agg(calculations).rename(index={'<lambda_0>': 'delta'})
        feature = feature.unstack().reset_index()
        feature.columns = ['Statistic', 'Parameter', 'Value']
        feature['filename'] = filename

        aggregated_features.append(feature)

    return pd.concat(aggregated_features, ignore_index=True)

# Paths
data_directory = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/data'
metadata_file = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_with_rul.csv'

# Load metadata
metadata = pd.read_csv(metadata_file)

# Define params for each type
discharge_params = ['Voltage_measured', 'Current_measured', 'Temperature_measured','Current_load','Voltage_load']
charge_params = ['Voltage_measured', 'Current_measured', 'Temperature_measured','Current_charge','Voltage_charge']
impedance_params = ['Sense_current','Battery_current','Battery_impedance', 'Rectified_Impedance']

# Calculate features
discharge_features = calculate_features(metadata[metadata['type'] == 'discharge']['filename'], data_directory, discharge_params)
charge_features = calculate_features(metadata[metadata['type'] == 'charge']['filename'], data_directory, charge_params)
impedance_features = calculate_features(metadata[metadata['type'] == 'impedance']['filename'], data_directory, impedance_params, is_complex=True)

# Combine all features
all_features = pd.concat([discharge_features, charge_features, impedance_features], ignore_index=True)


# Pivot 'all_features' DataFrame to wide format
wide_features = all_features.pivot_table(index='filename', 
                                         columns=['Parameter', 'Statistic'], 
                                         values='Value').reset_index()

# Flatten the MultiIndex columns after pivoting
wide_features.columns = ['_'.join(col).strip() if col[1] else col[0] for col in wide_features.columns.values]

# Load the original metadata to ensure we can merge accurately
metadata = pd.read_csv(metadata_file)

# Merge the wide feature set with the metadata, matching on 'filename'
metadata_enriched = pd.merge(metadata, wide_features, on='filename', how='left')

# Specify the path where you want to save the enriched metadata CSV file
output_file = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_enriched_with_features.csv'

# Save the merged DataFrame to a CSV file
metadata_enriched.to_csv(output_file, index=False)

print(f'Saved enriched metadata with features to {output_file}')



Saved enriched metadata with features to /Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_enriched_with_features.csv


In [1]:
import pandas as pd
import numpy as np

# Path to the metadata file
metadata_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_enriched_with_features.csv'
# Base path to the data files
data_folder_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/data/'

# Function to calculate energy from a dataframe
def calculate_energy(df):
    df['Power'] = df['Voltage_measured'] * df['Current_measured']
    energy = np.trapz(df['Power'], df['Time'])
    return energy

# Function to process each cycle pair
def process_cycle_pair(charge_file, discharge_file):
    charge_df = pd.read_csv(f"{data_folder_path}{charge_file}")
    discharge_df = pd.read_csv(f"{data_folder_path}{discharge_file}")
    charge_energy = calculate_energy(charge_df)
    discharge_energy = calculate_energy(discharge_df)
    efficiency = discharge_energy / charge_energy if charge_energy > 0 else None
    return efficiency

# Function to find the closest preceding charge cycle based on time
def find_matching_charge_cycle(discharge_row, charge_cycles):
    discharge_time = pd.to_datetime(discharge_row['start_time'])
    battery_id = discharge_row['battery_id']
    test_id = discharge_row['test_id']

    # Filter for the same battery and test
    potential_matches = charge_cycles[(charge_cycles['battery_id'] == battery_id) & 
                                      (charge_cycles['test_id'] == test_id)]
    
    # Convert start times to datetime for comparison
    potential_matches['start_time'] = pd.to_datetime(potential_matches['start_time'])
    # Filter for charge cycles before the discharge cycle
    before_discharge = potential_matches[potential_matches['start_time'] < discharge_time]
    
    if not before_discharge.empty:
        # Get the closest preceding charge cycle
        closest_charge = before_discharge.iloc[before_discharge['start_time'].sub(discharge_time).abs().argsort()[:1]]
        return closest_charge.iloc[0]
    return None

def main():
    metadata_df = pd.read_csv(metadata_path)
    efficiencies = [None] * len(metadata_df)

    charge_cycles = metadata_df[metadata_df['type'] == 'charge'].copy()
    discharge_cycles = metadata_df[metadata_df['type'] == 'discharge']

    for index, discharge_row in discharge_cycles.iterrows():
        matching_charge_row = find_matching_charge_cycle(discharge_row, charge_cycles)
        if matching_charge_row is not None:
            efficiency = process_cycle_pair(matching_charge_row['filename'], discharge_row['filename'])
            efficiencies[index] = efficiency

    metadata_df['Efficiency'] = efficiencies
    metadata_df.to_csv('/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_with_efficiency.csv', index=False)

if __name__ == "__main__":
    main()


In [2]:
import pandas as pd
import numpy as np

# Path to the metadata file
metadata_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/metadata_enriched_with_features.csv'
# Base path to the data files
data_folder_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/data/'

# Load the metadata DataFrame
metadata_df = pd.read_csv(metadata_path)

# Initialize a column for average impedance magnitude
metadata_df['Impedance_Magnitude_Avg'] = np.nan

# Corrected function to extract the real part of a complex number stored as a string
def extract_real_part_corrected(complex_str):
    try:
        complex_num = complex(str(complex_str).replace('j', 'j').replace(' ', ''))
        return complex_num.real
    except ValueError:
        return np.nan

# Function to calculate the magnitude of impedance from a complex number
def calculate_impedance_magnitude_corrected(complex_str):
    try:
        # Ensure correct handling of the input as a string and convert to complex number
        complex_number = complex(complex_str)
        # Calculate and return the magnitude
        magnitude = abs(complex_number)
        return magnitude
    except ValueError:
        # Handle potential conversion errors gracefully
        return np.nan

# Process each row in the metadata DataFrame
for index, row in metadata_df.iterrows():
    if row['type'] == 'impedance' and pd.notnull(row['filename']):
        # Construct the full path to the current data file
        current_file = f"{data_folder_path}{row['filename']}"
        # Load the data file
        df = pd.read_csv(current_file)
        # Apply the corrected logic to calculate impedance magnitude
        df['Rectified_Impedance_Real'] = df['Rectified_Impedance'].apply(extract_real_part_corrected)
        df['Rectified_Impedance_Magnitude'] = df['Rectified_Impedance_Real'].apply(calculate_impedance_magnitude_corrected)
        # Calculate the average magnitude and update the metadata DataFrame
        average_magnitude = df['Rectified_Impedance_Magnitude'].mean()
        metadata_df.at[index, 'Impedance_Magnitude_Avg'] = average_magnitude

# Sort the metadata_df by battery_id, test_id, and type
metadata_df.sort_values(by=['battery_id', 'test_id', 'type'], inplace=True)

# Calculate the change rate of impedance magnitude
metadata_df['Impedance_Magnitude_Change_Rate'] = metadata_df.groupby(['battery_id', 'test_id'])['Impedance_Magnitude_Avg'].diff()

# Display the updated DataFrame (or a portion of it)
print(metadata_df[['battery_id', 'test_id', 'type', 'Impedance_Magnitude_Avg', 'Impedance_Magnitude_Change_Rate']].head())

# Define the path for the output file
output_file_path = '/Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/updated_metadata.csv'

# Write the updated metadata DataFrame to a CSV file
metadata_df.to_csv(output_file_path, index=False)

print(f"Updated metadata has been written to: {output_file_path}")


     battery_id  test_id       type  Impedance_Magnitude_Avg  \
5120      B0005        0     charge                      NaN   
5121      B0005        1  discharge                      NaN   
5122      B0005        2     charge                      NaN   
5123      B0005        3  discharge                      NaN   
5124      B0005        4     charge                      NaN   

      Impedance_Magnitude_Change_Rate  
5120                              NaN  
5121                              NaN  
5122                              NaN  
5123                              NaN  
5124                              NaN  
Updated metadata has been written to: /Users/aparnakakarlapudi/Desktop/Practicum/cleaned_dataset/updated_metadata.csv


Rate of Change: Calculate the rate of change for voltage, current, and temperature between consecutive cycles. This could help in identifying trends in battery performance degradation.

Capacity Fade over Cycles: Since capacity fade is a direct indicator of battery health, calculating the rate of capacity fade over cycles can be a powerful feature. This involves calculating the difference in capacity between consecutive discharge cycles.

Impedance Growth: Similar to capacity fade, the growth rate of impedance (both Battery_impedance and Rectified_impedance) can be indicative of battery health. Calculate the change in impedance over cycles.

Cycle Count: The number of cycles completed by a battery can also be a simple yet effective feature, as batteries degrade over time with more cycles.

Temperature Variability: Since extreme temperatures can affect battery life, calculating the variability or average temperature during charge and discharge cycles might provide insights into conditions leading to faster degradation.