# Merge & Align NASA + Oxford Battery Datasets
This notebook loads processed NASA and Oxford battery datasets, aligns their features, merges them, and visualizes combined degradation curves.

In [5]:
# --- 1. Setup ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
print("Libraries loaded.")

Libraries loaded.


In [None]:
# --- 2. Load Processed Datasets ---
df_nasa = pd.read_csv('data/processed/battery_degradation.csv')
df_oxford = pd.read_csv('data/processed/oxford_battery_degradation.csv')

# Add dataset identifier
df_nasa['dataset'] = 'NASA'
df_oxford['dataset'] = 'Oxford'

print("Loaded datasets:")
print(f"NASA: {len(df_nasa)} rows")
print(f"Oxford: {len(df_oxford)} rows")

TypeError: '<=' not supported between instances of 'str' and 'float'

In [11]:
# --- 3. Inspect Column Names ---
print("\nNASA columns:")
print(df_nasa.columns.tolist())

print("\nOxford columns:")
print(df_oxford.columns.tolist())


NASA columns:
['battery_id', 'cycle_index', 'voltage', 'current', 'temperature', 'time', 'capacity', 'norm_capacity', 'dataset']

Oxford columns:
['Battery_ID', 'Cycle_Index', 'Capacity_Ah', 'Voltage_V', 'Current_A', 'Temperature_C', 'Time_s', 'Cycle_Type', 'initial_capacity', 'norm_capacity', 'RUL', 'dataset']


In [9]:
# --- 4. Standardize Column Names ---
# Rename Oxford columns to match NASA format
df_oxford.rename(columns={
    'Cycle_Index': 'cycle_index',
    'Capacity_Ah': 'capacity',
    'Battery_ID': 'battery_id'
}, inplace=True)

# Select only relevant columns
cols_to_keep = ['battery_id', 'cycle_index', 'capacity', 'norm_capacity', 'RUL', 'dataset']
df_nasa = df_nasa[cols_to_keep]
df_oxford = df_oxford[cols_to_keep]

print("Columns aligned.")

KeyError: "['RUL'] not in index"

In [None]:
# --- 5. Combine Datasets ---
df_combined = pd.concat([df_nasa, df_oxford], ignore_index=True)
print(f"Merged dataset size: {len(df_combined)} rows")

In [None]:
# --- 6. Visualize Combined Degradation Curves ---
plt.figure(figsize=(14, 8))
datasets_to_plot = ['NASA', 'Oxford']
for dataset in datasets_to_plot:
    subset = df_combined[df_combined['dataset'] == dataset].groupby('cycle_index')['norm_capacity'].mean().reset_index()
    plt.plot(subset['cycle_index'], subset['norm_capacity'], label=dataset)

plt.axhline(y=0.8, color='r', linestyle='--', label='Failure Threshold (80%)')
plt.xlabel('Cycle Index')
plt.ylabel('Normalized Capacity')
plt.title('Degradation Curve Comparison: NASA vs Oxford')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# --- 7. Save Merged Dataset ---
output_path = 'data/processed/merged_battery_degradation.csv'
df_combined.to_csv(output_path, index=False)
print(f"Merged dataset saved to {output_path}")