In [2]:
import ast
import os

import pandas as pd

# Load NASA and Oxford processed data
nasa_dir = '../data/NASA/preprocessed/'
oxford_dir = '../data/Oxford/'
all_data = []

# Columns that may be stored as stringified nested arrays (text starting with
# '[[') instead of plain numbers; these are unwrapped to their first element.
SCALAR_COLUMNS = ['Voltage (V)', 'Current (A)', 'Temperature (°C)']
NESTED_ARRAY_PATTERN = r'\[\[.*\]\]'


def _unwrap_nested_scalar(value):
    """Return element [0][0] of a stringified nested array as a float.

    Any value that is not a string starting with '[[' is returned unchanged.
    """
    if isinstance(value, str) and value.startswith('[['):
        # SECURITY: this used to be eval(), which would execute arbitrary
        # code embedded in a CSV cell. ast.literal_eval only parses Python
        # literals and raises ValueError/SyntaxError on anything else.
        return float(ast.literal_eval(value)[0][0])
    return value


def _load_processed_files(directory, dataset_name):
    """Load every '*_processed.csv' file in *directory*.

    Each file is read into a DataFrame, the columns in SCALAR_COLUMNS are
    unwrapped where they hold stringified nested arrays, and a 'Dataset'
    column set to *dataset_name* is added. One progress line is printed per
    file.

    Returns:
        A list of DataFrames, one per file, in sorted filename order.

    Raises:
        FileNotFoundError: if *directory* does not exist.
        KeyError: if a file lacks one of the SCALAR_COLUMNS.
    """
    frames = []
    # os.listdir returns names in arbitrary order; sort so the row order of
    # the combined dataset is reproducible across machines and runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('_processed.csv'):
            continue
        df = pd.read_csv(os.path.join(directory, filename))
        # Ensure Voltage, Current, Temperature are scalars (convert if arrays)
        for col in SCALAR_COLUMNS:
            # The .str accessor only exists on string-like columns; a column
            # that read_csv already parsed as numeric needs no conversion.
            if pd.api.types.is_numeric_dtype(df[col]):
                continue
            # na=False keeps missing cells out of the match result so that
            # .any() operates on a clean boolean mask.
            if df[col].str.match(NESTED_ARRAY_PATTERN, na=False).any():
                df[col] = df[col].apply(_unwrap_nested_scalar)
        df['Dataset'] = dataset_name
        frames.append(df)
        print(f"Loaded {dataset_name} file: {filename}, Rows: {len(df)}")
    return frames


# Load NASA files
all_data.extend(_load_processed_files(nasa_dir, 'NASA'))

# Load Oxford files
all_data.extend(_load_processed_files(oxford_dir, 'Oxford'))

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when neither directory held a matching file.
if not all_data:
    raise ValueError(
        f"No '*_processed.csv' files found in {nasa_dir} or {oxford_dir}"
    )

# Combine into one DataFrame
combined_df = pd.concat(all_data, ignore_index=True)

# Print basic stats
print(f"\nTotal rows in combined dataset: {len(combined_df)}")
print("\nColumns:", combined_df.columns.tolist())
print("\nSample data head:")
print(combined_df.head())
print("\nMissing values per column:")
print(combined_df.isnull().sum())
# NOTE(review): value_counts() on 'Dataset' reports the number of ROWS per
# dataset, not the number of distinct batteries; the label is kept as-is so
# the cell output stays unchanged.
print("\nUnique batteries (by Dataset):")
print(combined_df['Dataset'].value_counts())

# Save combined dataset
combined_df.to_csv('../data/combined_dataset.csv', index=False)
print("Combined dataset saved to ../data/combined_dataset.csv")

Loaded NASA file: B0005_processed.csv, Rows: 168
Loaded NASA file: B0006_processed.csv, Rows: 168
Loaded NASA file: B0007_processed.csv, Rows: 168
Loaded NASA file: B0018_processed.csv, Rows: 132
Loaded NASA file: B0025_processed.csv, Rows: 28
Loaded NASA file: B0026_processed.csv, Rows: 28
Loaded NASA file: B0027_processed.csv, Rows: 28
Loaded NASA file: B0028_processed.csv, Rows: 28
Loaded NASA file: B0029_processed.csv, Rows: 40
Loaded NASA file: B0030_processed.csv, Rows: 40
Loaded NASA file: B0031_processed.csv, Rows: 40
Loaded NASA file: B0032_processed.csv, Rows: 40
Loaded NASA file: B0033_processed.csv, Rows: 197
Loaded NASA file: B0034_processed.csv, Rows: 197
Loaded NASA file: B0036_processed.csv, Rows: 197
Loaded NASA file: B0038_processed.csv, Rows: 47
Loaded NASA file: B0039_processed.csv, Rows: 47
Loaded NASA file: B0040_processed.csv, Rows: 47
Loaded NASA file: B0041_processed.csv, Rows: 67
Loaded NASA file: B0042_processed.csv, Rows: 112
Loaded NASA file: B0043_processe