In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the raw master table; every later cell works on this frame.
df = pd.read_csv(filepath_or_buffer="nasa_battery_master.csv")

In [3]:
# Names of the columns that are coerced to a numeric dtype further down.
# The order is kept exactly as originally listed.
numeric_cols = [
    'voltage_measured',
    'current_measured',
    'temperature_measured',
    'current_load',
    'voltage_load',
    'time',
    'sense_current',
    'battery_current',
    'current_ratio',
    'current_charge',
    'voltage_charge',
]

In [4]:
# Convert every column named in `numeric_cols` to a numeric dtype in one pass.
# errors='coerce' turns any value that cannot be parsed into NaN instead of
# raising, so malformed cells surface later as missing data.
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [5]:
# Discard the two impedance columns; errors='ignore' makes this a no-op for
# any of them that is not present in the frame.
df = df.drop(
    ['battery_impedance', 'rectified_impedance'],
    axis='columns',
    errors='ignore',
)

In [6]:
# Time step between consecutive rows, computed separately inside each file_id.
# The first row of every file has no predecessor (diff gives NaN), so it is
# assigned a step of 0.
df['delta_t'] = df['time'].groupby(df['file_id']).diff().fillna(0)

In [7]:
# Per-row charge increment: measured current times the time step, divided by
# 3600 (presumably seconds -> hours, giving Ah if current is in A — TODO confirm units).
# NOTE(review): `current_measured` is used with its sign, so rows with negative
# current contribute negative increments — confirm this is the intended
# convention for a "discharge" capacity.
df['discharge_capacity'] = df['current_measured'].mul(df['delta_t']).div(3600)

In [8]:
# Per-row charge increment from the charger-side current: current_charge times
# the time step, divided by 3600 (presumably seconds -> hours — TODO confirm units).
df['charge_capacity'] = df['current_charge'].mul(df['delta_t']).div(3600)


In [8]:
# NOTE: this cell used to flatten `agg_features.columns`, but it sits before the
# cell that creates `agg_features`, so a fresh "Restart & Run All" stopped here
# with a NameError. The aggregated columns are flattened exactly once, in the
# cell that follows the `agg_features` aggregation, so nothing is done here.

In [9]:
# Integrate the per-row capacity increments over each file: one row per
# file_id holding the summed discharge and charge increments.
capacity_features = (
    df.groupby("file_id")[['discharge_capacity', 'charge_capacity']]
    .sum()
    .reset_index()
)

In [10]:
# Give the summed columns their final per-cycle feature names.
capacity_features = capacity_features.rename(
    columns={
        'discharge_capacity': 'cycle_discharge_capacity_Ah',
        'charge_capacity': 'cycle_charge_capacity_Ah',
    }
)

In [11]:
# Per-file summary statistics. The three measured signals get mean/std/min/max,
# the two charger-side signals get mean/max, and `time` gets its maximum.
# After reset_index the frame has `file_id` first, followed by two-level
# (column, statistic) column labels.
agg_features = (
    df.groupby("file_id")
    .agg({
        **{
            name: ['mean', 'std', 'min', 'max']
            for name in ('voltage_measured', 'current_measured', 'temperature_measured')
        },
        **{name: ['mean', 'max'] for name in ('voltage_charge', 'current_charge')},
        # Largest timestamp in the file (presumably the cycle duration if time starts at 0).
        'time': ['max'],
    })
    .reset_index()
)

In [12]:
# Collapse the two-level (column, statistic) labels into "column_statistic"
# strings. The first label belongs to `file_id` and is written out by hand.
# Not re-runnable: once the labels are flat strings this cell no longer applies.
flat_stat_names = ['_'.join(pair) for pair in agg_features.columns[1:]]
agg_features.columns = ['file_id', *flat_stat_names]


In [13]:
# Combine the per-file summary statistics with the integrated capacities.
# Both inputs hold one row per file_id; the left join keeps every row of
# `agg_features` even if a capacity row were missing.
final_features = pd.merge(agg_features, capacity_features, on="file_id", how="left")

# Ratio of integrated discharge capacity to integrated charge capacity.
# A file whose charge capacity sums to exactly 0 (e.g. no charge current was
# recorded) would otherwise produce +/-inf (or NaN for 0/0) and leak infinities
# into the exported features, so zero denominators are masked to NaN first and
# the ratio is reported as missing for those files.
charge_capacity_ah = final_features['cycle_charge_capacity_Ah'].replace(0, np.nan)
final_features['cycle_efficiency'] = (
    final_features['cycle_discharge_capacity_Ah'] / charge_capacity_ah
)

In [14]:
# Persist the per-file feature table; the row index is not written.
final_features.to_csv(path_or_buf="phase2_features.csv", index=False)

In [15]:
# Report the size of the feature table, then preview its first five rows.
print(
    "✅ Phase 2 feature engineering complete. Shape:",
    final_features.shape,
)
print(final_features.head(n=5))

✅ Phase 2 feature engineering complete. Shape: (7565, 21)
   file_id  voltage_measured_mean  voltage_measured_std  voltage_measured_min  \
0        1               3.475266              0.284626              2.470612   
1        2               3.407773              0.044809              3.332557   
2        3               4.193521              0.054022              3.486189   
3        4               4.200429              0.007882              4.187199   
4        5               3.476559              0.272495              2.477662   

   voltage_measured_max  current_measured_mean  current_measured_std  \
0              4.246764              -0.952767              0.201448   
1              3.482989               0.000045              0.000784   
2              4.214595               0.520792              0.525708   
3              4.213659               0.024929              0.015197   
4              4.186636              -0.983889              0.106708   

   current_measured_mi