In [1]:
# 02_feature_engineering.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Configuration: file locations and plot theme
INPUT_PATH = Path("../data/processed/agri_sensor_data_cleaned.csv")
OUTPUT_PATH = Path("../data/processed/agri_features.csv")
sns.set(style="darkgrid")

# Read the cleaned sensor readings, parsing `timestamp` as datetime
df = pd.read_csv(INPUT_PATH, parse_dates=["timestamp"])
print("✅ Cleaned dataset loaded. Shape:", df.shape)

# Order rows chronologically within each machine so the row-based rolling
# windows computed further down follow time order
df = df.sort_values(["machine_id", "timestamp"])

# Create rolling window features
# NOTE(review): `rolling(window_size)` counts rows, not elapsed time, so the
# window spans 30 seconds only if each machine is sampled once per second —
# TODO confirm the sampling rate.
window_size = 30  # number of consecutive readings per machine
sensor_cols = ['vibration_level', 'motor_current', 'motor_temp', 'torque', 'rpm']
rolling_stats = ['mean', 'std', 'min', 'max']

for col in sensor_cols:
    # Group once per sensor column and reuse it for every statistic.
    readings_by_machine = df.groupby('machine_id')[col]
    for stat in rolling_stats:
        # Trailing window within each machine; min_periods=1 yields a value
        # from the first reading instead of waiting for a full window.
        df[f'{col}_{stat}'] = readings_by_machine.transform(
            lambda s: getattr(s.rolling(window_size, min_periods=1), stat)()
        )

# Fill NaNs from std
# Rolling std uses ddof=1 by default, so it is NaN for a window holding a
# single observation (the first row of each machine). Only the std columns are
# filled: a blanket df.fillna(0) would also turn genuinely missing values in
# other columns into 0 and hide data-quality problems.
std_cols = [f'{col}_std' for col in sensor_cols]
df[std_cols] = df[std_cols].fillna(0)

# One-hot encode the categorical `operating_mode` column
df = pd.get_dummies(df, columns=["operating_mode"])

# Build the modeling table: every column except the raw timestamp
df_model = df.drop("timestamp", axis=1)

# Persist the engineered features (row index is not written) and report
df_model.to_csv(OUTPUT_PATH, index=False)
print(f"💾 Feature-engineered dataset saved to {OUTPUT_PATH}")
print("📊 Final shape:", df_model.shape)


✅ Cleaned dataset loaded. Shape: (32400, 11)
💾 Feature-engineered dataset saved to ..\data\processed\agri_features.csv
📊 Final shape: (32400, 33)
