In [1]:
# In notebooks/07_Motor_Clustering_Analysis.ipynb
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

print("Loading motor dataset for clustering...")
df = pd.read_csv('../data/ai4i2020.csv')

# --- Feature Engineering for Clustering ---
# Collapse the raw rows into one summary row per 'Product ID'. Each output
# column is declared as  name -> (source column, aggregation):
#   * avg_torque     mean of 'Torque [Nm]'
#   * avg_speed      mean of 'Rotational speed [rpm]'
#   * max_tool_wear  maximum of 'Tool wear [min]'
# NOTE(review): the sample rows printed by this cell show means with a single
# decimal place, which hints that a 'Product ID' may appear only once in the
# file (making mean/max a pass-through) -- confirm that this column really
# identifies a recurring machine before interpreting the clusters.
aggregation_spec = {
    'avg_torque': ('Torque [Nm]', 'mean'),
    'avg_speed': ('Rotational speed [rpm]', 'mean'),
    'max_tool_wear': ('Tool wear [min]', 'max'),
}
per_product = df.groupby('Product ID')
motor_summary = per_product.agg(**aggregation_spec).reset_index()

print("Engineered features for motor fleet summary:")
print(motor_summary.head())

# --- K-Means Clustering ---
# Cluster on exactly the engineered columns, in the order they were declared.
feature_columns = list(aggregation_spec)
features_for_clustering = motor_summary[feature_columns]

# Standardise every feature to zero mean / unit variance so that rpm (values
# in the thousands) does not dominate the Euclidean distances used by K-Means.
scaler = StandardScaler().fit(features_for_clustering)
scaled_features = scaler.transform(features_for_clustering)

# Three clusters are requested; the intended reading is something like
# 'Standard Use', 'High Stress', 'High Wear', but the model only emits integer
# labels 0-2 and nothing here maps a label to one of those names.
# random_state and n_init are pinned so reruns give the same partition.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(scaled_features)
motor_summary['cluster'] = kmeans.labels_

print("\nMotor clustering complete.")

# --- Save the results ---
# Persist the per-product summary together with its cluster label (no index column).
output_path = '../data/clustered_motor_data.csv'
motor_summary.to_csv(output_path, index=False)
print(f"\n✅ Clustered motor data saved to {output_path}")

Loading motor dataset for clustering...
Engineered features for motor fleet summary:
  Product ID  avg_torque  avg_speed  max_tool_wear
0     H29424        23.9     1782.0             24
1     H29425        44.3     1423.0             29
2     H29432        54.5     1306.0             50
3     H29434        42.7     1375.0             58
4     H29441        24.6     1811.0             77

Motor clustering complete.

✅ Clustered motor data saved to ../data/clustered_motor_data.csv
