# Feature Importance Analysis
Analyze the importance of features for predicting machine failures.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import os

# Set plotting style: apply seaborn's "whitegrid" theme to the plots drawn below.
sns.set_theme(style="whitegrid")

# Directory intended for analysis outputs. exist_ok=True makes the call
# idempotent and avoids the check-then-create race of testing
# os.path.exists() first.
output_dir = "analysis_results"
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Load the raw dataset from the working directory.
df = pd.read_csv("predictive_maintenance.csv")

# Integer codes for the 'Type' letters; any value missing from this mapping
# becomes NaN after Series.map.
type_map = {'L': 0, 'M': 1, 'H': 2}

# Preprocessing and feature engineering in a single chain:
#   * drop the 'UDI' and 'Product ID' columns,
#   * replace 'Type' with its integer code (the column keeps its position),
#   * append 'Temperature_Difference' (process minus air temperature),
#   * append 'Power' (rotational speed times torque, no unit conversion).
df_base = df.drop(columns=['UDI', 'Product ID']).assign(
    Type=lambda frame: frame['Type'].map(type_map),
    Temperature_Difference=lambda frame: (
        frame['Process temperature [K]'] - frame['Air temperature [K]']
    ),
    Power=lambda frame: frame['Rotational speed [rpm]'] * frame['Torque [Nm]'],
)

In [None]:
def analyze_importance(df_input, target_col, leakage_col, title, filename):
    """Fit a random forest and rank features by impurity-based importance.

    The features are every column of ``df_input`` except ``target_col`` and
    ``leakage_col``. A bar chart of the ranking is saved and then shown.

    Args:
        df_input: DataFrame containing the feature columns together with
            ``target_col`` and ``leakage_col``.
        target_col: Name of the label column to predict.
        leakage_col: Name of a column to exclude from the features (callers
            pass the other label column so it cannot leak into the model).
        title: Title drawn on the bar chart.
        filename: File name for the saved chart, written inside the
            module-level ``output_dir``. A falsy value (``None`` or ``""``)
            skips saving.

    Returns:
        DataFrame with columns ``Feature`` and ``Importance``, sorted by
        ``Importance`` in descending order.
    """
    X = df_input.drop(columns=[target_col, leakage_col])
    y = df_input[target_col]

    # Object-typed labels are converted to integer class ids before fitting.
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    # Fixed seed so the ranking is reproducible between runs.
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X, y)

    importance_df = pd.DataFrame(
        {'Feature': X.columns, 'Importance': rf.feature_importances_}
    ).sort_values(by='Importance', ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(
        x='Importance',
        y='Feature',
        data=importance_df,
        palette='viridis',
        hue='Feature',
        legend=False,
    )
    plt.title(title)

    # Save before plt.show(): some backends release the figure once it has
    # been shown, which would leave an empty image on disk.
    if filename:
        os.makedirs(output_dir, exist_ok=True)
        plt.savefig(os.path.join(output_dir, filename), bbox_inches='tight')

    plt.show()

    return importance_df

In [None]:
# Ranking for the binary 'Target' label; 'Failure Type' is kept out of the
# feature set.
print("Binary Failure Importance:")
binary_importance = analyze_importance(
    df_input=df_base,
    target_col='Target',
    leakage_col='Failure Type',
    title="Binary Failure Importance",
    filename="binary_feature_importance.png",
)
display(binary_importance)

# Ranking for the multi-class 'Failure Type' label; 'Target' is kept out of
# the feature set.
print("\nMulti-class Failure Importance:")
multiclass_importance = analyze_importance(
    df_input=df_base,
    target_col='Failure Type',
    leakage_col='Target',
    title="Multi-class Failure Importance",
    filename="multiclass_feature_importance.png",
)
display(multiclass_importance)

In [None]:
# Correlations are computed over the input features only, so both label
# columns are removed first.
features_only = df_base.drop(columns=['Target', 'Failure Type'])
corr_matrix = features_only.corr()

# Annotated heatmap of the pairwise correlations, two decimals per cell.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()