In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset.
# If the file is missing, print a readable hint and re-raise so execution of
# this cell stops right here. `exit()` is an interactive-shell helper; inside a
# notebook it acts on the kernel instead of cleanly aborting the cell, which
# can leave the statements below running without `data` defined.
file_path = 'predictive_maintenance_balanced.csv'
try:
    data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: File '{file_path}' not found. Please check the file path.")
    raise
else:
    print(f"Successfully loaded '{file_path}'")

# Separate features and target.
# `drop` returns a new frame, so the encoding below does not modify `data`.
X = data.drop(columns=['Failure Type'])
y = data['Failure Type']

# Encode categorical features.
# Every non-numeric column is label-encoded, not only `object` columns:
# pandas string and category dtypes would otherwise slip through and reach the
# scaler as raw text. The fitted encoder of each column is kept in
# `label_encoders` so the same mapping can be applied to new inputs later.
label_encoders = {}
for column in X.columns:
    if not pd.api.types.is_numeric_dtype(X[column]):
        le = LabelEncoder()
        X[column] = le.fit_transform(X[column])
        label_encoders[column] = le

# Encode target labels (class names -> integer codes 0..n_classes-1).
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Standardize all feature columns (the label-encoded ones included).
# The scaler statistics are learned from the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest on the scaled training data.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Class predictions (encoded labels) for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Evaluate the model on the held-out split.
accuracy = accuracy_score(y_test, y_pred)
print(f"\n Model Accuracy: {accuracy:.2f}")

# Classification report with proper class names.
# `labels` is passed explicitly (the target encoder emits codes
# 0..n_classes-1) so the report stays aligned with `target_names` even when a
# class is absent from both `y_test` and `y_pred`; without it,
# classification_report raises on the length mismatch.
target_names = target_encoder.classes_
report = classification_report(
    y_test,
    y_pred,
    labels=list(range(len(target_names))),
    target_names=target_names,
)
print("\n🔎 Classification Report:")
print(report)

# Get real-time input from the user, one value per feature column.
# Categorical columns must match a label seen during training and are stored
# as their encoded integer; all other columns are read as floats.
print("\nEnter current machine data:")
new_input = {}
for column in X.columns:
    if column in label_encoders:  # Categorical input
        encoder = label_encoders[column]
        choices = encoder.classes_
        print(f"Options for '{column}': {choices}")
        # Re-prompt until a known label is entered, as the message promises,
        # instead of calling exit() on the first mistake.
        while True:
            value = input(f"Enter {column}: ")
            if value in choices:
                break
            print(f"Invalid input. '{value}' not in {choices}. Please try again.")
        value = encoder.transform([value])[0]
    else:
        while True:
            try:
                value = float(input(f"Enter {column}: "))  # Numeric input
                break
            except ValueError:
                print(f"Invalid input. Please enter a numeric value for '{column}'.")

    new_input[column] = value

# Wrap the collected values in a one-row frame and apply the scaler that was
# fitted on the training split.
new_data = pd.DataFrame([new_input])
new_data_scaled = scaler.transform(new_data)

# Run the classifier and map the encoded class back to its original name.
prediction = model.predict(new_data_scaled)
predicted_status = target_encoder.inverse_transform(prediction)[0]

# Report the outcome; anything other than 'No Failure' is flagged as a
# potential failure.
print("\n🔎 Machine Status Prediction:")
print(f"➡️ Current machine status: **{predicted_status}**")
if predicted_status != 'No Failure':
    print(f"⚠️ Potential failure detected: **{predicted_status}**")
else:
    print("The machine is SAFE to operate.")

# Add the new input and prediction to the dataset.
# `new_input` stores categorical values as label-encoded integers, while
# `data` still holds the original labels, so decode them first; otherwise the
# saved CSV would mix integer codes into text columns.
new_row = {
    column: (
        label_encoders[column].inverse_transform([value])[0]
        if column in label_encoders
        else value
    )
    for column, value in new_input.items()
}
# NOTE(review): the prediction is written into 'Failure Type' (the label
# column) as well as 'Current Status'. If the source CSV has no
# 'Current Status' column, concat creates it and leaves it empty for the
# existing rows — confirm this is the intended output schema.
new_row['Failure Type'] = predicted_status
new_row['Current Status'] = predicted_status
data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)

# Save the updated dataset
updated_file_path = 'updated_machine_data.csv'
data.to_csv(updated_file_path, index=False)
print(f"\nUpdated data saved to: **{updated_file_path}**")

✅ Successfully loaded 'predictive_maintenance_balanced.csv'

 Model Accuracy: 0.67

🔎 Classification Report:
                    precision    recall  f1-score   support

      Heat Failure       0.37      0.35      0.36        31
Mechanical Failure       0.26      0.50      0.34        26
        No Failure       1.00      1.00      1.00       101
     Power Failure       0.47      0.21      0.30        42

          accuracy                           0.67       200
         macro avg       0.53      0.52      0.50       200
      weighted avg       0.70      0.67      0.67       200


Enter current machine data:
Options for 'Product ID': ['P00001' 'P00002' 'P00003' 'P00004' 'P00005' 'P00006' 'P00007' 'P00008'
 'P00009' 'P00010' 'P00011' 'P00012' 'P00013' 'P00014' 'P00015' 'P00016'
 'P00017' 'P00018' 'P00019' 'P00020' 'P00021' 'P00022' 'P00023' 'P00024'
 'P00025' 'P00026' 'P00027' 'P00028' 'P00029' 'P00030' 'P00031' 'P00032'
 'P00033' 'P00034' 'P00035' 'P00036' 'P00037' 'P00038' 'P0003