# ECU Failure Prediction Notebook

### Step 1: Load the Dataset

In [1]:

# Read the ECU manufacturing records into a DataFrame
import pandas as pd

DATASET_PATH = "ecu dataset.csv"  # resolved relative to the notebook's working directory
ecu_data = pd.read_csv(DATASET_PATH)

# Summarise what was loaded: the banner and the info() listing go to stdout,
# and the trailing head() expression is rendered as the cell's rich output.
print("Dataset loaded successfully. Here's the structure:")
ecu_data.info()
ecu_data.head(n=5)
    

Dataset loaded successfully. Here's the structure:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 21 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ECU_ID                      10000 non-null  object 
 1   Manufacturing_Line_ID       10000 non-null  object 
 2   Process_Stage               10000 non-null  object 
 3   Temperature                 10000 non-null  float64
 4   Pressure                    10000 non-null  float64
 5   Speed                       10000 non-null  float64
 6   Alignment_Tolerance         10000 non-null  float64
 7   Voltage                     10000 non-null  float64
 8   Current                     10000 non-null  float64
 9   Defect_Rate_in_Batch        10000 non-null  float64
 10  Material_Quality_Indicator  10000 non-null  object 
 11  Assembly_Errors             10000 non-null  int64  
 12  Quality_Check_Results       10000 non-

Unnamed: 0,ECU_ID,Manufacturing_Line_ID,Process_Stage,Temperature,Pressure,Speed,Alignment_Tolerance,Voltage,Current,Defect_Rate_in_Batch,...,Assembly_Errors,Quality_Check_Results,Production_Time,Rework_Count,Factory_Temperature,Humidity_Levels,Operator_ID,Historical_Failure_Rate,Anomaly_Score,Manufacturing_Error
0,ECU_1,Line_A,Assembly,237.14,0.81,27.62,0.084,4.61,0.95,3.17,...,0,Fail,17.78,0,33.07,44.93,Operator_18,1.25,0.33,1
1,ECU_2,Line_C,Packaging,213.35,1.24,82.5,0.051,5.0,1.04,0.85,...,4,Pass,11.02,0,23.59,66.79,Operator_18,1.44,0.97,0
2,ECU_3,Line_A,Soldering,258.0,1.03,31.02,0.092,3.48,1.21,3.95,...,3,Pass,10.4,1,34.99,47.3,Operator_7,5.84,0.47,0
3,ECU_4,Line_B,Soldering,240.16,1.22,59.72,0.091,3.27,0.42,0.52,...,0,Fail,8.7,0,24.95,36.29,Operator_14,3.32,0.17,1
4,ECU_5,Line_A,Packaging,287.25,0.82,55.3,0.08,4.8,0.41,0.54,...,5,Fail,13.1,2,32.55,32.56,Operator_17,0.67,0.1,0


### Step 2: Data Preprocessing

In [2]:

# Splitting and preprocessing utilities used in this cell
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Model inputs are seven numeric process measurements; the label is Manufacturing_Error.
# The column order chosen here is the order the scaler is fitted on.
FEATURE_COLUMNS = ['Temperature', 'Pressure', 'Speed', 'Voltage', 'Alignment_Tolerance', 'Defect_Rate_in_Batch', 'Assembly_Errors']
TARGET_COLUMN = 'Manufacturing_Error'
X = ecu_data[FEATURE_COLUMNS]
y = ecu_data[TARGET_COLUMN]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
    

### Step 3: Train and Evaluate Models

In [3]:

# Import classifiers
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score


def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and measure its accuracy on a held-out set.

    Parameters
    ----------
    model
        Estimator exposing ``fit`` and ``predict``; it is fitted in place.
    X_fit, y_fit
        Features and labels used for fitting.
    X_eval, y_eval
        Features and labels used for scoring.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` where ``predictions`` is
        ``model.predict(X_eval)`` and ``accuracy`` is
        ``accuracy_score(y_eval, predictions)``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return predictions, accuracy_score(y_eval, predictions)


# All three ensembles use default hyper-parameters and the same seed, and are
# trained and scored on the same scaled train/test split.

# Train AdaBoost model
ada_model = AdaBoostClassifier(random_state=42)
ada_pred, ada_acc = _fit_and_score(ada_model, X_train_scaled, y_train, X_test_scaled, y_test)

# Train Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_pred, rf_acc = _fit_and_score(rf_model, X_train_scaled, y_train, X_test_scaled, y_test)

# Train Bagging model
bag_model = BaggingClassifier(random_state=42)
bag_pred, bag_acc = _fit_and_score(bag_model, X_train_scaled, y_train, X_test_scaled, y_test)

# Display accuracies
# NOTE(review): the saved run reports all three models at roughly 49-50%.
# If Manufacturing_Error is a roughly balanced 0/1 label (TODO confirm), that is
# chance level, i.e. the selected features carry little signal for this target.
print(f"AdaBoost Accuracy: {ada_acc * 100:.2f}%")
print(f"Random Forest Accuracy: {rf_acc * 100:.2f}%")
print(f"Bagging Accuracy: {bag_acc * 100:.2f}%")
    



AdaBoost Accuracy: 49.73%
Random Forest Accuracy: 49.00%
Bagging Accuracy: 50.07%


In [4]:
from joblib import dump

# Persist the fitted scaler and the three trained classifiers, one file each,
# in the current working directory. The scaler is saved with the models because
# any later prediction has to apply the same scaling that was used for training.
for fitted_object, target_file in (
    (scaler, "scaler.joblib"),
    (ada_model, "ada_model.joblib"),
    (rf_model, "rf_model.joblib"),
    (bag_model, "bag_model.joblib"),
):
    dump(fitted_object, target_file)
print("Models and scaler saved successfully!")

Models and scaler saved successfully!


### Step 4: Generate Results Table

In [5]:

# Generate a results table: one row per model, accuracy expressed in percent
results = pd.DataFrame({
    'Model': ['AdaBoost', 'Random Forest', 'Bagging'],
    'Accuracy (%)': [ada_acc * 100, rf_acc * 100, bag_acc * 100]
})

# Scale the full feature matrix once and reuse it for every model, instead of
# re-running scaler.transform(X) for each probability column.
X_all_scaled = scaler.transform(X)

# Include failure probabilities for each ECU.
# predict_proba(...)[:, 1] takes the second probability column, i.e. the class at
# index 1 of each model's classes_ (presumably label 1 = error; TODO confirm).
# Note that X is the whole dataset, so these rows include the 70% the models
# were trained on; the values are not out-of-sample estimates for those rows.
ecu_failure_probabilities = pd.DataFrame({
    'ECU_ID': ecu_data['ECU_ID'],
    'Failure_Probability (AdaBoost)': ada_model.predict_proba(X_all_scaled)[:, 1],
    'Failure_Probability (Random Forest)': rf_model.predict_proba(X_all_scaled)[:, 1],
    'Failure_Probability (Bagging)': bag_model.predict_proba(X_all_scaled)[:, 1]
})

# Display results
print("Model Accuracy:")
print(results)
print("ECU Failure Probabilities:")
print(ecu_failure_probabilities.head())
    

Model Accuracy:
           Model  Accuracy (%)
0       AdaBoost     49.733333
1  Random Forest     49.000000
2        Bagging     50.066667
ECU Failure Probabilities:
  ECU_ID  Failure_Probability (AdaBoost)  Failure_Probability (Random Forest)  \
0  ECU_1                        0.498936                                 0.37   
1  ECU_2                        0.489198                                 0.13   
2  ECU_3                        0.500135                                 0.11   
3  ECU_4                        0.500970                                 0.54   
4  ECU_5                        0.498176                                 0.20   

   Failure_Probability (Bagging)  
0                            0.4  
1                            0.2  
2                            0.1  
3                            0.6  
4                            0.3  


In [6]:

# Generate a table similar to the requested format

# Scale the full feature matrix once; all three models score the same array.
X_all_scaled = scaler.transform(X)

# The three per-model probability columns, named once so that the averaging
# step and the final column selection cannot drift apart.
PROBABILITY_COLUMNS = [
    'Failure_Probability (AdaBoost)',
    'Failure_Probability (Random Forest)',
    'Failure_Probability (Bagging)',
]

# Work on a copy so the raw ecu_data frame is left untouched.
ecu_output = ecu_data.copy()
ecu_output['Failure_Probability (AdaBoost)'] = ada_model.predict_proba(X_all_scaled)[:, 1]
ecu_output['Failure_Probability (Random Forest)'] = rf_model.predict_proba(X_all_scaled)[:, 1]
ecu_output['Failure_Probability (Bagging)'] = bag_model.predict_proba(X_all_scaled)[:, 1]

# Add Status column based on failure probabilities (example thresholds):
# a row is flagged 'Failure' when the mean of the three model probabilities is
# strictly greater than the threshold, otherwise 'No Issue'.
threshold = 0.5
mean_failure_probability = ecu_output[PROBABILITY_COLUMNS].mean(axis=1)
ecu_output['Status'] = mean_failure_probability.gt(threshold).map(
    {True: 'Failure', False: 'No Issue'}
)

# Select and format relevant columns for the final output
final_output = ecu_output[['ECU_ID', 'Temperature', 'Pressure', 'Speed', 'Voltage']
                          + PROBABILITY_COLUMNS
                          + ['Status']]

# Display the final table
print("Final ECU Prediction Table:")
print(final_output.head(10))  # Display the top 10 rows for a preview


Final ECU Prediction Table:
   ECU_ID  Temperature  Pressure  Speed  Voltage  \
0   ECU_1       237.14      0.81  27.62     4.61   
1   ECU_2       213.35      1.24  82.50     5.00   
2   ECU_3       258.00      1.03  31.02     3.48   
3   ECU_4       240.16      1.22  59.72     3.27   
4   ECU_5       287.25      0.82  55.30     4.80   
5   ECU_6       171.73      1.10  24.40     3.49   
6   ECU_7       180.75      1.05  60.74     4.05   
7   ECU_8       188.35      1.29  20.37     3.08   
8   ECU_9       184.59      1.40  67.11     3.29   
9  ECU_10       230.32      1.27  86.51     4.61   

   Failure_Probability (AdaBoost)  Failure_Probability (Random Forest)  \
0                        0.498936                                 0.37   
1                        0.489198                                 0.13   
2                        0.500135                                 0.11   
3                        0.500970                                 0.54   
4                        0.49

In [8]:
import numpy as np
import pandas as pd
!pip install tabulate
from tabulate import tabulate

# Generate new random data
new_data = pd.DataFrame({
    "ECU_No": [f"ECU_{i+1}" for i in range(5)],
    "Temperature": np.random.uniform(200, 300, 5),
    "Pressure": np.random.uniform(0.5, 1.5, 5),
    "Speed": np.random.uniform(20, 100, 5),
    "Voltage": np.random.uniform(3.0, 5.0, 5),
    "Alignment_Tolerance": np.random.uniform(0.05, 0.1, 5),
    "Defect_Rate_in_Batch": np.random.uniform(0.1, 5.0, 5),
    "Assembly_Errors": np.random.randint(0, 10, 5),
})

# Convert DataFrame to a list of lists for tabulate
table_data = new_data.values.tolist()

# Define column headers
headers = new_data.columns.tolist()

# Print the table with borders
print("Generated random values for new ECUS:\n")
print(tabulate(table_data, headers=headers, tablefmt='fancy_grid', showindex=False))



Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Generated random values for new ECUS:

╒══════════╤═══════════════╤════════════╤═════════╤═══════════╤═══════════════════════╤════════════════════════╤═══════════════════╕
│ ECU_No   │   Temperature │   Pressure │   Speed │   Voltage │   Alignment_Tolerance │   Defect_Rate_in_Batch │   Assembly_Errors │
╞══════════╪═══════════════╪════════════╪═════════╪═══════════╪═══════════════════════╪════════════════════════╪═══════════════════╡
│ ECU_1    │       299.18  │   0.611543 │ 38.9979 │   3.91686 │             0.0982565 │               0.612233 │                 2 │
├──────────┼───────────────┼────────────┼─────────┼───────────┼───────────────────────┼────────────────────────┼───────────────────┤
│ ECU_2    │       249.057 │   1.21011  │ 25.8188 │   4.33177 │             0.08445

In [9]:
from joblib import load

# Reload the scaler and the three classifiers from disk, rebinding the names
# used by the prediction cells. Adjust the file names below if the artifacts
# were saved under different names.
# joblib.load unpickles the file contents, so only load files you trust.
ARTIFACT_FILES = ("scaler.joblib", "ada_model.joblib", "rf_model.joblib", "bag_model.joblib")
scaler, ada_model, rf_model, bag_model = map(load, ARTIFACT_FILES)


In [10]:
from tabulate import tabulate


def _status_text(x):
    """Label an averaged probability as failed / not failed, echoing it to 2 decimals."""
    return f"Failed ({x:.2f})" if x > 0.5 else f"Not Failed ({x:.2f})"


# Everything except the ECU_No label is a model feature; scale it with the fitted scaler
feature_frame = new_data.drop(columns=["ECU_No"])
new_data_scaled = scaler.transform(feature_frame)

# Predict failure probabilities using all three models
# ([:, 1] selects the second probability column returned by predict_proba)
ada_prob = ada_model.predict_proba(new_data_scaled)[:, 1]
rf_prob = rf_model.predict_proba(new_data_scaled)[:, 1]
bag_prob = bag_model.predict_proba(new_data_scaled)[:, 1]

# Combine predictions into a single DataFrame (assign returns a new frame, new_data is untouched)
prediction_results = new_data.assign(
    AdaBoost_Probability=ada_prob,
    RandomForest_Probability=rf_prob,
    Bagging_Probability=bag_prob,
)

# Add overall status column: the plain mean of the three model probabilities,
# labelled "Failed" only when that mean is strictly above 0.5
per_model_columns = ["AdaBoost_Probability", "RandomForest_Probability", "Bagging_Probability"]
prediction_results["Overall_Probability"] = prediction_results[per_model_columns].mean(axis=1)
prediction_results["Status"] = prediction_results["Overall_Probability"].map(_status_text)

# tabulate is fed plain rows plus a separate header list
table_data = prediction_results.to_numpy().tolist()
headers = list(prediction_results.columns)

# Print the table with borders
print("\nPredicted Failure Probabilities and Status:")
print(tabulate(table_data, headers=headers, tablefmt='fancy_grid', showindex=False))





Predicted Failure Probabilities and Status:
╒══════════╤═══════════════╤════════════╤═════════╤═══════════╤═══════════════════════╤════════════════════════╤═══════════════════╤════════════════════════╤════════════════════════════╤═══════════════════════╤═══════════════════════╤═══════════════════╕
│ ECU_No   │   Temperature │   Pressure │   Speed │   Voltage │   Alignment_Tolerance │   Defect_Rate_in_Batch │   Assembly_Errors │   AdaBoost_Probability │   RandomForest_Probability │   Bagging_Probability │   Overall_Probability │ Status            │
╞══════════╪═══════════════╪════════════╪═════════╪═══════════╪═══════════════════════╪════════════════════════╪═══════════════════╪════════════════════════╪════════════════════════════╪═══════════════════════╪═══════════════════════╪═══════════════════╡
│ ECU_1    │       299.18  │   0.611543 │ 38.9979 │   3.91686 │             0.0982565 │               0.612233 │                 2 │               0.499443 │                       0.51 │    

In [11]:
from tabulate import tabulate

# Keep only the identifier, the averaged probability and the status label
selected_columns = ["ECU_No", "Overall_Probability", "Status"]
filtered_results = prediction_results.loc[:, selected_columns]

# tabulate is fed plain rows plus a separate header list
table_data = filtered_results.to_numpy().tolist()
headers = list(filtered_results.columns)

# Print the table with borders
print("\nSelected Columns - Predicted Failure Probabilities and Status:")
print(tabulate(table_data, headers=headers, tablefmt='fancy_grid', showindex=False))



Selected Columns - Predicted Failure Probabilities and Status:
╒══════════╤═══════════════════════╤═══════════════════╕
│ ECU_No   │   Overall_Probability │ Status            │
╞══════════╪═══════════════════════╪═══════════════════╡
│ ECU_1    │              0.403148 │ Not Failed (0.40) │
├──────────┼───────────────────────┼───────────────────┤
│ ECU_2    │              0.523673 │ Failed (0.52)     │
├──────────┼───────────────────────┼───────────────────┤
│ ECU_3    │              0.450179 │ Not Failed (0.45) │
├──────────┼───────────────────────┼───────────────────┤
│ ECU_4    │              0.389887 │ Not Failed (0.39) │
├──────────┼───────────────────────┼───────────────────┤
│ ECU_5    │              0.549866 │ Failed (0.55)     │
╘══════════╧═══════════════════════╧═══════════════════╛
