In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

In [4]:
# Read the labelled training set from disk.
df_train = pd.read_csv('training_data.csv')

# Overwrite the 'Failure Code' column with the integer ids produced by the
# encoder; the fitted encoder is kept so it can be persisted at the end of this cell.
label_encoder = LabelEncoder()
df_train['Failure Code'] = label_encoder.fit_transform(df_train['Failure Code'])

# Target is the encoded column; features are every remaining column.
y = df_train['Failure Code']
X = df_train.drop('Failure Code', axis=1)

# Hold out 20% of the rows for validation (seeded split).
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit a seeded 100-tree random forest on the training portion.
# `fit` returns the estimator itself, so construction and fitting can be chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)

# Report accuracy on the held-out rows.
y_valid_pred = rf_model.predict(X_valid)
print(f'Validation Accuracy: {accuracy_score(y_valid, y_valid_pred):.2f}')

# Persist the fitted model and the label encoder.
joblib.dump(rf_model, 'tbf_random_forest_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

Validation Accuracy: 0.34


['label_encoder.pkl']

In [5]:
# Load the test data
df_test = pd.read_csv('test_data.csv')

# Load the persisted model and label encoder.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load artifact
# files from a trusted source (e.g. ones written by this notebook).
rf_model = joblib.load('tbf_random_forest_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')

# Build the feature matrix from the columns the model was actually fitted on,
# in the fitted order, instead of assuming "everything except Timestamp"
# matches the training schema. scikit-learn records these names in
# `feature_names_in_` when the estimator is fitted on a DataFrame with string
# column names.
feature_names = getattr(rf_model, 'feature_names_in_', None)
if feature_names is not None:
    missing_features = [name for name in feature_names if name not in df_test.columns]
    if missing_features:
        raise ValueError(
            f"'test_data.csv' is missing feature columns required by the model: {missing_features}"
        )
    X_test_new = df_test[list(feature_names)]
else:
    # The model carries no recorded feature names; fall back to dropping the
    # known non-feature column.
    X_test_new = df_test.drop(columns=['Timestamp'])

# Make predictions on the new test data
y_test_pred = rf_model.predict(X_test_new)

# Decode the predicted integer ids back to the original failure-code labels
df_test['Predicted Failure Code'] = label_encoder.inverse_transform(y_test_pred)

# Save the test rows together with their predictions to a new CSV file
df_test.to_csv('test_data_predictions.csv', index=False)

print("Predictions made and saved to 'test_data_predictions.csv'")

Predictions made and saved to 'test_data_predictions.csv'
