In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

data_path = '/content/maitenance_record_data.csv'
df = pd.read_csv(data_path)

# Explore dataset
print(df.head())


# Assume 'Timestamp' is a feature that needs to be converted to datetime format
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# 'Status' is the target variable (Normal/Faulty)
# Encode categorical variables
le = LabelEncoder()
df['Status'] = le.fit_transform(df['Status'])

# Feature engineering for timestamps (example: extracting hour of the day)
df['HourOfDay'] = df['Timestamp'].dt.hour


# Handling missing values (replace NaNs with the mean value)
imputer = SimpleImputer(strategy='mean')
df['Temperature'] = imputer.fit_transform(df[['Temperature']])
df['Pressure'] = imputer.fit_transform(df[['Pressure']])

# Define features and target variable
features = ['Temperature', 'Pressure', 'HourOfDay']
target = 'Status'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42)

# Train a machine learning model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_report_result)

             Timestamp  Temperature    Pressure       Status
0  2019-01-01 00:00:00    33.973713  966.241086  NormalState
1  2019-01-01 00:01:00    28.893886  992.774066  NormalState
2  2019-01-01 00:02:00    35.181508  960.379004  FaultyState
3  2019-01-01 00:03:00    42.184239  984.601924  NormalState
4  2019-01-01 00:04:00    28.126773  905.319267  NormalState
Accuracy: 0.7675
Classification Report:
               precision    recall  f1-score   support

           0       0.17      0.05      0.08        77
           1       0.81      0.94      0.87       323

    accuracy                           0.77       400
   macro avg       0.49      0.50      0.47       400
weighted avg       0.68      0.77      0.72       400



In [7]:
df.head()

Unnamed: 0,Timestamp,Temperature,Pressure,Status,HourOfDay
0,2019-01-01 00:00:00,33.973713,966.241086,1,0
1,2019-01-01 00:01:00,28.893886,992.774066,1,0
2,2019-01-01 00:02:00,35.181508,960.379004,0,0
3,2019-01-01 00:03:00,42.184239,984.601924,1,0
4,2019-01-01 00:04:00,28.126773,905.319267,1,0


In [8]:
df.describe()

Unnamed: 0,Temperature,Pressure,Status,HourOfDay
count,2000.0,2000.0,2000.0,2000.0
mean,30.360673,999.677875,0.7955,9.45
std,7.907725,50.267703,0.403437,6.882673
min,4.069861,849.024392,0.0,0.0
25%,25.018705,964.543702,1.0,4.0
50%,30.357533,999.997335,1.0,8.0
75%,35.463821,1033.277184,1.0,15.0
max,60.821852,1196.311885,1.0,23.0
