#AI&ML project

In [31]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [32]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [33]:
start_timestamp = datetime(2022, 1, 1)
num_data_points = 10000

In [34]:
timestamps = [start_timestamp + timedelta(minutes=i) for i in range(num_data_points)]
temperature = np.random.normal(loc=25, scale=2, size=num_data_points)
vibration = np.random.normal(loc=0.05, scale=0.02, size=num_data_points)
pressure = np.random.normal(loc=10, scale=0.5, size=num_data_points)
failure = np.zeros(num_data_points)

In [35]:
# Introduce failures at random points
failure_indices = np.random.choice(range(num_data_points), size=int(num_data_points * 0.05), replace=False)
failure[failure_indices] = 1

In [36]:
data = pd.DataFrame({
    'timestamp': timestamps,
    'temperature': temperature,
    'vibration': vibration,
    'pressure': pressure,
    'failure': failure
})

In [37]:
data.to_csv("equipment_data.csv", index=False)
print(data)

               timestamp  temperature  vibration   pressure  failure
0    2022-01-01 00:00:00    21.171526   0.070728  10.388036      0.0
1    2022-01-01 00:01:00    24.824404  -0.005716  10.000725      0.0
2    2022-01-01 00:02:00    24.515724   0.039314  10.685094      0.0
3    2022-01-01 00:03:00    25.979885   0.066488  10.945059      0.0
4    2022-01-01 00:04:00    25.910084   0.060493   9.968855      0.0
...                  ...          ...        ...        ...      ...
9995 2022-01-07 22:35:00    24.745044   0.045591  10.170936      0.0
9996 2022-01-07 22:36:00    24.091993   0.034440  10.158923      0.0
9997 2022-01-07 22:37:00    21.330539   0.047807  10.430351      0.0
9998 2022-01-07 22:38:00    24.322498   0.035947  10.463671      0.0
9999 2022-01-07 22:39:00    25.551310   0.060291  10.140781      0.0

[10000 rows x 5 columns]


In [38]:
print(data.head(5))

            timestamp  temperature  vibration   pressure  failure
0 2022-01-01 00:00:00    21.171526   0.070728  10.388036      0.0
1 2022-01-01 00:01:00    24.824404  -0.005716  10.000725      0.0
2 2022-01-01 00:02:00    24.515724   0.039314  10.685094      0.0
3 2022-01-01 00:03:00    25.979885   0.066488  10.945059      0.0
4 2022-01-01 00:04:00    25.910084   0.060493   9.968855      0.0


In [39]:
print(data.tail())

               timestamp  temperature  vibration   pressure  failure
9995 2022-01-07 22:35:00    24.745044   0.045591  10.170936      0.0
9996 2022-01-07 22:36:00    24.091993   0.034440  10.158923      0.0
9997 2022-01-07 22:37:00    21.330539   0.047807  10.430351      0.0
9998 2022-01-07 22:38:00    24.322498   0.035947  10.463671      0.0
9999 2022-01-07 22:39:00    25.551310   0.060291  10.140781      0.0


In [40]:
print(data.describe())

                 timestamp   temperature     vibration      pressure  \
count                10000  10000.000000  10000.000000  10000.000000   
mean   2022-01-04 11:19:30     25.010855      0.050314      9.998335   
min    2022-01-01 00:00:00     17.346394     -0.017783      7.890719   
25%    2022-01-02 17:39:45     23.675585      0.036736      9.666010   
50%    2022-01-04 11:19:30     25.020949      0.050233      9.996657   
75%    2022-01-06 04:59:15     26.337693      0.063614     10.337442   
max    2022-01-07 22:39:00     32.514068      0.121665     11.858168   
std                    NaN      2.001768      0.019885      0.497387   

            failure  
count  10000.000000  
mean       0.050000  
min        0.000000  
25%        0.000000  
50%        0.000000  
75%        0.000000  
max        1.000000  
std        0.217956  


In [41]:
# Preprocess the data
data.dropna(inplace=True)

In [42]:
# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

In [25]:
X = data.drop("failure", axis=1)  # Input features (sensor readings)
y = data["failure"]  # Target variable (failure label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
# Extract relevant features from the sensor data
# (e.g., temperature, vibration, pressure, etc.)
relevant_features = ["temperature", "vibration", "pressure"]

X_train = X_train[relevant_features]
X_test = X_test[relevant_features]

In [27]:
window_size = 5
X_train_rolling = X_train.rolling(window_size).mean().dropna()
X_test_rolling = X_test.rolling(window_size).mean().dropna()

In [28]:
print("Length of x_test:", len(X_test))
print("Length of x_pred:", len(X_train))

Length of x_test: 2000
Length of x_pred: 8000


In [30]:
if len(X_test) == len(X_train):
    accuracy = accuracy_score(X_test, X_train)
    precision = precision_score(X_test, X_train)
    recall = recall_score(X_test, X_train)
    f1 = f1_score(X_test, X_train)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)
else:
    print("Error: Length mismatch between X_test and X_pred.")
    print("Unique labels in X_test:", set(X_test))
    print("Unique labels in X_pred:", set(X_train))

Error: Length mismatch between X_test and X_pred.
Unique labels in X_test: {'temperature', 'pressure', 'vibration'}
Unique labels in X_pred: {'temperature', 'pressure', 'vibration'}
