In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [3]:
start_timestamp = datetime(2022, 1, 1)
num_data_points = 10000

In [4]:
timestamps = [start_timestamp + timedelta(minutes=i) for i in range(num_data_points)]
temperature = np.random.normal(loc=25, scale=2, size=num_data_points)
vibration = np.random.normal(loc=0.05, scale=0.02, size=num_data_points)
pressure = np.random.normal(loc=10, scale=0.5, size=num_data_points)
failure = np.zeros(num_data_points)

In [29]:
failure_indices = np.random.choice(range(num_data_points), size=int(num_data_points * 0.05), replace=False)
failure[failure_indices] = 1

In [6]:
data = pd.DataFrame({
    'timestamp': timestamps,
    'temperature': temperature,
    'vibration': vibration,
    'pressure': pressure,
    'failure': failure
})

In [7]:
data.to_csv("equipment_data.csv", index=False)
print(data)

               timestamp  temperature  vibration   pressure  failure
0    2022-01-01 00:00:00    24.662295   0.039057  10.621236      0.0
1    2022-01-01 00:01:00    25.075357   0.038726  10.196793      0.0
2    2022-01-01 00:02:00    22.537829   0.021363  10.073450      0.0
3    2022-01-01 00:03:00    27.859912   0.036075  10.299577      0.0
4    2022-01-01 00:04:00    23.924996   0.070164  10.247103      0.0
...                  ...          ...        ...        ...      ...
9995 2022-01-07 22:35:00    24.834012   0.019992  10.354457      0.0
9996 2022-01-07 22:36:00    24.546222   0.054940   9.838168      0.0
9997 2022-01-07 22:37:00    25.116250   0.058844  10.122268      0.0
9998 2022-01-07 22:38:00    22.984082   0.052274  10.580420      1.0
9999 2022-01-07 22:39:00    23.450589   0.061834   9.823904      0.0

[10000 rows x 5 columns]


In [8]:
print(data.head(5))

            timestamp  temperature  vibration   pressure  failure
0 2022-01-01 00:00:00    24.662295   0.039057  10.621236      0.0
1 2022-01-01 00:01:00    25.075357   0.038726  10.196793      0.0
2 2022-01-01 00:02:00    22.537829   0.021363  10.073450      0.0
3 2022-01-01 00:03:00    27.859912   0.036075  10.299577      0.0
4 2022-01-01 00:04:00    23.924996   0.070164  10.247103      0.0


In [9]:
print(data.tail())

               timestamp  temperature  vibration   pressure  failure
9995 2022-01-07 22:35:00    24.834012   0.019992  10.354457      0.0
9996 2022-01-07 22:36:00    24.546222   0.054940   9.838168      0.0
9997 2022-01-07 22:37:00    25.116250   0.058844  10.122268      0.0
9998 2022-01-07 22:38:00    22.984082   0.052274  10.580420      1.0
9999 2022-01-07 22:39:00    23.450589   0.061834   9.823904      0.0


In [10]:
print(data.describe())

                 timestamp   temperature     vibration      pressure  \
count                10000  10000.000000  10000.000000  10000.000000   
mean   2022-01-04 11:19:30     25.016591      0.050151     10.006214   
min    2022-01-01 00:00:00     16.835200     -0.024159      8.080225   
25%    2022-01-02 17:39:45     23.640699      0.036854      9.674099   
50%    2022-01-04 11:19:30     25.005331      0.049951     10.006640   
75%    2022-01-06 04:59:15     26.357805      0.063437     10.337070   
max    2022-01-07 22:39:00     34.163492      0.121193     12.002308   
std                    NaN      1.991941      0.019694      0.496265   

            failure  
count  10000.000000  
mean       0.050000  
min        0.000000  
25%        0.000000  
50%        0.000000  
75%        0.000000  
max        1.000000  
std        0.217956  


In [25]:
data.dropna(inplace=True)

In [26]:
from sklearn.model_selection import train_test_split

In [28]:
X = data.drop("failure", axis=1) 
y = data["failure"] 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [27]:
relevant_features = ["temperature", "vibration", "pressure"]

X_train = X_train[relevant_features]
X_test = X_test[relevant_features]

In [15]:
window_size = 5
X_train_rolling = X_train.rolling(window_size).mean().dropna()
X_test_rolling = X_test.rolling(window_size).mean().dropna()

In [21]:
print("Length of x_train:", len(X_train))
print("Length of x_test:", len(X_test))

Length of x_train: 8000
Length of x_test: 2000


In [22]:
model = RandomForestClassifier()

In [23]:
model.fit(X_train, y_train)

In [24]:
model.score(X_test, y_test)

0.95