In [26]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report

In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [5]:
start_timestamp = datetime(2022, 1, 1)
num_data_points = 10000

In [6]:
timestamps = [start_timestamp + timedelta(minutes=i) for i in range(num_data_points)]
temperature = np.random.normal(loc=25, scale=2, size=num_data_points)
vibration = np.random.normal(loc=0.05, scale=0.02, size=num_data_points)
pressure = np.random.normal(loc=10, scale=0.5, size=num_data_points)
failure = np.zeros(num_data_points)

In [7]:
# Introduce failures at random points
failure_indices = np.random.choice(range(num_data_points), size=int(num_data_points * 0.05), replace=False)
failure[failure_indices] = 1

In [8]:
data = pd.DataFrame({
    'timestamp': timestamps,
    'temperature': temperature,
    'vibration': vibration,
    'pressure': pressure,
    'failure': failure
})

In [9]:
data.to_csv("equipment_data.csv", index=False)
print(data)

               timestamp  temperature  vibration   pressure  failure
0    2022-01-01 00:00:00    21.359217   0.039388  10.228243      0.0
1    2022-01-01 00:01:00    24.997027   0.054571   9.758846      0.0
2    2022-01-01 00:02:00    25.034067   0.061803   9.884107      0.0
3    2022-01-01 00:03:00    23.415780   0.038937   9.780311      0.0
4    2022-01-01 00:04:00    24.847688   0.045556  10.054325      0.0
...                  ...          ...        ...        ...      ...
9995 2022-01-07 22:35:00    23.358670   0.065711  10.022821      0.0
9996 2022-01-07 22:36:00    27.624000   0.072418   9.473266      0.0
9997 2022-01-07 22:37:00    26.696844   0.089884   9.730082      0.0
9998 2022-01-07 22:38:00    22.700617   0.036354   9.341462      0.0
9999 2022-01-07 22:39:00    26.037708   0.023009   9.669300      1.0

[10000 rows x 5 columns]


In [10]:
print(data.head(5))

            timestamp  temperature  vibration   pressure  failure
0 2022-01-01 00:00:00    21.359217   0.039388  10.228243      0.0
1 2022-01-01 00:01:00    24.997027   0.054571   9.758846      0.0
2 2022-01-01 00:02:00    25.034067   0.061803   9.884107      0.0
3 2022-01-01 00:03:00    23.415780   0.038937   9.780311      0.0
4 2022-01-01 00:04:00    24.847688   0.045556  10.054325      0.0


In [11]:
print(data.tail())

               timestamp  temperature  vibration   pressure  failure
9995 2022-01-07 22:35:00    23.358670   0.065711  10.022821      0.0
9996 2022-01-07 22:36:00    27.624000   0.072418   9.473266      0.0
9997 2022-01-07 22:37:00    26.696844   0.089884   9.730082      0.0
9998 2022-01-07 22:38:00    22.700617   0.036354   9.341462      0.0
9999 2022-01-07 22:39:00    26.037708   0.023009   9.669300      1.0


In [12]:
print(data.describe())

                 timestamp   temperature     vibration      pressure  \
count                10000  10000.000000  10000.000000  10000.000000   
mean   2022-01-04 11:19:30     25.008529      0.050037     10.003679   
min    2022-01-01 00:00:00     17.366776     -0.023760      8.092192   
25%    2022-01-02 17:39:45     23.668585      0.036632      9.670623   
50%    2022-01-04 11:19:30     25.007642      0.049918     10.009572   
75%    2022-01-06 04:59:15     26.396790      0.063639     10.337373   
max    2022-01-07 22:39:00     32.772576      0.117704     12.108954   
std                    NaN      2.038490      0.019835      0.501093   

            failure  
count  10000.000000  
mean       0.050000  
min        0.000000  
25%        0.000000  
50%        0.000000  
75%        0.000000  
max        1.000000  
std        0.217956  


In [13]:
# Preprocess the data
data.dropna(inplace=True)

In [14]:
# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

In [15]:
X = data.drop("failure", axis=1)  # Input features (sensor readings)
y = data["failure"]  # Target variable (failure label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
# Extract relevant features from the sensor data
# (e.g., temperature, vibration, pressure, etc.)
relevant_features = ["temperature", "vibration", "pressure"]

X_train = X_train[relevant_features]
X_test = X_test[relevant_features]

In [17]:
window_size = 5
X_train_rolling = X_train.rolling(window_size).mean().dropna()
X_test_rolling = X_test.rolling(window_size).mean().dropna()

In [18]:
print("Length of x_test:", len(X_test))
print("Length of x_pred:", len(X_train))

Length of x_test: 2000
Length of x_pred: 8000


In [19]:
model = RandomForestClassifier()

In [20]:
model.fit(X_train, y_train)

In [29]:
model.score(X_test, y_test)

0.943