In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Synthetic sensor dataset -------------------------------------------------
# Builds one reading per minute starting at `start_timestamp`, with normally
# distributed temperature / vibration / pressure values and a binary `failure`
# label assigned to a random subset of rows.

# Configuration: everything a reader might want to tune.
SEED = 42            # fixed seed so Restart & Run All regenerates the same dataset
FAILURE_RATE = 0.05  # fraction of rows labelled as failures
start_timestamp = datetime(2022, 1, 1)
num_data_points = 10000

# A single seeded generator drives every random draw below; the draw order
# (temperature, vibration, pressure, failure indices) therefore determines the data.
rng = np.random.default_rng(SEED)

# Generate synthetic sensor data
timestamps = [start_timestamp + timedelta(minutes=i) for i in range(num_data_points)]
temperature = rng.normal(loc=25, scale=2, size=num_data_points)
vibration = rng.normal(loc=0.05, scale=0.02, size=num_data_points)
pressure = rng.normal(loc=10, scale=0.5, size=num_data_points)
failure = np.zeros(num_data_points)  # float 0.0 / 1.0 labels

# Introduce failures at random points (sampling without replacement, so exactly
# int(num_data_points * FAILURE_RATE) distinct rows are flagged).
failure_indices = rng.choice(
    num_data_points,
    size=int(num_data_points * FAILURE_RATE),
    replace=False,
)
failure[failure_indices] = 1

# Create the dataframe
data = pd.DataFrame({
    'timestamp': timestamps,
    'temperature': temperature,
    'vibration': vibration,
    'pressure': pressure,
    'failure': failure
})


In [None]:
# Persist the generated readings to disk; index=False keeps the row index out of the CSV.
data.to_csv(path_or_buf="sensor_data.csv", index=False)

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv("sensor_data.csv")

# Perform exploratory data analysis (EDA)
print(data.head())  # Display the first few rows of the dataset
print(data.describe())  # Summary statistics of the dataset

# Preprocess the data
data = data.dropna()  # Drop rows with missing values (reassign instead of mutating in place)
# TODO: Handle outliers (e.g., using z-score or interquartile range)
# TODO: Perform feature scaling if necessary

# Split the dataset into training and testing sets.
# `timestamp` is excluded from the features: read_csv loads it as a string column,
# so it is not a sensor reading and a numeric estimator could not consume it.
X = data.drop(columns=["timestamp", "failure"])  # Input features (sensor readings)
y = data["failure"]  # Target variable (failure label)

# Stratify on the label so the rare failure class keeps the same proportion
# in both the training and the testing set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

             timestamp  temperature  vibration   pressure  failure
0  2022-01-01 00:00:00    22.093898   0.058400   9.585372      1.0
1  2022-01-01 00:01:00    27.310995   0.030276  10.047233      0.0
2  2022-01-01 00:02:00    23.876631   0.039579  10.662682      0.0
3  2022-01-01 00:03:00    24.409262   0.043558   9.806565      0.0
4  2022-01-01 00:04:00    25.586816   0.049481   9.694482      0.0
        temperature     vibration      pressure       failure
count  10000.000000  10000.000000  10000.000000  10000.000000
mean      24.983058      0.050502      9.995674      0.050000
std        1.993757      0.019911      0.501742      0.217956
min       17.190676     -0.040095      8.041751      0.000000
25%       23.638974      0.037181      9.661926      0.000000
50%       24.977948      0.050587      9.994367      0.000000
75%       26.300035      0.063994     10.329022      0.000000
max       33.216929      0.129960     11.854854      1.000000
