In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the simulated readings are reproducible
np.random.seed(456)

# Sampling plan: one reading every 30 minutes for 7 days
sampling_rate_min = 30  # minutes between consecutive readings
# Derived from the sampling rate so the two values cannot drift apart (48 at 30 min)
half_an_hours_per_day = (24 * 60) // sampling_rate_min
days = 7
num_records = half_an_hours_per_day * days  # 48 * 7 = 336 readings

# Time index: num_records timestamps spaced sampling_rate_min minutes apart
time_index = pd.date_range(start='2025-03-01', periods=num_records, freq=f'{sampling_rate_min}min')

# Simulated sensor channels. The order of these draws matters: they all consume
# the seeded global RNG, so reordering them would change the generated data.
vibration_amplitude = np.random.normal(loc=2.0, scale=0.5, size=num_records)  # g-force
frequency = np.random.normal(loc=50.0, scale=5.0, size=num_records)           # Hz
# Uniform draw on [30, 70) truncated to whole numbers, i.e. integers 30..69
humidity = np.random.uniform(low=30, high=70, size=num_records).astype(int)             # %
# Normal draw truncated (not rounded) to whole degrees by astype(int)
temperature = np.random.normal(loc=40, scale=5, size=num_records).astype(int)        # °C

# Assemble one row per timestamp; the scalar "Machine_1" is broadcast to every row
sensor_data = pd.DataFrame({
    "timestamp": time_index,
    "Machine_name": "Machine_1",
    "vibration_amplitude_g": vibration_amplitude,
    "frequency_hz": frequency,
    "humidity_percent": humidity,
    "temperature_celsius": temperature
})

sensor_data.head()



Unnamed: 0,timestamp,Machine_name,vibration_amplitude_g,frequency_hz,humidity_percent,temperature_celsius
0,2025-03-01 00:00:00,Machine_1,1.665936,55.745588,54,29
1,2025-03-01 00:30:00,Machine_1,1.750895,52.202482,30,34
2,2025-03-01 01:00:00,Machine_1,2.309288,46.044938,42,30
3,2025-03-01 01:30:00,Machine_1,2.284346,37.482894,58,44
4,2025-03-01 02:00:00,Machine_1,2.675255,54.8093,39,37


In [2]:
# Sanity check: (rows, columns) should be (num_records, 6)
sensor_data.shape

(336, 6)

In [3]:
# Optional: save the fault-free sensor data to CSV (currently disabled)
# sensor_data.to_csv('sensor_data.csv', index=False)

In [4]:
# Work on a labelled copy so the original sensor_data frame stays untouched
sensor_data_with_faults = sensor_data.copy()

# Sample 20% of the row labels (rounded down) at random, without repetition
num_faults = int(0.2 * len(sensor_data_with_faults))
fault_indices = np.random.choice(
    sensor_data_with_faults.index,
    size=num_faults,
    replace=False,
)

# Label column: 1 for rows sampled as faulty, 0 for every other row
sensor_data_with_faults['fault'] = (
    sensor_data_with_faults.index.isin(fault_indices).astype('int64')
)

sensor_data_with_faults.head(10)

Unnamed: 0,timestamp,Machine_name,vibration_amplitude_g,frequency_hz,humidity_percent,temperature_celsius,fault
0,2025-03-01 00:00:00,Machine_1,1.665936,55.745588,54,29,0
1,2025-03-01 00:30:00,Machine_1,1.750895,52.202482,30,34,0
2,2025-03-01 01:00:00,Machine_1,2.309288,46.044938,42,30,0
3,2025-03-01 01:30:00,Machine_1,2.284346,37.482894,58,44,0
4,2025-03-01 02:00:00,Machine_1,2.675255,54.8093,39,37,0
5,2025-03-01 02:30:00,Machine_1,2.814794,58.204397,42,46,1
6,2025-03-01 03:00:00,Machine_1,2.150983,43.83268,62,44,1
7,2025-03-01 03:30:00,Machine_1,2.224742,46.649671,39,44,0
8,2025-03-01 04:00:00,Machine_1,1.827094,50.770375,66,35,0
9,2025-03-01 04:30:00,Machine_1,1.842385,50.976814,64,41,0


In [5]:
# Persist the fault-labelled data to CSV; index=False omits the DataFrame index column
sensor_data_with_faults.to_csv('sensor_data_test.csv', index=False)