In [None]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta

# Seed both random sources (the stdlib `random` module and NumPy's global
# generator) so every run reproduces the same synthetic dataset.
random.seed(42)
np.random.seed(42)

# How many devices to simulate
n = 1000

# One unique ID per device, zero-padded to four digits: D0001, D0002, ...
device_ids = [f"D{i:04d}" for i in range(1, n + 1)]

# Feature simulation.
# NOTE: the order of these draws matters -- they all consume the same seeded
# NumPy stream, so reordering them would change the generated values.
usage_hours = np.random.randint(low=50, high=5000, size=n)
avg_temperature = np.random.normal(loc=50, scale=10, size=n).round(2)
vibration_level = np.random.uniform(low=0.01, high=1.0, size=n).round(3)
error_logs_count = np.random.poisson(lam=2, size=n)
pressure_variation = np.random.normal(loc=1.0, scale=0.3, size=n).round(2)
last_maintenance = np.random.randint(low=0, high=365, size=n)

# Simulated inspection dates: a random offset of 0-729 days added to
# 2023-01-01, i.e. a 730-day window starting at that date.
start_date = datetime(2023, 1, 1)
day_offsets = np.random.randint(low=0, high=730, size=n)
inspection_dates = [
    start_date + timedelta(days=int(offset)) for offset in day_offsets
]

# Simulated device types, picked one at a time from the stdlib generator
device_kinds = ('Drill', 'Saw', 'Retractor')
device_types = [random.choice(device_kinds) for _ in range(n)]

# Risk determination (failure = 1)
# Each row of `risk_flags` belongs to one device and each column is one
# threshold check; a True entry means that feature exceeded its limit.
# Comparing the whole arrays at once replaces the per-device Python loop.
risk_flags = np.column_stack([
    usage_hours > 3000,
    avg_temperature > 65,
    vibration_level > 0.7,
    error_logs_count > 5,
    pressure_variation > 1.5,
    last_maintenance > 180,
])

# Number of exceeded thresholds per device (0-6)
risk_score = risk_flags.sum(axis=1)

# A device is labelled as a failure when at least three thresholds are
# exceeded; `.tolist()` keeps `failure` a plain list of Python ints (0 or 1).
failure = (risk_score >= 3).astype(int).tolist()

# Assemble the raw dataset: one row per device, one column per simulated
# field, in the order the columns should appear in the output file.
raw_columns = {
    "device_id": device_ids,
    "device_type": device_types,
    "inspection_date": inspection_dates,
    "usage_hours": usage_hours,
    "avg_temperature": avg_temperature,
    "vibration_level": vibration_level,
    "error_logs_count": error_logs_count,
    "pressure_variation": pressure_variation,
    "last_maintenance": last_maintenance,
    "failure": failure,
}
df_raw = pd.DataFrame(raw_columns)

# Persist the raw version; the DataFrame's row index is left out of the file
df_raw.to_csv("predictive_maintenance_data_raw.csv", index=False)

# Print the first rows as a quick sanity check
print(df_raw.head())

  device_id device_type inspection_date  usage_hours  avg_temperature  \
0     D0001   Retractor      2024-05-21          910            38.75   
1     D0002       Drill      2024-06-12         3822            48.51   
2     D0003       Drill      2023-09-22         3142            49.93   
3     D0004   Retractor      2024-09-17          516            36.81   
4     D0005         Saw      2023-10-19         4476            58.04   

   vibration_level  error_logs_count  pressure_variation  last_maintenance  \
0            0.907                 0                1.43               136   
1            0.284                 3                0.87               188   
2            0.984                 1                1.23                 1   
3            0.149                 2                0.43               105   
4            0.210                 0                1.20               363   

   failure  
0        0  
1        0  
2        0  
3        0  
4        0  
