In [None]:
import pandas as pd
import numpy as np
import random
import uuid

from datetime import datetime, timedelta

# Seed both generators used below (stdlib `random` and NumPy's global RNG)
random.seed(42)
np.random.seed(42)

# Dataset size and the categorical values each record is sampled from
NUM_RECORDS = 1000
equipment_types = [
    'HVAC',
    'Elevator',
    'Generator',
    'Pump',
    'Boiler',
]
locations = [
    'Building A',
    'Building B',
    'Building C',
    'Warehouse 1',
    'Warehouse 2',
]
priorities = [
    'Low',
    'Medium',
    'High',
]

# Helper: flag a failure when enough sensor readings exceed their limits
def simulate_failure(temp, vib, errors):
    """Return 1 when at least two of the three readings exceed their limit, else 0.

    The limits are strict: temp above 85, vib above 5, errors above 2.
    A reading exactly on its limit does not count.
    """
    limit_checks = (temp > 85, vib > 5, errors > 2)
    tripped = sum(1 for exceeded in limit_checks if exceeded)
    return 1 if tripped >= 2 else 0

# Helper: pick a random datetime inside a window expressed in days before now
def random_date(start_days_ago, end_days_ago):
    """Return a random datetime between two offsets counted back from now.

    start_days_ago: days before the current time at which the window opens.
    end_days_ago: days before the current time at which the window closes.

    The result is the window start plus a random.random() fraction of the
    window length.
    """
    window_open = datetime.now() - timedelta(days=start_days_ago)
    window_close = datetime.now() - timedelta(days=end_days_ago)
    window_length = window_close - window_open
    fraction = random.random()
    return window_open + window_length * fraction

# Generate data
# Equipment IDs are drawn from a dedicated, seeded generator. uuid.uuid4()
# reads OS entropy and ignores random.seed(), so it produced different IDs on
# every run. Using a separate Random instance leaves the shared
# random / np.random streams consumed below untouched.
id_rng = random.Random(42)

# Take "today" once so every row's age is measured against the same day.
today = datetime.now().date()

data = []
for _ in range(NUM_RECORDS):
    # version=4 makes UUID() set the version/variant bits of the random int.
    equipment_id = str(uuid.UUID(int=id_rng.getrandbits(128), version=4))
    equipment_type = random.choice(equipment_types)
    location = random.choice(locations)
    install_date = random_date(5*365, 365).date()  # 1-5 years ago
    age_days = (today - install_date).days
    # Last service falls between the install date and 30 days ago.
    last_service = random_date(age_days, 30).date()
    # Next service is scheduled 30-180 days after the last one.
    next_service = last_service + timedelta(days=random.randint(30, 180))
    runtime_hours = round(np.random.normal(loc=5000, scale=1000), 2)
    temperature = round(np.random.normal(loc=75, scale=10), 2)  # °C
    vibration = round(np.random.normal(loc=3, scale=1.5), 2)     # mm/s
    power_kw = round(np.random.normal(loc=20, scale=5), 2)
    humidity = round(np.random.uniform(30, 90), 2)
    error_count = np.random.poisson(lam=1.5)
    manual_override = random.choice([0, 1])
    downtime = round(np.random.exponential(scale=2), 2)
    priority = random.choices(priorities, weights=[0.6, 0.3, 0.1])[0]

    # Label derived from the sensor readings generated above.
    failure = simulate_failure(temperature, vibration, error_count)

    # Field order must match the column list used to build the DataFrame.
    data.append([
        equipment_id, equipment_type, location, install_date, last_service,
        next_service, age_days, runtime_hours, temperature, vibration,
        power_kw, humidity, error_count, manual_override, downtime,
        priority, failure
    ])

# Create DataFrame
# Column names, in the same order as the fields of each generated record.
columns = [
    'equipment_id', 'equipment_type', 'location', 'install_date',
    'last_service_date', 'next_scheduled_service', 'age_days',
    'runtime_hours', 'temperature', 'vibration_level',
    'power_consumption_kw', 'humidity_level', 'error_codes_count',
    'manual_override', 'downtime_last_30d', 'service_priority',
    'failure_within_7_days'
]

df = pd.DataFrame(data, columns=columns)

# Save to CSV
# NOTE: file_path is not used by the write below, which targets a path
# relative to the current working directory. The backslash is escaped so the
# literal no longer contains the invalid escape sequence "\D"; the resulting
# string value is unchanged.
file_path = "C:/Users/Dejene\\Documents/Git_comany/ML_predict_equipment_failures/Notebook/data/data/equipment_maintenance_data.csv"
# index=False keeps the DataFrame's row index out of the file.
df.to_csv("Notebook/data/data/equipment_maintenance_data.csv", index=False)


Unnamed: 0,equipment_id,equipment_type,location,install_date,last_service_date,next_scheduled_service,age_days,runtime_hours,temperature,vibration_level,power_consumption_kw,humidity_level,error_codes_count,manual_override,downtime_last_30d,service_priority,failure_within_7_days
0,cc94a590-c50f-4359-83d5-36fd5247261b,HVAC,Building A,2023-08-04,2024-01-26,2024-03-31,742,5496.71,73.62,3.97,27.62,39.36,0,0,0.12,Medium,0
1,3eb3774d-06b2-4e2e-9d7a-e701de44da19,Boiler,Building A,2022-12-26,2023-01-25,2023-03-19,963,5279.04,85.11,2.13,17.37,41.0,1,0,1.13,Low,0
2,c9a9d328-4212-4ef7-9fed-5e99a0a63671,Boiler,Building A,2022-11-14,2024-10-12,2025-03-30,1005,5816.45,59.76,2.36,16.29,51.98,2,1,1.44,Low,0
3,79d84064-88dc-4578-82b4-a3c5a897364a,Boiler,Building C,2023-11-11,2023-11-15,2024-01-24,643,4455.62,76.11,1.27,21.88,33.9,4,1,2.31,Low,0
4,4621008f-809c-4469-9e22-b84280b64deb,Elevator,Building B,2024-06-14,2024-10-26,2024-12-18,427,3978.45,73.38,2.2,19.97,84.56,1,1,0.75,Low,0
