In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed NumPy's global random generator so every run draws the same sequence
# of values from the np.random.* calls below.
np.random.seed(42)
def to_german_decimal(x, decimals=1):
    """Format a number with a decimal comma instead of a decimal point.

    Args:
        x: Numeric value to format.
        decimals: Number of digits to keep after the decimal mark.

    Returns:
        The fixed-point representation of ``x`` as a string, with the
        ``.`` separator swapped for ``,`` (e.g. ``3.14159`` -> ``"3,1"``).
    """
    fixed_point = format(x, f".{decimals}f")
    return fixed_point.replace(".", ",")

# CONFIGURATION

# Dataset sizes.
NUM_VEHICLES = 60
# NOTE(review): NUM_MEASUREMENTS is only assigned in the visible code; the
# measurement loop does not read it -- confirm whether it is still needed.
NUM_MEASUREMENTS = 550
NUM_FEEDBACK = 220

# Road surface physics mapping: surface name -> roughness multiplier read by
# the simulate_* helper functions.
road_roughness = {
    "Highway": 0.8,
    "City": 1.2,
    "Rough": 1.8,
}

# Categorical values that the random draws pick from.
vehicle_models = ["Model A", "Model B", "Model C"]
engine_types = ["Petrol", "Diesel", "EV"]
regions = ["Europe", "Asia", "North America"]
# Surface names in the mapping's insertion order.
# NOTE(review): this list is reassigned in the NVH measurements section.
road_surfaces = list(road_roughness)


# 1. VEHICLE MASTER TABLE

# Identifiers run from V001 up to V<NUM_VEHICLES>, zero-padded to three digits.
vehicle_ids = [f"V{number:03d}" for number in range(1, NUM_VEHICLES + 1)]

# Columns are filled one at a time, in the same order the random draws were
# originally made, so the seeded output stays identical.
vehicle_master = pd.DataFrame({"Vehicle_ID": vehicle_ids})
vehicle_master["Vehicle_Model"] = np.random.choice(vehicle_models, NUM_VEHICLES)
# Manufacturing dates are drawn (with replacement) from every calendar day
# between 2020-01-01 and 2023-12-31 inclusive.
vehicle_master["Manufacturing_Date"] = pd.to_datetime(
    np.random.choice(pd.date_range("2020-01-01", "2023-12-31"), NUM_VEHICLES)
)
vehicle_master["Engine_Type"] = np.random.choice(engine_types, NUM_VEHICLES)
vehicle_master["Region"] = np.random.choice(regions, NUM_VEHICLES)


# 2. PHYSICS SIMULATION FUNCTIONS

def simulate_noise(speed, surface):
    """Return a simulated noise value for a speed on a given road surface.

    The value is a constant base of 40, plus 0.28 per unit of ``speed``,
    plus 7 times the surface's roughness factor, plus a Gaussian term
    (mean 0, standard deviation 2) drawn from NumPy's global generator.

    Args:
        speed: Vehicle speed (presumably km/h -- confirm with callers).
        surface: Key into the module-level ``road_roughness`` mapping.

    Returns:
        The simulated noise value rounded to two decimals.

    Raises:
        KeyError: If ``surface`` is not a key of ``road_roughness``.
    """
    # Look the surface up first so an unknown surface fails before any random
    # number is consumed.
    surface_factor = road_roughness[surface]
    jitter = np.random.normal(0, 2)
    level = 40 + 0.28 * speed + 7 * surface_factor + jitter
    return round(level, 2)

def simulate_vibration(speed, surface):
    """Return a simulated vibration value for a speed on a given road surface.

    The value is a constant base of 0.6, plus 1.6 times the surface's
    roughness factor, plus 0.02 per unit of ``speed``, plus a Gaussian term
    (mean 0, standard deviation 0.3) drawn from NumPy's global generator.
    Results below 0.5 are raised to 0.5.

    Args:
        speed: Vehicle speed (presumably km/h -- confirm with callers).
        surface: Key into the module-level ``road_roughness`` mapping.

    Returns:
        The simulated vibration value, at least 0.5, rounded to two decimals.

    Raises:
        KeyError: If ``surface`` is not a key of ``road_roughness``.
    """
    # Look the surface up first so an unknown surface fails before any random
    # number is consumed.
    surface_factor = road_roughness[surface]
    jitter = np.random.normal(0, 0.3)
    level = 0.6 + 1.6 * surface_factor + 0.02 * speed + jitter
    # Apply the lower floor.
    if level < 0.5:
        level = 0.5
    return round(level, 2)

def calculate_harshness(noise, vibration):
    """Score a noise/vibration pair on a 1-10 scale (10 = no penalties).

    Two points are deducted for every threshold that is strictly exceeded:
    noise above 75, 85 and 95, and vibration above 4 and 6. The result is
    never lower than 1.

    Args:
        noise: Noise value to score.
        vibration: Vibration value to score.

    Returns:
        An integer score between 1 and 10.
    """
    exceeded = 0
    for noise_limit in (75, 85, 95):
        if noise > noise_limit:
            exceeded += 1
    for vibration_limit in (4, 6):
        if vibration > vibration_limit:
            exceeded += 1
    # All five thresholds exceeded would give 0, hence the floor of 1.
    return max(1, 10 - 2 * exceeded)



# 3. NVH Measurements

# One measurement per vehicle per day, for 60 consecutive days.
timestamps = pd.date_range("2024-01-01", periods=60, freq="D")
road_surfaces = ["City", "Highway", "Rough"]

# Additive per-surface contributions to noise and vibration.
# NOTE(review): the simulate_noise / simulate_vibration helpers defined
# earlier in this file use different coefficients and are not called here,
# and NUM_MEASUREMENTS is not consulted -- confirm which model is intended.
surface_noise_offset = {"City": 3, "Highway": 1, "Rough": 6}
surface_vibration_offset = {"City": 0.8, "Highway": 0.3, "Rough": 1.5}

# The date strings do not depend on the vehicle, so format them once.
day_labels = timestamps.strftime("%d.%m.%Y")

records = []

for vehicle in vehicle_ids:
    for day_label in day_labels:
        # Draw order (speed, surface, noise jitter, vibration jitter) is
        # fixed so the seeded output stays reproducible.
        kmph = np.random.uniform(40, 120)
        surface = np.random.choice(road_surfaces)

        noise_db = (
            60 + (kmph * 0.15)
            + surface_noise_offset[surface]
            + np.random.normal(0, 2)
        )

        vibration_rms = (
            1.2
            + (kmph * 0.02)
            + surface_vibration_offset[surface]
            + np.random.normal(0, 0.4)
        )

        # Numeric values are stored as strings with a decimal comma.
        records.append((
            vehicle,
            day_label,
            surface,
            to_german_decimal(kmph, 1),
            to_german_decimal(noise_db, 1),
            to_german_decimal(vibration_rms, 2),
        ))

nvh_measurements = pd.DataFrame(
    records,
    columns=[
        "Vehicle_ID",
        "Timestamp",
        "Road_Surface",
        "Speed_kmph",
        "Noise_dB",
        "Vibration_RMS",
    ],
)



# 4. CUSTOMER FEEDBACK TABLE

comments = [
    "Smooth ride",
    "Excessive vibration on rough roads",
    "Noticeable noise at high speed",
    "Comfortable driving experience",
    "Harsh ride quality"
]

# Each feedback entry gets a sequential ID (F500, F501, ...), a random
# vehicle, a random date in Q1 2024 and a random comment.
customer_feedback = pd.DataFrame({
    "Feedback_ID": [f"F{500+i}" for i in range(NUM_FEEDBACK)],
    "Vehicle_ID": np.random.choice(vehicle_ids, NUM_FEEDBACK),
    "Feedback_Date": pd.to_datetime(
        np.random.choice(pd.date_range("2024-01-01", "2024-03-31"), NUM_FEEDBACK)
    ),
    "Comment": np.random.choice(comments, NUM_FEEDBACK)
})

# Map feedback to realistic NVH conditions.
# Whole measurement rows are sampled (with replacement) so that the noise and
# vibration values scored together were actually recorded together. Drawing
# the two columns independently would pair values from unrelated measurements.
sampled_positions = np.random.randint(0, len(nvh_measurements), NUM_FEEDBACK)
sampled_measurements = nvh_measurements.iloc[sampled_positions]

# The measurement table stores numbers as strings with a decimal comma, so
# convert them back to floats before scoring.
sampled_noise = (
    sampled_measurements["Noise_dB"].str.replace(",", ".", regex=False).astype(float)
)
sampled_vibration = (
    sampled_measurements["Vibration_RMS"].str.replace(",", ".", regex=False).astype(float)
)

# zip() pairs the values positionally; the sampled rows carry the measurement
# table's (possibly repeated) index labels, which must not be used to align
# against customer_feedback.
customer_feedback["Harshness_Score"] = [
    calculate_harshness(noise, vibration)
    for noise, vibration in zip(sampled_noise, sampled_vibration)
]


# 5. SAVE FILES

# Output file name -> table written to it (no index column in the CSV).
output_tables = {
    "Vehicle_Master.csv": vehicle_master,
    "NVH_Measurements.csv": nvh_measurements,
    "Customer_Feedback.csv": customer_feedback,
}

for file_name, table in output_tables.items():
    table.to_csv(file_name, index=False)


# Report only after every file has been written.
print("Files created:")
for file_name in output_tables:
    print(f"- {file_name}")


✅ Synthetic NVH datasets generated successfully!
Files created:
- Vehicle_Master.csv
- NVH_Measurements.csv
- Customer_Feedback.csv
