In [57]:
from tqdm import tqdm
import numpy as np
import pandas as pd

n_samples = 10000

light_conditions = ["Daylight", "Night", "Dusk", "Dawn"]
weather_conditions = ["Clear", "Rainy", "Foggy", "Windy", "Snowy"]
road_types = ["Urban", "Highway", "Rural", "Suburban"]
road_surfaces = ["Dry", "Wet", "Gravel", "Mud"]

road_surface_risk = {"Dry": 0.0, "Wet": 0.3, "Gravel": 0.5, "Mud": 0.7}
light_condition_risk = {"Daylight": 0.0, "Night": 0.4, "Dusk": 0.3, "Dawn": 0.2}
weather_condition_risk = {"Clear": 0.0, "Rainy": 0.4, "Foggy": 0.5, "Windy": 0.3, "Snowy": 0.7}
road_type_risk = {"Urban": 0.2, "Highway": 0.3, "Rural": 0.4, "Suburban": 0.3}

rows = []

for _ in tqdm(range(n_samples)):
    rider_age = np.random.randint(12, 80)
    rider_experience_years = np.random.randint(0, 40)
    speed = np.clip(np.random.normal(55, 15), 10, 120)
    helmet_used = np.random.choice([0, 1], p=[0.2, 0.8])
    alcohol_detected = np.random.choice([0, 1], p=[0.9, 0.1])
    recent_violations_count = np.random.poisson(0.5)
    road_surface = np.random.choice(road_surfaces)
    light_condition = np.random.choice(light_conditions)
    brake_condition = np.random.uniform(0, 1)  # Changed to allow 0
    weather_condition = np.random.choice(weather_conditions)
    road_type = np.random.choice(road_types)

    # Under-age risk (<25)
    age_risk_under = 0
    if rider_age < 25:
        age_risk_under = 0.2 * np.exp(-(rider_age - 18) / 7)

    # Over-age risk (>60)
    age_risk_over = 0
    if rider_age > 60:
        age_risk_over = 0.15 * (1 - np.exp(-(rider_age - 60) / 5))

    age_risk = age_risk_under + age_risk_over

    # Set risk factor to 0.95 if brake condition is 0 (or very close)
    if brake_condition <=0.1 and speed > 40:
        risk = np.random.uniform(0.90, 0.95)
        risk = np.clip(risk, 0, 1)
    if brake_condition <= 0.1:
        risk = np.random.uniform(0.85, 0.95)
        risk = np.clip(risk, 0, 1)
    elif brake_condition > 0.1 and brake_condition <= 0.3:
        risk = np.random.uniform(0.7, 0.85)
        risk = np.clip(risk, 0, 1)
    else:
        risk = (
            0.25 * (1 - helmet_used) +
            0.3 * alcohol_detected +
            0.1 * (speed > 80) +
            0.1 * road_surface_risk[road_surface] +
            0.1 * light_condition_risk[light_condition] +
            0.05 * weather_condition_risk[weather_condition] +
            0.05 * road_type_risk[road_type] +
            0.05 * (1 - brake_condition) +

            # Nonlinear additions
            0.15 * alcohol_detected * (speed / 120) +
            0.1 * ((speed / 120) ** 2) +
            0.1 * np.tanh(3 * (1 - brake_condition)) +
            0.1 * (1 - helmet_used) * alcohol_detected +
            0.05 * (1 - helmet_used) * (speed > 100).astype(float) +
            0.05 * (1 - brake_condition) * weather_condition_risk[weather_condition] +

            # Age risk addition
            age_risk
        )
        risk = np.clip(risk, 0, 1)

    rows.append({
        "rider_age": rider_age,
        "rider_experience_years": rider_experience_years,
        "speed": speed,
        "helmet_used": helmet_used,
        "alcohol_detected": alcohol_detected,
        "recent_violations_count": recent_violations_count,
        "road_surface": road_surface,
        "light_condition": light_condition,
        "brake_condition": brake_condition,
        "weather_condition": weather_condition,
        "road_type": road_type,
        "risk_factor": risk
    })

df = pd.DataFrame(rows)


100%|██████████| 10000/10000 [00:00<00:00, 13200.47it/s]


In [58]:
# # Percentage of rows to add noise to (e.g., 20%)
# noise_fraction = 0.1

# # Filter indices where brake_condition is NOT 0 or 1
# eligible_indices = df[(df['brake_condition'] <= 0.3) & (df['brake_condition'] != 1)].index

# # Number of noisy samples from eligible rows
# n_noisy = int(noise_fraction * len(eligible_indices))

# # Randomly select indices from eligible rows to add noise
# noisy_indices = np.random.choice(eligible_indices, size=n_noisy, replace=False)

# # Generate noise for selected rows
# noise = np.random.normal(0, 0.3, size=n_noisy)

# # Add noise to risk_factor only for selected indices, clip between 0 and 1
# df.loc[noisy_indices, 'risk_factor'] = np.clip(df.loc[noisy_indices, 'risk_factor'] + noise, 0, 1)



In [59]:
df

Unnamed: 0,rider_age,rider_experience_years,speed,helmet_used,alcohol_detected,recent_violations_count,road_surface,light_condition,brake_condition,weather_condition,road_type,risk_factor
0,60,19,51.398327,1,0,0,Mud,Dusk,0.151612,Windy,Highway,0.725473
1,51,18,39.634335,1,0,0,Mud,Dawn,0.711764,Windy,Urban,0.214512
2,79,4,38.527286,1,0,1,Wet,Dawn,0.015046,Snowy,Rural,0.869034
3,49,33,59.817724,1,0,1,Mud,Daylight,0.190475,Snowy,Suburban,0.709622
4,56,21,34.131623,1,0,0,Gravel,Dusk,0.822539,Foggy,Highway,0.190120
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,31,3,35.644515,1,0,0,Dry,Daylight,0.275192,Rainy,Suburban,0.736049
9996,78,37,40.162629,1,1,0,Dry,Dawn,0.659828,Foggy,Suburban,0.669827
9997,67,22,66.252962,1,0,0,Dry,Dusk,0.784267,Snowy,Rural,0.303808
9998,46,34,48.510352,0,0,0,Dry,Dusk,0.219151,Windy,Highway,0.816046


In [60]:
# Filter where brake_condition is less than 0.1
df_filtered = df[df['brake_condition'] < 0.1]
df_filtered


Unnamed: 0,rider_age,rider_experience_years,speed,helmet_used,alcohol_detected,recent_violations_count,road_surface,light_condition,brake_condition,weather_condition,road_type,risk_factor
2,79,4,38.527286,1,0,1,Wet,Dawn,0.015046,Snowy,Rural,0.869034
8,13,29,60.280484,1,0,1,Wet,Dusk,0.076314,Foggy,Urban,0.866063
9,32,14,45.148071,0,0,0,Mud,Daylight,0.014964,Clear,Rural,0.925539
12,33,31,59.426356,1,1,0,Dry,Daylight,0.015253,Snowy,Rural,0.889730
43,24,33,78.036625,1,0,2,Gravel,Dusk,0.058855,Snowy,Highway,0.912558
...,...,...,...,...,...,...,...,...,...,...,...,...
9973,67,0,52.377233,1,0,0,Dry,Daylight,0.095983,Rainy,Suburban,0.853555
9978,32,11,46.571572,1,0,2,Wet,Daylight,0.045227,Clear,Rural,0.897391
9986,53,8,45.688531,1,1,4,Dry,Dawn,0.077453,Rainy,Urban,0.897747
9992,64,13,37.046689,1,0,0,Wet,Dusk,0.019451,Windy,Highway,0.916697


In [61]:
df.to_excel('motorcycle_risk_factor.xlsx',index=False)