In [8]:
import pandas as pd
import numpy as np

# Seed NumPy's global (legacy) random state so the synthetic sample is reproducible.
np.random.seed(0)

# Number of synthetic records to generate.
size = 1000

# Build the feature columns one at a time. Every draw consumes the same global
# random stream, so the order of the statements below determines the values.
# Note: np.random.randint excludes its upper bound (e.g. heart rate is 60..99).
data = {}
data["Heart_Rate_(BPM)"] = np.random.randint(60, 100, size=size)
data["HRV_(ms)"] = np.random.uniform(20, 100, size=size)
data["Systolic_BP_(mmHg)"] = np.random.randint(110, 140, size=size)
data["Diastolic_BP_(mmHg)"] = np.random.randint(70, 90, size=size)
data["Respiration_Rate_(Breaths_per_Minute)"] = np.random.randint(12, 20, size=size)
data["Blood_Oxygen_Level_(SpO2)"] = np.random.uniform(95, 100, size=size)
data["Ambient_Temperature_(C)"] = np.random.uniform(15, 30, size=size)
data["Ambient_Noise_Level_(dB)"] = np.random.uniform(30, 80, size=size)
data["Time_of_Day"] = np.random.choice(
    ["Morning", "Afternoon", "Evening", "Night"], size=size
)
data["Previous_Activity_Level_(Steps)"] = np.random.randint(0, 10000, size=size)
data["Age"] = np.random.randint(18, 80, size=size)

# Example rule for labelling a record as 'fit to drive/ride' (all must hold):
#   - Heart rate between 60 and 90 BPM (the lower bound is already guaranteed
#     by the generator above, so only the upper bound is tested here)
#   - Systolic BP below 130 mmHg
#   - Diastolic BP below 85 mmHg
#   - Blood oxygen level above 97%
# The thresholds are illustrative and can be tuned to more precise health guidelines.
heart_rate_ok = data["Heart_Rate_(BPM)"] <= 90
systolic_ok = data["Systolic_BP_(mmHg)"] < 130
diastolic_ok = data["Diastolic_BP_(mmHg)"] < 85
oxygen_ok = data["Blood_Oxygen_Level_(SpO2)"] > 97

# Encode the label as 1 (fit) / 0 (not fit).
data["Is_Fit"] = np.where(heart_rate_ok & systolic_ok & diastolic_ok & oxygen_ok, 1, 0)

# Assemble all columns (features + label) into a single DataFrame.
df = pd.DataFrame(data)

In [9]:
# Introduce 'dirty' data into 10% of the dataset.
# (The fraction is a named constant so the comment and the code cannot drift apart.)
DIRTY_FRACTION = 0.1

# Seed *before* the row selection as well as the value draws, so that the whole
# cell (which rows are corrupted and with what values) is reproducible and
# re-running the cell on its own leaves `df` unchanged.
np.random.seed(24)
dirty_indices = np.random.choice(
    df.index, size=int(DIRTY_FRACTION * len(df)), replace=False
)
n_dirty = len(dirty_indices)

# Introduce some inconsistencies/errors in the data.
# For simplicity, heart rate and blood pressure are replaced by values drawn from
# much wider ranges, and blood oxygen level and HRV are set to unrealistic values.
df.loc[dirty_indices, "Heart_Rate_(BPM)"] = np.random.randint(50, 200, size=n_dirty)
df.loc[dirty_indices, "Systolic_BP_(mmHg)"] = np.random.randint(90, 200, size=n_dirty)
df.loc[dirty_indices, "Diastolic_BP_(mmHg)"] = np.random.randint(60, 150, size=n_dirty)
df.loc[dirty_indices, "Blood_Oxygen_Level_(SpO2)"] = np.random.uniform(80, 100, size=n_dirty)
df.loc[dirty_indices, "HRV_(ms)"] = np.random.uniform(10, 200, size=n_dirty)

# Recalculate the 'Is_Fit' column so it reflects the corrupted values.
# This must read from and write to `df`: the `data` dict still holds the original
# clean arrays, so recomputing from it would leave the saved labels stale.
# The heart-rate lower bound (60 BPM) of the stated criteria is enforced explicitly
# here because dirty heart rates can fall below the originally generated range.
df["Is_Fit"] = np.where(
    df["Heart_Rate_(BPM)"].between(60, 90) &
    (df["Systolic_BP_(mmHg)"] < 130) &
    (df["Diastolic_BP_(mmHg)"] < 85) &
    (df["Blood_Oxygen_Level_(SpO2)"] > 97),
    1, 0
)

# Save the dataframe to a CSV file (without the index column).
df.to_csv('data_is_fit.csv', index=False)