In [3]:
import numpy as np
import pandas as pd

# Build a synthetic epilepsy-prediction dataset and write it to CSV.
#
# All columns are drawn from NumPy's legacy global random stream, which is
# seeded once below. The draws must stay in this exact order: reordering
# them would change every generated value even with the same seed.
np.random.seed(42)

# Row count of the generated dataset.
num_samples = 10000

# Feature columns, inserted in the order they appear in the output file.
data = {}
data['age'] = np.random.randint(1, 90, size=num_samples)  # ints, upper bound exclusive
data['sex'] = np.random.randint(0, 2, size=num_samples)  # binary flag, 0 or 1
data['heart_rate'] = np.random.uniform(60, 180, size=num_samples)
data['systolic_bp'] = np.random.uniform(90, 180, size=num_samples)
data['diastolic_bp'] = np.random.uniform(60, 120, size=num_samples)
data['blood_oxygen_level'] = np.random.uniform(85, 100, size=num_samples)
data['seizure_history'] = np.random.randint(0, 2, size=num_samples)  # binary flag

# Target column (epileptic_seizure), a deterministic rule over the features:
# a row is labelled 1 only when seizure_history is 1 AND at least one of
# these holds: heart_rate > 100, systolic_bp > 120, blood_oxygen_level < 92.
# Every other row is labelled 0.
data['epileptic_seizure'] = (
    (data['seizure_history'] == 1)
    & (
        (data['heart_rate'] > 100)
        | (data['systolic_bp'] > 120)
        | (data['blood_oxygen_level'] < 92)
    )
).astype(int)

# Assemble the columns into a table; column order follows insertion order.
df = pd.DataFrame(data)

# Write the table to disk without the row index, then report the file name.
df.to_csv('epilepsy_prediction_dataset.csv', index=False)

print("Dataset saved as 'epilepsy_prediction_dataset.csv'")


Dataset saved as 'epilepsy_prediction_dataset.csv'
