In [1]:
import pandas as pd
import numpy as np


In [12]:
# Seed NumPy's legacy global RNG so the cell regenerates the same dataset each run.
# NOTE: the columns below must be drawn in exactly this order -- every draw
# consumes the shared random stream, so reordering changes every later column.
np.random.seed(42)

# Number of rows in the synthetic cohort.
n_samples = 1500


def _int_column(low, high):
    """Return n_samples integers drawn uniformly from [low, high); high is exclusive."""
    return np.random.randint(low, high, n_samples)


def _code_column(codes):
    """Return n_samples values drawn uniformly from the list of category codes."""
    return np.random.choice(codes, n_samples)


# Build the raw columns one at a time; insertion order is also the CSV column order.
data = {}
data['Age'] = _int_column(30, 90)                        # years, 30-89
data['Gender'] = _code_column([1, 0])                    # 1 = Male, 0 = Female
data['Weight'] = _int_column(48, 96)                     # kg, 48-95
data['Height'] = _int_column(140, 180)                   # cm, 140-179
data['Chest Pain Type'] = _code_column([0, 1, 2, 3])     # 0 = None, 1 = Mild, 2 = Moderate, 3 = Severe
data['Heart Rate'] = _int_column(60, 130)                # bpm, 60-129
data['Systolic Blood Pressure'] = _int_column(100, 160)  # mmHg, 100-159
data['Diastolic Blood Pressure'] = _int_column(50, 120)  # mmHg, 50-119
data['Smoking'] = _code_column([0, 1])                   # 0 = Non-Smoker, 1 = Smoker
# Column name spelling is kept as-is: it becomes the header written to the CSV.
data['Number of Cygrets Per Day'] = _int_column(0, 40)   # 0-39; zeroed for non-smokers further down
data['Diabetes'] = _code_column([0, 1])                  # 0 = No, 1 = Yes
data['Anaemia'] = _code_column([0, 1])                   # 0 = No, 1 = Yes
data['Any Past Heart Failures'] = _code_column([0, 1])   # 0 = No, 1 = Yes
data['SpO2 Level'] = _int_column(70, 100)                # percent, 70-99
data['BPM'] = _code_column([0, 1, 2])                    # 0 = LOW, 1 = Normal, 2 = HIGH

# Assemble the columns into a single table.
df = pd.DataFrame(data)

# Label rule: a row counts as heart failure only when all three hold --
# heart rate above 85, some chest pain (type >= 1), and a past heart failure.
conditions = (
    df['Heart Rate'].gt(85)
    & df['Chest Pain Type'].ge(1)
    & df['Any Past Heart Failures'].eq(1)
)

# Target column: 1 where the rule matches, 0 everywhere else.
df['Heart Failure'] = np.where(conditions, 1, 0)

# Keep the smoking columns coherent: a non-smoker cannot have a cigarette count.
non_smokers = df['Smoking'].eq(0)
df.loc[non_smokers, 'Number of Cygrets Per Day'] = 0

# Preview the first five rows.
print(df.head())

# Persist the dataset (without the index column) next to the notebook.
output_csv = 'synthetic_heart_failure_data_with_conditions_v3.csv'
df.to_csv(output_csv, index=False)

print("Synthetic dataset created and saved as 'synthetic_heart_failure_data_with_conditions_v3.csv'")



   Age  Gender  Weight  Height  Chest Pain Type  Heart Rate  \
0   68       0      73     170                3         107   
1   81       0      74     173                2          98   
2   58       0      84     178                0          90   
3   44       0      51     142                1          69   
4   72       1      71     147                0          89   

   Systolic Blood Pressure  Diastolic Blood Pressure  Smoking  \
0                      124                        51        0   
1                      112                        79        0   
2                      120                       101        0   
3                      140                        58        1   
4                      109                        71        0   

   Number of Cygrets Per Day  Diabetes  Anaemia  Any Past Heart Failures  \
0                          0         0        1                        1   
1                          0         0        1                        1   
2 