In [9]:
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Synthetic patient-readmission dataset
#
# Every random draw below comes from NumPy's legacy global generator, so the
# ORDER of the np.random calls is part of the result: moving or inserting a
# draw changes every value generated after it.
# ---------------------------------------------------------------------------
np.random.seed(42)  # fixed seed -> identical dataset on every run

# Number of synthetic patient records
n = 1000

# --- Demographics ---
patient_id = np.arange(1, n + 1)  # sequential ids 1..n
# Age ~ Normal(mean 65, sd 15); astype(int) truncates the fractional part
age = np.random.normal(65, 15, n).astype(int)

# --- Clinical factors ---
admission_categories = ['Emergency', 'Urgent', 'Elective']
diagnosis_categories = ['Heart Failure', 'Pneumonia', 'Heart Attack', 'Other']
admission_type = np.random.choice(admission_categories, size=n)  # uniform over admission types
diagnosis = np.random.choice(diagnosis_categories, size=n)  # uniform over primary diagnoses

# randint's `high` bound is exclusive, so the inclusive ranges are as noted
num_lab_procedures = np.random.randint(low=1, high=126, size=n)  # 1..125 lab procedures
num_medications = np.random.randint(low=1, high=81, size=n)  # 1..80 medications
num_outpatient_visits = np.random.randint(low=0, high=36, size=n)  # 0..35 outpatient visits
num_inpatient_visits = np.random.randint(low=0, high=21, size=n)  # 0..20 inpatient visits
num_emergency_visits = np.random.randint(low=0, high=64, size=n)  # 0..63 emergency visits
num_diagnoses = np.random.randint(low=1, high=10, size=n)  # 1..9 diagnoses
a1c_result = np.random.uniform(low=5.0, high=10.0, size=n)  # A1C result (diabetes monitoring)

# --- Environmental factors ---
pollution_index = np.random.normal(100, 20, n)  # air quality index
temperature = np.random.normal(22, 5, n)  # average temperature in Celsius

# --- Readmission history ---
# A clinical-history feature, but it is drawn after the environmental ones;
# keep it here so the random stream (and therefore the data) stays the same.
num_readmissions = np.random.randint(low=0, high=6, size=n)  # 0..5 prior readmissions

# --- Raw risk score ---
# One weighted contribution per factor. The final entry is Gaussian noise and
# must remain the last random draw of the cell.
risk_terms = [
    0.03 * age,                             # age
    0.2 * num_diagnoses,                    # number of diagnoses
    0.1 * num_medications,                  # number of medications
    0.05 * num_inpatient_visits,            # previous inpatient visits
    0.1 * num_lab_procedures,               # lab procedures
    0.05 * num_outpatient_visits,           # outpatient visits
    0.2 * num_emergency_visits,             # emergency visits
    0.5 * (pollution_index > 120),          # flat bump when pollution index exceeds 120
    0.02 * temperature,                     # temperature
    0.3 * (admission_type == 'Emergency'),  # emergency admission
    0.1 * (diagnosis == 'Heart Failure'),   # heart-failure diagnosis
    0.02 * a1c_result,                      # A1C result
    0.1 * num_readmissions,                 # previous readmissions
    np.random.normal(0, 0.1, n),            # random noise for real-world variability
]

# Accumulate strictly left to right so floating-point rounding matches a
# single chained `a + b + c + ...` expression.
predicted_readmission = risk_terms[0]
for term in risk_terms[1:]:
    predicted_readmission = predicted_readmission + term
predicted_readmission = predicted_readmission.astype(float)

# --- Min-max scale the score into [0, 1] ---
# The lowest-scoring record maps to 0 and the highest to 1, so the scaled
# value is relative to this particular sample.
score_min = predicted_readmission.min()
score_max = predicted_readmission.max()
predicted_readmission = (predicted_readmission - score_min) / (score_max - score_min)

# --- Binary label: 1 when the scaled score is strictly above the threshold ---
threshold = 0.5
readmitted = np.greater(predicted_readmission, threshold).astype(int)

# --- Assemble the final table (column order is preserved) ---
feature_columns = [
    ('patient_id', patient_id),
    ('age', age),
    ('admission_type', admission_type),
    ('diagnosis', diagnosis),
    ('num_lab_procedures', num_lab_procedures),
    ('num_medications', num_medications),
    ('num_outpatient_visits', num_outpatient_visits),
    ('num_inpatient_visits', num_inpatient_visits),
    ('num_emergency_visits', num_emergency_visits),
    ('num_diagnoses', num_diagnoses),
    ('num_readmissions', num_readmissions),
    ('a1c_result', a1c_result),
    ('pollution_index', pollution_index),
    ('temperature', temperature),
    ('predicted_readmission', predicted_readmission),
    ('readmitted', readmitted),
]
data = pd.DataFrame(dict(feature_columns))

# Preview the first rows
data.head()


Unnamed: 0,patient_id,age,admission_type,diagnosis,num_lab_procedures,num_medications,num_outpatient_visits,num_inpatient_visits,num_emergency_visits,num_diagnoses,num_readmissions,a1c_result,pollution_index,temperature,predicted_readmission,readmitted
0,1,72,Elective,Heart Failure,19,29,9,17,50,1,0,7.988066,103.820725,25.949289,0.383887,0
1,2,62,Emergency,Pneumonia,64,6,24,3,2,3,5,9.047137,87.472694,25.295056,0.162004,0
2,3,74,Elective,Pneumonia,91,49,7,7,60,2,1,9.000763,101.105213,20.100548,0.744007,1
3,4,87,Emergency,Pneumonia,117,40,10,14,11,8,3,9.660645,112.221747,29.99161,0.562963,1
4,5,61,Elective,Heart Failure,98,65,4,2,28,7,1,8.851144,109.169529,25.759937,0.619632,1


In [10]:
# Export the dataset to CSV and trigger a browser download (Google Colab only).
from google.colab import files

# Single source of truth for the output location: the file is written to and
# downloaded from the same path. Writing to a relative path but downloading
# from a hard-coded '/content/...' path only works when the working directory
# happens to be /content.
output_csv = 'patient_readmission_data.csv'

data.to_csv(output_csv, index=False)  # index=False: do not write the row index as a column
files.download(output_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>