# Health Services Analysis

In [14]:
import numpy as np
import pandas as pd

In [15]:
# Size of the synthetic cohort generated in the cells below.
n_records = 5_000

# Seed NumPy's legacy global RNG so that every np.random.* draw made later
# in the notebook is repeatable from a fresh kernel.
np.random.seed(seed=42)

In [16]:
# Build the synthetic columns, one array of length n_records per field.
# The keyword arguments are evaluated top to bottom, so the global RNG is
# consumed in exactly this column order; reordering entries would change the
# generated values.
data = dict(
    # Sequential identifiers 1..n_records.
    patient_id=np.arange(1, n_records + 1),
    # Integer ages from 18 up to (but not including) 90.
    age=np.random.randint(low=18, high=90, size=n_records),
    gender=np.random.choice(['Male', 'Female'], size=n_records),
    procedure_type=np.random.choice(['Type A', 'Type B', 'Type C'], size=n_records),
    priority_level=np.random.choice(['High', 'Medium', 'Low'], size=n_records),
    # Normally distributed day counts cast to int; negative values are possible here.
    waiting_time_days=np.random.normal(loc=60, scale=15, size=n_records).astype(int),
    scheduled_delay_days=np.random.normal(loc=10, scale=5, size=n_records).astype(int),
    # 0/1 flag drawn with probabilities 0.8 / 0.2.
    unplanned_emergency=np.random.choice([0, 1], size=n_records, p=[0.8, 0.2]),
    # Outcome label drawn with probabilities 0.85 / 0.1 / 0.05.
    outcome=np.random.choice(
        ['Successful', 'Complication', 'Death'],
        size=n_records,
        p=[0.85, 0.1, 0.05],
    ),
)


In [17]:
# Create DataFrame
df = pd.DataFrame(data)

# Preview only the first 20 rows as CSV text. Printing the whole frame floods
# the cell output and bloats the saved notebook without adding information.
print(df.head(20).to_csv())

,patient_id,age,gender,procedure_type,priority_level,waiting_time_days,scheduled_delay_days,unplanned_emergency,outcome
0,1,69,Male,Type C,Low,53,16,0,Death
1,2,32,Female,Type C,Medium,37,-2,0,Successful
2,3,89,Male,Type B,Low,25,6,0,Successful
3,4,78,Male,Type A,Low,43,7,0,Successful
4,5,38,Male,Type A,High,36,-4,0,Successful
5,6,41,Male,Type B,High,65,3,0,Successful
6,7,20,Female,Type C,Medium,52,13,0,Successful
7,8,39,Female,Type A,Medium,51,10,0,Successful
8,9,70,Male,Type B,Medium,45,17,0,Successful
9,10,19,Male,Type B,Medium,11,17,1,Complication
10,11,47,Female,Type A,Low,82,12,0,Successful
11,12,55,Female,Type A,Medium,44,11,0,Successful
12,13,19,Female,Type C,Low,53,10,0,Successful
13,14,81,Male,Type C,High,38,-3,0,Successful
14,15,77,Female,Type A,Medium,74,3,0,Successful
15,16,38,Male,Type C,High,62,10,1,Successful
16,17,50,Male,Type B,High,47,13,0,Successful
17,18,75,Female,Type C,Low,54,11,0,Successful
18,19,39,Male,Type C,Medium,85,5,0,Successful
19,20,

In [18]:
# Ensure no negative waiting times or delays: the normally distributed draws
# can fall below zero, so floor both columns at 0. Series.clip is vectorized
# (no per-element Python lambda) and idempotent, so re-running this cell is
# harmless.
df['waiting_time_days'] = df['waiting_time_days'].clip(lower=0)
df['scheduled_delay_days'] = df['scheduled_delay_days'].clip(lower=0)

In [19]:
# Boolean flag: True when the wait was at most 90 days (the example
# recommended limit), False otherwise.
df['within_recommended_time'] = df['waiting_time_days'].le(90)

In [20]:
# Calculate flat risk factors for unplanned emergency and death, keyed on
# whether the wait exceeded 90 days. The threshold is evaluated once for the
# whole column and np.where picks the value per row, replacing a row-wise
# df.apply(axis=1) that called a Python lambda for every record.
waited_over_90_days = df['waiting_time_days'] > 90
df['risk_unplanned_emergency'] = np.where(waited_over_90_days, 0.1, 0.05)
df['risk_death'] = np.where(waited_over_90_days, 0.15, 0.05)

In [21]:
# Persist the generated table to disk (row index omitted) for use in the
# analysis, then report where it was written.
csv_filename = 'synthetic_health_services_data.csv'
df.to_csv(csv_filename, index=False)
print(f"Data saved to '{csv_filename}' for use in analysis.")

Data saved to 'synthetic_health_services_data.csv' for use in analysis.
