# Generate sample monitoring data

In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so every run regenerates the same data set.
# All draws below share this single stream, so reordering any np.random call
# changes every value generated after it.
np.random.seed(42)

# Number of rows in the synthetic data set
n_samples = 10000

# Model version label, drawn uniformly from three versions
model_version = np.random.choice(["v1", "v2", "v3"], n_samples)

# Binary outcome drawn with a 7.5% event probability
outcome = np.random.binomial(1, 0.075, n_samples)

# Predicted probability: a normal draw centred on the event probability
mean, std_dev = 0.075, 0.05
pred_prob = np.random.normal(mean, std_dev, n_samples)

# Jitter the rows that have a positive outcome.
# NOTE(review): the noise is zero-mean, so it perturbs these rows without
# shifting them towards the outcome - confirm this is the intended behaviour.
is_event = outcome == 1
pred_prob[is_event] += np.random.normal(0, 0.01, is_event.sum())

# Clip only after the noise has been added; clipping first allowed rows that
# were clipped to 0 to be pushed below 0 by the noise, which is not a valid
# probability.
pred_prob = np.clip(pred_prob, 0, 1)

# Sex label, drawn uniformly
sex = np.random.choice(["Male", "Female"], n_samples)

# Age: normal draw truncated to an integer, then limited to the 40-90 range
age = np.random.normal(65, 10, n_samples).astype(int)
age = np.clip(age, 40, 90)

# Date: start_date plus a random whole-day offset.
# randint's upper bound is exclusive, so offsets run from 0 to date_range - 1
# and end_date itself is never drawn.
start_date = pd.Timestamp("2022-07-01")
end_date = pd.Timestamp("2023-04-30")
date_range = (end_date - start_date).days
date = [start_date + pd.Timedelta(days=np.random.randint(0, date_range)) for _ in range(n_samples)]

# Run ID label, drawn uniformly from five runs
run_id = np.random.choice(["Run 1", "Run 2", "Run 3", "Run 4", "Run 5"], n_samples)

# Site code, drawn uniformly from four sites
site = np.random.choice(["NSEC", "WANS", "NHTC", "HEXH"], n_samples)

# Operation type, drawn uniformly
op_type = np.random.choice(["knee", "hip"], n_samples)

# Assemble the generated columns into a single DataFrame
data = pd.DataFrame({"model_version": model_version,
                     "outcome": outcome,
                     "pred_prob": pred_prob,
                     "sex": sex,
                     "age": age,
                     "date": date,
                     "run_id": run_id,
                     "site": site,
                     "op_type": op_type})

# (mean, standard deviation) of the normal distribution used for each SHAP_*
# column, listed in draw order: ASA grade, creatinine, haemoglobin,
# hypertension, COPD, age.
shap_distributions = [
    (0.1, 0.02),
    (0.08, 0.05),
    (0.05, 0.03),
    (0.045, 0.07),
    (0.045, 0.07),
    (0.1, 0.07),
]

# Draw one array of n_samples values per column. The comprehension runs in
# list order, so the shared random stream is consumed in the order above.
(
    shap_asa_grade,
    shap_creatinine,
    shap_haemoglobin,
    shap_hypertension,
    shap_copd,
    shap_age,
) = [np.random.normal(loc, scale, n_samples) for loc, scale in shap_distributions]

# Append the generated arrays to the DataFrame, one column at a time
shap_columns = {
    "SHAP_ASA_Grade": shap_asa_grade,
    "SHAP_Creatinine": shap_creatinine,
    "SHAP_Haemoglobin": shap_haemoglobin,
    "SHAP_Hypertension": shap_hypertension,
    "SHAP_COPD": shap_copd,
    "SHAP_Age": shap_age,
}
for column_name, values in shap_columns.items():
    data[column_name] = values

# Preview the first rows of the completed data set
print(data.head())


# Print the top five rows of the generated data set as plain text
print(data.head(n=5))


  model_version  outcome  pred_prob     sex  age       date run_id  site  \
0            v3        0   0.080247  Female   53 2022-12-12  Run 1  NHTC   
1            v1        0   0.108232  Female   63 2022-07-07  Run 5  NSEC   
2            v3        0   0.199765  Female   47 2022-07-01  Run 1  NHTC   
3            v3        0   0.076624    Male   61 2022-09-26  Run 3  NSEC   
4            v1        0   0.018591  Female   80 2022-12-27  Run 5  HEXH   

  op_type  SHAP_ASA_Grade  SHAP_Creatinine  SHAP_Haemoglobin  \
0    knee        0.097194         0.162300          0.020951   
1    knee        0.117136         0.047972          0.057473   
2    knee        0.069466         0.012067          0.007385   
3    knee        0.136085         0.224544          0.121656   
4     hip        0.120149         0.130355          0.024133   

   SHAP_Hypertension  SHAP_COPD  SHAP_Age  
0           0.057995   0.020616  0.018579  
1          -0.002112  -0.083730  0.160015  
2           0.170075   0.0

In [3]:
# Write the generated data set to a CSV file, leaving out the row index
data.to_csv(path_or_buf="sample_data_0428.csv", index=False)