In [1]:
import pandas as pd
import numpy as np

# Load the mock RA registry. The simulation below reads its Patient_ID,
# RA_Severity_Score and Consent_Status columns.
df = pd.read_csv('../data/ra_registry.csv')

# Simulate 3 visits per patient
visit_days = [0, 30, 60]  # Day 0, Day 30, Day 60

# Use a dedicated, seeded generator so that re-running this cell reproduces
# the same simulated CRF (and the same CSV on disk). Change the seed to get
# a different draw.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Create the CRF-like structure column-wise rather than row by row.
# Every registry row is repeated once per visit (patient-major order), so the
# result reads: patient 1 at day 0/30/60, then patient 2 at day 0/30/60, ...
n_visits = len(visit_days)
n_rows = len(df) * n_visits
registry_score = np.repeat(df['RA_Severity_Score'].to_numpy(), n_visits)

crf_df = pd.DataFrame({
    'Patient_ID': np.repeat(df['Patient_ID'].to_numpy(), n_visits),
    'Visit_Day': np.tile(visit_days, len(df)),
    # Registry score shifted by -1, 0 or +1 independently for each visit
    # (the upper bound of `integers` is exclusive), then clamped to 0..10.
    'RA_Severity_Score': np.clip(
        registry_score + rng.integers(-1, 2, size=n_rows), 0, 10
    ),
    # Notes and dosage are drawn uniformly and independently of the score.
    'Symptom_Notes': rng.choice(['Stable', 'Improved', 'Worsened'], size=n_rows),
    'Drug_Dosage_mg': rng.choice([10, 20, 30], size=n_rows),
    # Consent is copied unchanged from the registry onto every visit row.
    'Consent_Status': np.repeat(df['Consent_Status'].to_numpy(), n_visits),
})

# Preview
print("🧾 Simulated CRF Entries:")
print(crf_df.head())

# Save to CSV (optional)
crf_df.to_csv('../data/crf_simulated.csv', index=False)


🧾 Simulated CRF Entries:
  Patient_ID  Visit_Day  RA_Severity_Score Symptom_Notes  Drug_Dosage_mg  \
0      RA001          0                  6      Improved              30   
1      RA001         30                  7        Stable              20   
2      RA001         60                  8      Improved              10   
3      RA002          0                  4        Stable              20   
4      RA002         30                  5      Improved              10   

  Consent_Status  
0            Yes  
1            Yes  
2            Yes  
3            Yes  
4            Yes  


**Ethics Note:**  
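This CRF (case report form) simulation is based on mock registry data and does not represent real patient records. Visits are fixed at Day 0, Day 30, and Day 60; the severity-score changes, symptom notes, and drug dosages are randomly generated for educational purposes and carry no clinical meaning. Consent status is carried over from the registry to every visit record to mirror the structure of a Good Clinical Practice (GCP)-compliant trial.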