In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import numpy as np
import pandas as pd
from simulator import LungCancerProgressionGenerator

# Generate datasets for both scenarios
generator = LungCancerProgressionGenerator(num_patients=10000, doubling_time=500)
df_without_test = generator.generate_dataset(use_blood_test=False)
df_with_test = generator.generate_dataset(use_blood_test=True)

In [3]:
import pandas as pd

def prepare_data_for_multistate(df, blood_test=False):
    # Ensure the data is sorted by sample_id and time_entry_to_origin
    df_sorted = df.sort_values(['sample_id', 'time_entry_to_origin'])
    
    # Create a DataFrame for transitions
    transitions = []
    for _, group in df_sorted.groupby('sample_id'):
        for i in range(len(group) - 1):
            transitions.append({
                'id': group['sample_id'].iloc[i],
                'from': group['origin_state'].iloc[i],
                'to': group['target_state'].iloc[i+1],
                'entry_time': group['time_entry_to_origin'].iloc[i],
                'exit_time': group['time_entry_to_origin'].iloc[i+1],
                'age_at_diagnosis': group['age_at_diagnosis'].iloc[i]
            })
    
    transitions_df = pd.DataFrame(transitions)
    
    # Add a column for blood test group (1 for with test, 0 for without)
    transitions_df['blood_test'] = 1 if blood_test else 0
    
    return transitions_df

# Prepare data for both groups
transitions_without_test = prepare_data_for_multistate(df_without_test, blood_test=False)
transitions_with_test = prepare_data_for_multistate(df_with_test, blood_test=True)

# Combine the datasets
all_transitions = pd.concat([transitions_without_test, transitions_with_test])

# Export to CSV
all_transitions.to_csv('multistate_data.csv', index=False)

print("Data exported to multistate_data.csv")

Data exported to multistate_data.csv


In [4]:
all_transitions

Unnamed: 0,id,from,to,entry_time,exit_time,age_at_diagnosis,blood_test
0,28,0,1,145,146,,0
1,28,1,1,146,147,,0
2,28,1,1,147,148,,0
3,28,1,1,148,149,,0
4,28,1,1,149,150,,0
...,...,...,...,...,...,...,...
6545,9998,1,1,174,175,64.0,1
6546,9998,1,1,175,176,64.0,1
6547,9998,1,1,176,177,64.0,1
6548,9998,1,1,177,178,64.0,1
