# **Target Trial Emulation in Python**

## 1. Setup

### a. Library and Paths Setup

In [1]:
import pandas as pd
import os
import numpy as np

# Add working directory to path
import sys
cwd = os.getcwd()
# Register the working directory only once, so re-running this cell does not
# pile up duplicate sys.path entries.
if sys.path.count(cwd) == 0:
    sys.path.append(cwd)

# Custom modules
from custom_modules.trialemulation import trial_sequence as ts
from custom_modules.trialemulation.model_fitting import stats_glm_logit
from custom_modules.trialemulation.te_weights import TeWeightsSpec


# Locations used throughout the notebook (relative to the working directory):
# input CSV files and the per-estimand model output folders.
CSV_PATH = './csv_files/'
PP_PATH = './models/PP/'
ITT_PATH = './models/ITT/'
SCRIPT_PATH = os.getcwd()

# Keep printed DataFrames on a single line per row and never hide columns.
pd.options.display.width = 1000        # widen or narrow to suit your terminal
pd.options.display.max_columns = None  # None = show every column

### b. Specify estimands

In [2]:
# Create one trial-sequence object per estimand:
# `trial_pp` for the per-protocol analysis, `trial_itt` for intention-to-treat.
trial_pp = ts.TrialSequencePP()
trial_itt = ts.TrialSequenceITT()

### c. Create directories

In [3]:
# Per-estimand working directories, created under the current directory.
# exist_ok=True keeps the cell idempotent across re-runs.
trial_pp_dir = os.path.join(os.getcwd(), "trial_pp")
os.makedirs(trial_pp_dir, exist_ok=True)

trial_itt_dir = os.path.join(os.getcwd(), "trial_itt")
os.makedirs(trial_itt_dir, exist_ok=True)

# The weight-model fitters configured later are pointed at folders under
# PP_PATH and ITT_PATH, so make sure those base directories exist as well.
# NOTE(review): whether the fitter creates its own save path is not visible
# here; creating the base folders up front is harmless either way.
os.makedirs(PP_PATH, exist_ok=True)
os.makedirs(ITT_PATH, exist_ok=True)

## 2. Data Preparation

### a. Load the data

In [4]:
file_path = os.path.join(CSV_PATH, "data_censored.csv")

# Read the CSV file into a DataFrame.
# Only the read itself is guarded; if the file is missing we report the path
# and re-raise, because every later cell needs `data_censored` and would
# otherwise fail with an unrelated NameError.
try:
    data_censored = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"File not found at {file_path}")
    raise
else:
    print("Data loaded successfully!")
    print(data_censored.head())  # Display the first few rows

Data loaded successfully!
   id  period  treatment  x1        x2  x3        x4  age     age_s  outcome  censored  eligible
0   1       0          1   1  1.146148   0  0.734203   36  0.083333        0         0         1
1   1       1          1   1  0.002200   0  0.734203   37  0.166667        0         0         0
2   1       2          1   0 -0.481762   0  0.734203   38  0.250000        0         0         0
3   1       3          1   0  0.007872   0  0.734203   39  0.333333        0         0         0
4   1       4          1   1  0.216054   0  0.734203   40  0.416667        0         0         0


### b. Set the data

In [5]:
# Attach the loaded observational data to both trial objects.
# NOTE(review): both calls receive the same `data_censored` object, and the
# trial's data shown afterwards carries extra columns (e.g. time_of_event,
# switch, cumA). Confirm that set_data works on a copy so the two trials and
# the raw frame do not share mutated state.
trial_pp.set_data(data_censored)
trial_itt.set_data(data_censored)

In [6]:
# Print a summary of the ITT trial object (its estimand and the attached data).
trial_itt.show()

Trial Sequence Object
Estimand: Intention-to-treat

Data:
     id  period  treatment  x1        x2  x3        x4  age     age_s  outcome  censored  eligible  time_of_event  am_1  switch  regime_start  time_on_regime  cumA  eligible0  eligible1
0     1       0          1   1  1.146148   0  0.734203   36  0.083333        0         0         1           9999     0       1           0.0             0.0     1          1          0
1     1       1          1   1  0.002200   0  0.734203   37  0.166667        0         0         0           9999     1       0           0.0             1.0     2          0          1
2     1       2          1   0 -0.481762   0  0.734203   38  0.250000        0         0         0           9999     1       0           0.0             2.0     3          0          1
3     1       3          1   0  0.007872   0  0.734203   39  0.333333        0         0         0           9999     1       0           0.0             3.0     4          0          1
4     1     

## 3. Weight models and censoring

### a. Censoring due to treatment switching

In [7]:
# Fitter for the per-protocol treatment-switching models; it is given a
# "switch_models" path under PP_PATH.
pp_switch_fitter = stats_glm_logit(os.path.join(PP_PATH, "switch_models"))

# Register the switching weight model: the numerator and denominator strings
# are the right-hand sides of the formulas echoed by show_switch_weights().
trial_pp = trial_pp.set_switch_weight_model(
    numerator="age",
    denominator="age + x1 + x3",
    model_fitter=pp_switch_fitter,
)

# Echo the configured formulas and fitter type.
trial_pp.show_switch_weights()

  - Numerator formula: treatment ~ age
  - Denominator formula: treatment ~ age + x1 + x3
  - Model fitter type: GLMLogitModelFitter
  - Weight models not fitted. Use `calculate_weights()`


### b. Other informative censoring

In [8]:
# Fitter used for the per-protocol censoring models.
pp_censor_fitter = stats_glm_logit(os.path.join(PP_PATH, "switch_models"))

# Censoring weight model for the per-protocol trial: the event column is
# "censored", and neither the numerator nor the denominator model is pooled.
pp_censor_settings = {
    "censor_event": "censored",
    "numerator": "~ x2",
    "denominator": "~ x2 + x1",
    "pool_models": "none",
    "model_fitter": pp_censor_fitter,
}
trial_pp = trial_pp.set_censor_weight_model(**pp_censor_settings)

# Echo the configured censoring formulas and pooling choice.
trial_pp.show_censor_weights()

Outcome formula updated based on censor weights.
  - Numerator formula: 1 - censored ~  x2
  - Denominator formula: 1 - censored ~  x2 + x1
  - Neither model is pooled.
  - Model fitter type: GLMLogitModelFitter
  - Weight models not fitted. Use `calculate_weights()`


In [9]:
# Set the censoring weight model for the ITT trial.
# Here only the numerator model is pooled (pool_models="numerator").
# The save path is built with os.path.join, as in the per-protocol cells, so
# it no longer depends on ITT_PATH ending with a path separator.
trial_itt = trial_itt.set_censor_weight_model(
    censor_event="censored",
    numerator="~ x2",
    denominator="~ x2 + x1",
    pool_models="numerator",
    model_fitter=stats_glm_logit(save_path=os.path.join(ITT_PATH, "switch_models"))
)

# Echo the configured censoring formulas and pooling choice.
trial_itt.show_censor_weights()

Outcome formula updated based on censor weights.
  - Numerator formula: 1 - censored ~  x2
  - Denominator formula: 1 - censored ~  x2 + x1
  - Numerator model is pooled across treatment arms. Denominator model is not pooled.
  - Model fitter type: GLMLogitModelFitter
  - Weight models not fitted. Use `calculate_weights()`


## 4. Calculate weights

In [10]:
# Calculate weights for Per-protocol trial
trial_pp = trial_pp.calculate_weights()

# Calculate weights for ITT trial
# NOTE(review): the recorded output of this cell shows one success message
# followed by "ValueError: Switch weight model fitter is missing". No switch
# weight model is set on `trial_itt` in the cells above, so this call is
# presumably the one that fails. Confirm whether calculate_weights() should
# require a switch model for the ITT estimand; until that is resolved the
# cells below cannot run.
trial_itt = trial_itt.calculate_weights()

Weights calculated successfully.




ValueError: Switch weight model fitter is missing. Did you call set_switch_weight_model()?

In [None]:
# Inspect the weight models of the ITT trial.
# NOTE(review): this cell has no recorded output (never executed); it
# presumably needs calculate_weights() to have succeeded first.
trial_itt.show_weight_models()

In [None]:
# Inspect the weight models of the per-protocol trial.
# NOTE(review): this cell has no recorded output (never executed).
trial_pp.show_weight_models()

## 5. Specify Outcome Model

In [None]:
# General case: outcome model with no extra arguments
# NOTE(review): the other setters in this notebook assign their result back
# (e.g. `trial_pp = trial_pp.set_censor_weight_model(...)`), but here the
# return value is discarded. Confirm that set_outcome_model() updates the
# object in place; otherwise the configuration is lost.
trial_pp.set_outcome_model()

# ITT trial with adjustment term
trial_itt.set_outcome_model(adjustment_terms="x2")