# **Target Trial Emulation in Python**

## A. Setup

### I. Library and Paths Setup

In [2]:
import pandas as pd
import os
import numpy as np

import sys
cwd = os.getcwd()
if cwd not in sys.path:
    sys.path.append(cwd)

from trial_emulation import TrialSequence

CSV_PATH = './csv-files/'
PP_PATH = './models/PP/'
ITT_PATH = './models/ITT/'
SCRIPT_PATH = os.getcwd()

# Set pandas display options to prevent wrapping
pd.set_option('display.width', 1000)  # Adjust width to fit your terminal
pd.set_option('display.max_columns', None)  # Show all columns

ModuleNotFoundError: No module named 'pandas'

### II. Specify estimands

In [None]:
trial_pp = TrialSequence(None, "PP")
trial_itt = TrialSequence(None, "ITT")

### III. Create directories

In [None]:
trial_pp_dir = os.path.join(os.getcwd(), "trial_pp")
os.makedirs(trial_pp_dir, exist_ok=True)

trial_itt_dir = os.path.join(os.getcwd(), "trial_itt")
os.makedirs(trial_itt_dir, exist_ok=True)

## B. Data Preparation

### I. Load the data

In [None]:
file_path = os.path.join(CSV_PATH, "data_censored.csv")

# Read the CSV file into a DataFrame
try:
    data_censored = pd.read_csv(file_path)
    print("Data loaded successfully!")
    print(data_censored.head())  # Display the first few rows
except FileNotFoundError:
    print(f"File not found at {file_path}")

Data loaded successfully!
   id  period  treatment  x1        x2  x3        x4  age     age_s  outcome  censored  eligible
0   1       0          1   1  1.146148   0  0.734203   36  0.083333        0         0         1
1   1       1          1   1  0.002200   0  0.734203   37  0.166667        0         0         0
2   1       2          1   0 -0.481762   0  0.734203   38  0.250000        0         0         0
3   1       3          1   0  0.007872   0  0.734203   39  0.333333        0         0         0
4   1       4          1   1  0.216054   0  0.734203   40  0.416667        0         0         0


### II. Set the data

In [None]:
trial_pp.set_data(data_censored)
trial_itt.set_data(data_censored)

In [None]:
trial_itt

TrialSequence(estimand=ITT, data_shape=(725, 12))

## 3. Weight models and censoring

### 3.1 Censoring due to treatment switching

In [None]:
# Set switch weight model
trial_pp = trial_pp.set_switch_weight_model(
    numerator="~ age",
    denominator="~ age + x1 + x3",
    model_fitter=stats_glm_logit(PP_PATH + "switch_models")  # Pass the fitter function
)

# Access switch weights
print(trial_pp.display_switch_weights())

NameError: name 'stats_glm_logit' is not defined

### 3.2 Other informative censoring

In [None]:
# Set censor weight model
trial_pp = trial_pp.set_censor_weight_model(
    censor_event="censored",
    numerator="~ x2",
    denominator="~ x2 + x1",
    pool_models="none",
    model_fitter= stats_glm_logit(save_path = PP_PATH + "switch_models")
)

# Display censor weights
print(trial_pp.display_censor_weights())

Model details saved to ./models/PP/switch_models\model_details.txt

Censor weight model set with censor event: censored, numerator: ~ x2, denominator: ~ x2 + x1
##  - Numerator formula: 1 - censored ~ x2 
##  - Denominator formula: 1 - censored ~ x2 + x1 
##  - Model fitter type: te_stats_glm_logit 
##  - Weight models not fitted. Use calculate_weights()


In [None]:
# Set censor weight model
trial_itt = trial_itt.set_censor_weight_model(
    censor_event="censored",
    numerator="~ x2",
    denominator="~ x2 + x1",
    pool_models="numerator",
     model_fitter= stats_glm_logit(save_path = ITT_PATH + "switch_models")
)

# Display censor weights
print(trial_itt.display_censor_weights())

Model details saved to ./models/ITT/switch_models\model_details.txt

Censor weight model set with censor event: censored, numerator: ~ x2, denominator: ~ x2 + x1
##  - Numerator formula: 1 - censored ~ x2 
##  - Denominator formula: 1 - censored ~ x2 + x1 
##  - Model fitter type: te_stats_glm_logit 
##  - Weight models not fitted. Use calculate_weights()


## 4. Calculate weights

In [None]:
# Calculate weights for Per-protocol trial
trial_pp = trial_pp.calculate_weights()

# Calculate weights for ITT trial
trial_itt = trial_itt.calculate_weights()


Model details saved to ./models/PP/switch_models\model_details.txt

Model details saved to ./models/PP/switch_models\model_details.txt

Weights calculated for trial: Per-protocol
Model details saved to ./models/ITT/switch_models\model_details.txt

Weights calculated for trial: ITT


In [None]:
trial_itt.show_weight_models()

## Weight Models for Informative Censoring
## ---------------------------------------

## Switch Weight Model not set.

## [Censor Weight Model]
Model: P(censor_event = 0 | X, previous treatment) for denominator

## [[n]]
Model: P(censor_event = 0 | X) for n

  term          estimate     std.error   statistic   p.value
  (Intercept)   1.8941961    0.2071122   9.145746   5.921948e-20
  x2           -0.5898292    0.1693402   -3.483101  4.956409e-04
  x1            0.8552603    0.3452930    2.476912  1.325247e-02

  null.deviance df.null logLik    AIC      BIC      deviance df.residual nobs
  283.0723      425     -132.1655 270.3309 282.4943 264.3309 423         426

  path: ./models/ITT/switch_models\model_details.txt

## [[d0]]
Model: P(censor_event = 0 | X) for d0

  term          estimate     std.error   statistic   p.value
  (Intercept)   1.8941961    0.2071122   9.145746   5.921948e-20
  x2           -0.5898292    0.1693402   -3.483101  4.956409e-04
  x1            0.8552603    0.34

In [None]:
trial_pp.show_weight_models()

## Weight Models for Informative Censoring
## ---------------------------------------

## [Switch Model]
Model: P(switch_event = 0 | X) for numerator

  term          estimate     std.error   statistic   p.value
  (Intercept)   2.4480907    0.1405726   17.415128   6.334656e-68
  x2           -0.4486482    0.1368765   -3.277759   1.046346e-03

  null.deviance df.null logLik    AIC      BIC      deviance df.residual nobs
  404.2156      724     -196.7002 397.4004 406.5727 393.4004 723         725

  path: ./models/PP/switch_models\model_details.txt

## [Censor Weight Model]
Model: P(censor_event = 0 | X, previous treatment) for denominator

## [[n]]
Model: P(censor_event = 0 | X) for n

  term          estimate     std.error   statistic   p.value
  (Intercept)   1.8941961    0.2071122   9.145746   5.921948e-20
  x2           -0.5898292    0.1693402   -3.483101  4.956409e-04
  x1            0.8552603    0.3452930    2.476912  1.325247e-02

  null.deviance df.null logLik    AIC      BIC   