# NIHCXR Clinical Drift

## Load Libraries

In [None]:
from cyclops.datasets.slicing import SlicingConfig
from cyclops.monitor import ClinicalShiftApplicator, Detector, Reductor, TSTester
from cyclops.monitor.utils import apply_transforms

## Query Data

In [None]:
import os
from functools import partial

import numpy as np
import pandas as pd
import torch
from datasets import Dataset, Image
from monai.transforms import AddChanneld, Compose, Lambdad, Resized, ToDeviced
from torchxrayvision.models import DenseNet

from cyclops.monitor.utils import nihcxr_preprocess

In [None]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Preprocessing pipeline applied to the "features" entry of each sample.
# Every step is created with ``allow_missing_keys=True`` and moves through:
# add channel axis -> resize -> intensity rescale -> move to ``device``.
transforms = Compose(
    [
        AddChanneld(keys=("features",), allow_missing_keys=True),
        Resized(
            keys=("features",), spatial_size=(1, 224, 224), allow_missing_keys=True
        ),
        Lambdad(
            keys=("features",),
            # Linear rescale: 0 -> -1024 and 255 -> +1024.
            # NOTE(review): assumes 8-bit pixel values in [0, 255] — confirm
            # against the image loader and the model's expected input range.
            func=lambda x: ((2 * (x / 255.0)) - 1.0) * 1024,
            allow_missing_keys=True,
        ),
        ToDeviced(keys=("features",), device=device, allow_missing_keys=True),
    ],
)


# Dataset root. Set the NIHCXR_DIR environment variable to point at a local
# copy; the original hard-coded location remains the default.
nihcxr_dir = os.environ.get("NIHCXR_DIR", "/home/akore/NIHCXR")
df = pd.read_csv(os.path.join(nihcxr_dir, "Data_Entry_2017.csv"))
df = nihcxr_preprocess(df, nihcxr_dir)
nih_ds = Dataset.from_pandas(df, preserve_index=False)
# Declare "features" as an image column with decoding disabled for now.
nih_ds = nih_ds.cast_column("features", Image(decode=False))

# Draw a random subset without replacement. The size is capped at the number
# of available rows because ``np.random.choice(..., replace=False)`` raises
# when asked for more samples than the population holds.
# NOTE: no seed is set, so the subset differs between runs.
sample_size = min(5000, nih_ds.num_rows)
nih_ds = nih_ds.select(
    np.random.choice(nih_ds.num_rows, sample_size, replace=False)
)

## Split Source/Target Datasets

In [None]:
# Two slicing configs on the "Patient Gender" feature:
# value "M" selects the source set, value "F" selects the target set.
source_slice, target_slice = (
    SlicingConfig(feature_values=[{"Patient Gender": {"value": gender}}])
    for gender in ("M", "F")
)
shifter = ClinicalShiftApplicator(
    "custom",
    source=source_slice,
    target=target_slice,
)

# Produce the source/target datasets from the sampled data.
source_ds, target_ds = shifter.apply_shift(nih_ds, num_proc=6)

In [None]:
# One timestamp per target row, evenly spaced between 1 Jan and 25 Dec 2019.
# The rolling-window detector cell reads this column by the name "timestamp".
timestamps = pd.date_range(
    start="1/1/2019",
    end="12/25/2019",
    periods=target_ds.num_rows,
)
target_ds = target_ds.add_column("timestamp", timestamps)

In [None]:
def _with_image_transforms(ds):
    """Attach ``transforms`` to the "features" column and enable image decoding.

    The transform is registered through ``with_transform`` for the "features"
    column while all other columns are still returned; afterwards the column
    is cast to ``Image(decode=True)``.
    """
    ds = ds.with_transform(
        partial(apply_transforms, transforms=transforms),
        columns=["features"],
        output_all_columns=True,
    )
    return ds.cast_column("features", Image(decode=True))


# Same preparation for both datasets, source first and then target.
source_ds = _with_image_transforms(source_ds)
target_ds = _with_image_transforms(target_ds)

## Initialize Detector (Reductor+Tester) and Run Sensitivity Test

In [None]:
# Pretrained DenseNet used by the reductor.
model = DenseNet(weights="densenet121-res224-all")

# Use the notebook-wide ``device`` (CUDA when available, otherwise CPU) rather
# than a hard-coded "cuda", so the reductor, the detector and the ``ToDeviced``
# transform all agree and the cell also runs on CPU-only machines.
reductor = Reductor(dr_method="bbse-soft", model=model, device=device)

tester = TSTester(
    tester_method="mmd",
)

# Sensitivity test: a source sample of 100 against target samples of
# 10, 50 and 100, with ``num_runs=5``.
detector = Detector(
    "sensitivity_test",
    reductor=reductor,
    tester=tester,
    device=device,
    source_sample_size=100,
    target_sample_size=[10, 50, 100],
    num_runs=5,
)

detector.detect_shift(source_ds, target_ds)

## Initialize Detector (Reductor+Tester) and Run Rolling Window Experiment

In [None]:
# Pretrained DenseNet used by the reductor.
model = DenseNet(weights="densenet121-res224-all")

# Use the notebook-wide ``device`` (CUDA when available, otherwise CPU) rather
# than a hard-coded "cuda", so the reductor, the detector and the ``ToDeviced``
# transform all agree and the cell also runs on CPU-only machines.
reductor = Reductor(dr_method="bbse-soft", model=model, device=device)

tester = TSTester(
    tester_method="mmd",
)

# Rolling-window drift: windows of size "1M" taken over the target dataset's
# "timestamp" column, with a source sample of 1000 and a target sample of 50.
detector = Detector(
    "rolling_window_drift",
    reductor=reductor,
    tester=tester,
    device=device,
    source_sample_size=1000,
    target_sample_size=50,
    timestamp_column="timestamp",
    window_size="1M",
)

detector.detect_shift(source_ds, target_ds)