# NIHCXR Clinical Drift

## Load Libraries

In [1]:
from cyclops.monitor import (
    Detector,
    Experimenter,
    Reductor,
    ClinicalShiftApplicator,
    TSTester
)
from cyclops.datasets.slicing import SlicingConfig
from cyclops.monitor.plotter import plot_drift_samples_pval
from cyclops.monitor.utils import Loader

## Query Data

In [2]:
import os

import pandas as pd
from datasets import Dataset, Image
import numpy as np

from cyclops.monitor.utils import nihcxr_preprocess
import PIL
from torchxrayvision.models import DenseNet
from monai.transforms import AddChanneld, Compose, Lambdad, Resized, ToDeviced, EnsureChannelFirstd
from torchvision.transforms import PILToTensor
import torch
from typing import Dict, List, Optional, Tuple, Union
from functools import partial
from torchxrayvision.datasets import XRayCenterCrop, XRayResizer

In [3]:
# Prefer the GPU when torch reports one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# Keyword arguments shared by every step of the pipeline: each step targets
# the "features" entry and is constructed with allow_missing_keys=True.
_feature_kwargs = dict(keys=("features",), allow_missing_keys=True)


def _rescale_intensity(x):
    """Linearly rescale values so that 0 maps to -1024 and 255 maps to 1024."""
    return ((2 * (x / 255.0)) - 1.0) * 1024


# Per-example preprocessing, applied in this order: add channel, resize,
# rescale intensities, move to `device`.
transforms = Compose(
    [
        AddChanneld(**_feature_kwargs),
        Resized(spatial_size=(1, 224, 224), **_feature_kwargs),
        Lambdad(func=_rescale_intensity, **_feature_kwargs),
        ToDeviced(device=device, **_feature_kwargs),
    ],
)


def apply_transforms(examples: Dict[str, List], transforms: callable) -> dict:
    """Apply per-example transforms to a column-oriented batch.

    Parameters
    ----------
    examples
        Batch in "dict of lists" form: each key is a column name and each
        value is the list of that column's entries. All lists are expected to
        have the same length (the length of the first column is used).
    transforms
        Callable that takes one example (a dict mapping column name to a
        single value) and returns the transformed example as a dict.

    Returns
    -------
    dict
        Batch in "dict of lists" form, built from the keys of the first
        transformed example. An empty batch (no columns, or columns with no
        rows) is returned as empty columns without calling ``transforms``.

    """
    # Nothing to transform: avoid indexing into an empty batch.
    if not examples:
        return {}
    column_names = list(examples)
    num_rows = len(examples[column_names[0]])
    if num_rows == 0:
        return {name: [] for name in column_names}

    # Pivot the dict of lists into a list of per-example dicts. PIL images are
    # converted to tensors on the way, since the transforms operate on tensors
    # rather than on the decoded Image feature type.
    to_tensor = PILToTensor()
    rows = [
        {
            name: to_tensor(values[i])
            if isinstance(values[i], PIL.Image.Image)
            else values[i]
            for name, values in examples.items()
        }
        for i in range(num_rows)
    ]

    # Apply the transforms to each example independently.
    transformed_rows = [transforms(row) for row in rows]

    # Pivot back to a dict of lists, keyed by the first transformed example.
    return {
        name: [row[name] for row in transformed_rows]
        for name in transformed_rows[0]
    }

# Location of the NIH chest X-ray data. Set the NIHCXR_DIR environment
# variable to point at a local copy; the fallback is the path this notebook
# was originally written against.
nihcxr_dir = os.environ.get("NIHCXR_DIR", "/home/akore/NIHCXR")

# Read the metadata CSV and run it through the cyclops preprocessing helper.
# NOTE(review): presumably this adds the "features" image-path column that is
# cast below - confirm against nihcxr_preprocess.
df = pd.read_csv(os.path.join(nihcxr_dir, "Data_Entry_2017.csv"))
df = nihcxr_preprocess(df, nihcxr_dir)
nih_ds = Dataset.from_pandas(df, preserve_index=False)
# Declare "features" as an Image column but leave decoding off for now; the
# column is re-cast with decode=True later in the notebook.
nih_ds = nih_ds.cast_column("features", Image(decode=False))

# Work on a random subset to keep the experiment fast. The generator is
# seeded so a fresh kernel draws the same rows, and the sample size is capped
# at the dataset size so sampling without replacement cannot fail.
SEED = 42
N_SAMPLES = 5000
rng = np.random.default_rng(SEED)
subset_indices = rng.choice(
    nih_ds.num_rows,
    size=min(N_SAMPLES, nih_ds.num_rows),
    replace=False,
)
nih_ds = nih_ds.select(subset_indices)

## Split Source/Target Datasets

In [5]:
# Split the data on the "Patient Gender" column: rows with value "M" form the
# source set and rows with value "F" form the target set.
gender_column = "Patient Gender"
source_slice, target_slice = (
    SlicingConfig(feature_values=[{gender_column: {"value": gender}}])
    for gender in ("M", "F")
)
shifter = ClinicalShiftApplicator("custom", source=source_slice, target=target_slice)

source_ds, target_ds = shifter.apply_shift(nih_ds, num_proc=6)

Filter (num_proc=6):   0%|          | 0/5000 [00:00<?, ? examples/s]

Filter (num_proc=6):   0%|          | 0/5000 [00:00<?, ? examples/s]

In [6]:
# Attach one timestamp per target row, spread between 1/1/2019 and 12/25/2019,
# so the target set can be processed by timestamp later on.
timestamps = pd.date_range(
    start="1/1/2019",
    end="12/25/2019",
    periods=target_ds.num_rows,
)
target_ds = target_ds.add_column("timestamp", timestamps)

Flattening the indices:   0%|          | 0/2120 [00:00<?, ? examples/s]

In [7]:
def _attach_image_transforms(ds):
    """Register the preprocessing pipeline on ``ds`` and enable image decoding.

    The transform is registered for the "features" column only, with
    output_all_columns=True, and the column is then re-cast as a decoded
    Image feature.
    """
    ds = ds.with_transform(
        partial(apply_transforms, transforms=transforms),
        columns=["features"],
        output_all_columns=True,
    )
    return ds.cast_column("features", Image(decode=True))


# Source and target get exactly the same treatment.
source_ds = _attach_image_transforms(source_ds)
target_ds = _attach_image_transforms(target_ds)


## Initialize Detector (Reductor+Tester) and Run Sensitivity Test

In [None]:
# Pretrained torchxrayvision DenseNet, passed to the reductor as its model.
model = DenseNet(weights="densenet121-res224-all")

# Use the `device` selected earlier in the notebook instead of hard-coding
# "cuda", so this cell also runs on machines without a GPU.
reductor = Reductor(
    dr_method="bbse-soft",
    model=model,
    device=device,
)

tester = TSTester(
    tester_method="mmd",
)

# Sensitivity test: a source sample size of 100 against target sample sizes of
# 10, 50 and 100, with num_runs=5.
detector = Detector(
    "balanced_sensitivity_test",
    reductor=reductor,
    tester=tester,
    device=device,
    source_sample_size=100,
    target_sample_size=[10, 50, 100],
    num_runs=5,
)

# Left as the last expression so the notebook displays the result.
detector.detect_shift(source_ds, target_ds)

## Initialize Detector (Reductor+Tester) and Run Rolling Window Experiment

In [None]:
# Pretrained torchxrayvision DenseNet, passed to the reductor as its model.
# Rebuilt here so this cell does not depend on the sensitivity-test cell.
model = DenseNet(weights="densenet121-res224-all")

# Use the `device` selected earlier in the notebook instead of hard-coding
# "cuda", so this cell also runs on machines without a GPU.
reductor = Reductor(
    dr_method="bbse-soft",
    model=model,
    device=device,
)

tester = TSTester(
    tester_method="mmd",
)

# Rolling-window experiment: windows are taken over the "timestamp" column of
# the target set with window_size="1M".
detector = Detector(
    "rolling_window_drift",
    reductor=reductor,
    tester=tester,
    device=device,
    source_sample_size=1000,
    target_sample_size=50,
    timestamp_column="timestamp",
    window_size="1M",
)

# Left as the last expression so the notebook displays the result.
detector.detect_shift(source_ds, target_ds)