TODO:
 * wait, why would it look at the training data? that makes no sense.

In [313]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import random

In [341]:
from triage_detector import triage_detector
from importlib import reload
# Re-execute the already-imported module so that edits made to its source are
# picked up, then re-bind the class name from the freshly reloaded module.
reload(triage_detector)
MultiDriftDetector = triage_detector.MultiDriftDetector

In [342]:
import wasabi

# Shared printer used by later cells for formatted console messages
# (msg.info, msg.divider).
msg = wasabi.Printer()

## Generate a synthetic data stream to use

First we set up the drift detector.

In [353]:
# Directory the detector writes its data and status to, so that the dash app
# can read them and the detector can be restored in a later session.
write_dir = './data/demo'


def notify_clinicians(drift_message):
    """Announce a drift message on the console.

    Handed to the detector as its ``drift_action``, i.e. what should happen
    to a drift message when drift is detected.

    Args:
        drift_message: The message to show; it is passed to ``print`` as is.
    """
    print()
    msg.info('The clinicians have received the following message:')
    # One call emits the message followed by the trailing blank line.
    print(drift_message, end='\n\n')


# Instantiate the MultiDriftDetector with the output directory and callback.
detector = MultiDriftDetector(write_dir=write_dir, drift_action=notify_clinicians)

# Register the feature names with the detector.
n_features = 10
feature_names = [f'Feature{i}' for i in range(n_features)]
detector.set_features(feature_names)

# Register the label names with the detector.
n_labels = 4
label_names = [f'Priority{i}' for i in range(n_labels)]
detector.set_labels(label_names)

Next we simulate a clinical environment.

In [354]:
# Referrals currently consist of binary features only.
# Compatibility with sequential and categorical features is in progress.
class GP:
    """A simulated GP who fills out referral documents at random."""

    # Probability that any single feature of a referral is set to 1.
    feature_rate = 0.2

    def make_referral(self):
        """Return a new referral of ``n_features`` Bernoulli draws.

        The rate is read from the class attribute ``GP.feature_rate`` at call
        time, so reassigning it on the class affects every later referral.
        """
        rate = GP.feature_rate
        referral = stats.bernoulli.rvs(p=rate, size=n_features)
        return referral

# Clinicians always give the lowest priority.
class Clinician:
    """A simulated clinician who assigns the same label to every referral."""

    # The single label ever handed out: the first entry of label_names.
    only_label = label_names[0]

    def label(self, instance):
        """Return the fixed label; ``instance`` is not inspected."""
        assigned = Clinician.only_label
        return assigned

# The model always gives the lowest priority.
class Model:
    """A stub classifier that puts its whole score on a single label."""

    # Index of the only label the model ever predicts.
    only_label = 0

    def predict(self, x):
        """Return a one-hot score list of length ``n_labels``.

        ``x`` is ignored. The hot index is looked up through ``self`` rather
        than through the class, so an ``only_label`` assigned on an instance
        is honoured; without an instance-level value the lookup falls back to
        the class attribute, so assigning on the class still works too.

        Args:
            x: The referral to score (unused).

        Returns:
            A list of ``n_labels`` ints, all 0 except a 1 at ``only_label``.
        """
        prediction = [0] * n_labels
        prediction[self.only_label] = 1
        return prediction

In [355]:
# Create the GP used by the rest of the notebook and draw one sample referral;
# the bare `ref` expression displays it as the cell output.
gp = GP()
ref = gp.make_referral()
ref

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

In [356]:
# Create the clinician used by the rest of the notebook and show the label it
# assigns to the sample referral.
clinician = Clinician()
clinician.label(ref)

'Priority0'

In [357]:
# Create the model used by the rest of the notebook and show its prediction
# for the sample referral.
model = Model()
model.predict(ref)

[1, 0, 0, 0]

### Simulate A Clinical Setting

In [358]:
# We'll simulate a clinical environment with 2000 timesteps.
n_timesteps = 2000
# Feature drift will occur one third of the way through.
feature_drift_t = n_timesteps // 3
# Concept drift will occur two thirds of the way through.
concept_drift_t = n_timesteps // 3 * 2

# Next unused referral id; ids are handed out sequentially.
id_count = 0
# Referrals that have been created but not yet labelled, as (id, referral).
backlog = []

for t in range(n_timesteps):

    ###############################################################
    ## Simulate drift if appropriate.                            ##
    ###############################################################

    # Concept drift is simulated by changing the (only) label that the model uses.
    # The attribute is set on the class (like GP.feature_rate below) so that
    # the change is visible however predict() looks the attribute up; setting
    # it only on the `model` instance is not seen by a class-level lookup.
    if t == concept_drift_t:
        msg.divider('CONCEPT DRIFT HAS HAPPENED')
        print()
        Model.only_label = 1

    # Simulate feature drift by changing the rate of every feature.
    if t == feature_drift_t:
        msg.divider('FEATURE DRIFT HAS HAPPENED')
        print()
        GP.feature_rate = 0.8

    ###############################################################
    ## Option 1: A GP sends a new referral                       ##
    ###############################################################
    # The second term forces the first 20 events to be referrals, which
    # builds up a backlog of documents for the clinician to draw from.
    if random.random() < 0.5 or id_count < 20:

        ref = gp.make_referral()
        pred = model.predict(ref)

        # The instance and its prediction are registered under the same id.
        detector.add_instance(ref, instance_id=id_count, description=f'ID={id_count}')
        detector.add_prediction(pred, instance_id=id_count, description=f'ID={id_count}')

        print(f'A GP has created a referral with id {id_count}.')

        backlog.append((id_count, ref))
        id_count += 1

    ###############################################################
    ## Option 2: A clinician adds a priority label to a referral ##
    ###############################################################
    else:

        # Nothing is waiting to be labelled, so this timestep is a no-op.
        if not backlog:
            continue

        # A clinician randomly selects a referral, removing it from the
        # backlog, and labels it.
        ref_id, ref = backlog.pop(random.randrange(len(backlog)))

        label = clinician.label(ref)
        detector.add_label(label, instance_id=ref_id, description=f'ID={ref_id}')

        print(f'A clincian has labelled a referral with id {ref_id}.')

A GP has created a referral with id 0.
A GP has created a referral with id 1.
A GP has created a referral with id 2.
A GP has created a referral with id 3.
A GP has created a referral with id 4.
A GP has created a referral with id 5.
A GP has created a referral with id 6.
A GP has created a referral with id 7.
A GP has created a referral with id 8.
A GP has created a referral with id 9.
A GP has created a referral with id 10.
A GP has created a referral with id 11.
A GP has created a referral with id 12.
A GP has created a referral with id 13.
A GP has created a referral with id 14.
A GP has created a referral with id 15.
A GP has created a referral with id 16.
A GP has created a referral with id 17.
A GP has created a referral with id 18.
A GP has created a referral with id 19.
A clincian has labelled a referral with id 7.
A clincian has labelled a referral with id 2.
A GP has created a referral with id 20.
A GP has created a referral with id 21.
A GP has created a referral with id 22

In [359]:
# Ask the detector for its current status; in the recorded run this printed
# one line each for concept, feature and label drift.
detector.get_status()

[38;5;2m✔ No concept drift detected.[0m
[38;5;2m✔ No feature drift detected.[0m
[38;5;2m✔ No label drift detected.[0m
