In [2]:
import pandas as pd

In [6]:
# Load the notes table and the annotation table from the local snomed/ directory.
# Per the outputs shown further down: `snomed` has columns (Unnamed: 0, note_id, text)
# and `gt` has columns (Unnamed: 0, note_id, start, end, concept_id).
# NOTE(review): start/end look like character offsets into `text` and concept_id
# like a SNOMED CT code -- confirm against the dataset documentation.
snomed = pd.read_csv('snomed/mimic-iv_notes_training_set.csv')
gt = pd.read_csv('snomed/train_annotations.csv')

In [14]:
# Print the `text` value of the row labelled 1; print() is used so the
# note's embedded newlines are rendered instead of shown as escaped "\n".
print(snomed.text[1])

 
Name:  ___                  Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: MEDICINE
 
Allergies: 
No Known Allergies / Adverse Drug Reactions
 
Attending: ___
 
Chief Complaint:
CHIEF COMPLAINT: Chest pain, NSTEMI
 
Major Surgical or Invasive Procedure:
Cardiac Catheterization

 
History of Present Illness:
___ year old man with a h/o COPD who p/w chest pain and elevated 
troponin. Had CP on ___ after eating, resolved, did not 
seek medical care. CP recurrent ___ and once again, resolved 
with rest. Starting ___ at 2PM he developed left-sided and 
substernal CP that radiated to the left arm.  He described the 
feeling as heavy pressure and stated that he felt lightheaded 
and had a chill but no diaphoresis, nausea, or vomiting. Had 
baseline SOB due to COPD which had been getting progressively 
worse over the past few months but isn't acutely worse at this 
time. Had never had CP before ___, no history of

In [8]:
# Display the notes DataFrame (pandas truncates the middle rows and long text cells).
snomed

Unnamed: 0,note_id,text
0,10060142-DS-9,\nName: ___ Unit No: ___\...
1,10097089-DS-8,\nName: ___ Unit No: ___\...
2,10124346-DS-4,\nName: ___ Unit No: ___\n \n...
3,10302979-DS-5,\nName: ___ Unit No: ___\n...
4,10352433-DS-20,\nName: ___ Unit No: ___\...
...,...,...
199,19859532-DS-19,\nName: ___ Unit No: ___...
200,19871603-DS-14,\nName: ___ Unit No: ___\...
201,19884924-DS-14,\nName: ___ Unit No: __...
202,19895550-DS-7,\nName: ___ Unit No: ___\n \...


In [16]:
# Show at most the first 20 annotation rows belonging to note 10060142-DS-9.
gt.query('note_id=="10060142-DS-9"').head(20)

Unnamed: 0,note_id,start,end,concept_id
0,10060142-DS-9,179,190,91936005
1,10060142-DS-9,228,248,95563007
2,10060142-DS-9,294,322,45595009
3,10060142-DS-9,390,411,95563007
4,10060142-DS-9,425,444,1835003
5,10060142-DS-9,476,496,310244003
6,10060142-DS-9,501,516,19387007
7,10060142-DS-9,541,566,57653000
8,10060142-DS-9,574,582,268910001
9,10060142-DS-9,618,628,13467000


In [17]:
import numpy as np
import random

def get_attention_weights(seqlength, alpha, reverse_mode):
    """Return normalised, exponentially decaying weights of length ``seqlength``.

    The unnormalised weight at position ``i`` is ``exp(-alpha * i)``; the
    vector is divided by its sum so the weights add up to 1.

    Parameters
    ----------
    seqlength : int
        Number of weights to produce.
    alpha : float
        Decay rate of the exponential.
    reverse_mode : bool
        If true, the weights are returned in decaying order (largest first).
        Otherwise the order is flipped so the largest weight comes last.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of ``seqlength`` weights.
    """
    positions = np.asarray(range(seqlength))
    decay = np.exp(-alpha * positions)
    normalised = decay / decay.sum()

    # Without reverse_mode the largest weight is placed at the end.
    return normalised if reverse_mode else normalised[::-1]

def generate_trajectory_final(num_states=3,
                        Num_observations=10,
                        Num_samples=1000,
                        Max_seq=20,
                        Min_seq=3,
                        Max_length = 50,
                        alpha = 1,
                        proportion = 0.5,
                        personalized = 0,
                        reverse_mode=False,
                        P_trans = np.array([[0.9, 0.1, 0.01],
                                            [0.3, 0.6, 0.1],
                                            [0.1, 0.8, 0.1]]),
                        P_0=[0.9, 0.1, 0],
                        mu_=[-10, 5, 10],
                        var_=[0.5, 1, 1.5],
                        seed=None):
    '''
    Simulate irregularly observed state/observation trajectories.

    For every sample a hidden state sequence of length ``Max_length`` is
    simulated. The first state is drawn from ``P_0``; each later state is drawn
    from a mixture of the ``P_trans`` rows of all previous states, weighted by
    ``get_attention_weights`` and renormalised to sum to 1. At every step
    ``Num_observations`` Gaussian values are emitted. Only the time steps
    selected as "visits" are kept in the returned sequences.

    Parameters
    ----------
    num_states : number of hidden states.
    Num_observations : number of observed values emitted per time step.
    Num_samples : number of trajectories to generate.
    Max_seq : exclusive upper bound on the number of visits per trajectory
        (visits are dropped at random until fewer than ``Max_seq`` remain).
    Min_seq : minimum number of visits per trajectory (random time steps are
        added until at least ``Min_seq`` are present).
    Max_length : the time length (number of simulated steps per trajectory).
    alpha : decay rate passed to ``get_attention_weights``.
    proportion : the ratio between actual visits and all possible visits,
        applied over the pooled ``Num_samples * Max_length`` positions.
    personalized : if non-zero, each trajectory's state means are shifted by a
        Uniform(-personalized, personalized) draw per state.
    reverse_mode : passed to ``get_attention_weights``.
    P_trans : (num_states, num_states) array of per-state transition weights.
        Rows need not sum exactly to 1 (the default first row sums to 1.01)
        because the mixed vector is renormalised before sampling.
    P_0 : initial state distribution.
    mu_ : per-state observation mean.
    var_ : per-state scale multiplying a standard normal draw, i.e. it is used
        as a standard deviation despite its name.
    seed : optional int. When given, both ``random`` and ``np.random`` are
        seeded with it so the output is reproducible. ``None`` (default)
        leaves the global generators untouched.

    Returns
    -------
    X_ : list of arrays, one per sample, shape (n_visits, Num_observations).
    S_ : list of arrays, one per sample, hidden state at each visit.
    time_ : list of arrays, one per sample, time-step index of each visit.
    total_visit : total number of visits over all samples.

    Raises
    ------
    ValueError
        If the length constraints cannot be satisfied together.
    '''

    # Reject settings for which the visit-selection loops below can never
    # finish or cannot honour both bounds at once.
    if Max_length < 1:
        raise ValueError("Max_length must be at least 1")
    if Max_seq < 2:
        # Time step 0 is always kept, so a sequence never has fewer than 1 visit.
        raise ValueError("Max_seq must be at least 2")
    if Min_seq >= Max_seq:
        raise ValueError("Min_seq must be smaller than Max_seq")
    if Min_seq > Max_length:
        raise ValueError("Min_seq cannot exceed Max_length")

    if seed is not None:
        # Both generators are used below, so both have to be seeded.
        random.seed(seed)
        np.random.seed(seed)

    trans_matrix = np.asarray(P_trans)

    X_  = []
    S_  = []
    time_ = []
    total_visit = 0

    # All visit positions: choose a fixed fraction of the pooled
    # (sample, time step) grid, then view it as one mask row per sample.
    num_pos = Max_length * Num_samples
    pos_ = random.sample(range(num_pos), int(num_pos * proportion))
    mask_ = np.zeros(num_pos, bool)
    mask_[pos_] = True
    mask_ = mask_.reshape((Num_samples, Max_length))

    for k in range(Num_samples):

        if personalized != 0:
            # Generate a personalized effect on mu
            shift = np.random.uniform(low=-personalized, high=personalized, size=(num_states,))
            current_mu = np.asarray(mu_) + shift
        else:
            current_mu = mu_

        seq_mask = mask_[k]
        seq_mask[0] = True  # the first time step is always a visit
        time = np.where(seq_mask)[0]

        # Too few visits: add random later time steps until Min_seq is reached.
        while len(time) < Min_seq:
            time = np.unique(list(time) + [np.random.randint(1, Max_length)])

        # Too many visits: drop random ones (never step 0) until below Max_seq.
        while len(time) >= Max_seq:
            bad = random.sample(list(time), 1)
            seq_mask[bad] = False
            seq_mask[0] = True
            time = np.where(seq_mask)[0]

        S_new   = []
        X_new   = []

        for u in range(Max_length):

            if u == 0:

                S_new.append(np.random.choice(num_states, 1, p=P_0)[0])

            else:

                # u + 1 weights are requested but only the first u (one per
                # past state) are used; the renormalisation below absorbs the
                # dropped weight.
                weights_    = get_attention_weights(u + 1, alpha, reverse_mode)
                # Weighted sum of the transition rows of every past state.
                P_trans_new = np.sum(trans_matrix[S_new] * weights_[:u, None], axis=0)
                P_trans_new = P_trans_new/np.sum(P_trans_new)

                S_new.append(np.random.choice(num_states, 1, p=P_trans_new)[0])

            current_state = S_new[-1]
            X_new.append((current_mu[current_state] + var_[current_state] * np.random.normal(0, 1, (1, Num_observations))).reshape(-1,))

        # Keep only the visited time steps of the fully simulated trajectory.
        S_.append(np.array(S_new)[time])
        X_.append(np.array(X_new)[time])
        time_.append(time)
        total_visit += len(time)

    return X_, S_ , time_, total_visit

In [20]:
# Generate a synthetic dataset using every default setting of generate_trajectory_final.
X_, S_ , time_, total_visit = generate_trajectory_final()