---

In [1]:
import os
import sys
import pickle
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Seed every random number generator the notebook relies on (Python, NumPy,
# PyTorch) so that splitting, shuffling and weight initialisation are repeatable.
seed = 24
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)
# NOTE(review): hash randomisation is normally fixed when the interpreter starts,
# so this assignment presumably only reaches child processes — confirm if needed.
os.environ["PYTHONHASHSEED"] = str(seed)

  from .autonotebook import tqdm as notebook_tqdm


---

In [2]:
def _read_pickle(path):
    """Return the object unpickled from the file at `path`."""
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Labels, visit sequences and the code map written by the preprocessing step.
hfs = _read_pickle('../output/output.hfs')
seqs = _read_pickle('../output/output.seqs')
types = _read_pickle('../output/output.types')

print(len(hfs))
print(len(seqs))
# Every patient must have exactly one label.
assert len(hfs) == len(seqs)
print(len(types))

5447
5447
4512


where

- `hfs`: contains the heart failure label (0: normal, 1: heart failure) for each patient
- `seqs`: contains a list of visits (in ICD9 codes) for each patient
- `types`: contains the map from ICD9 codes to ICD-9 labels

In [3]:
# Replication of Table 1 (basic dataset statistics) from the GRAM paper.
print("Table 1: Basic statistics of MIMIC-III:")
print("# of patients:", len(hfs))

# One entry per visit, holding how many diagnosis codes that visit carries.
codes_per_visit = [len(visit) for patient_record in seqs for visit in patient_record]
num_visits = len(codes_per_visit)
num_codes = sum(codes_per_visit)
max_codes_in_visit = max(codes_per_visit, default=0)

print("# of visits:", num_visits)
print("Avg. # of visits per patient:", round(num_visits / len(hfs), 2))
print("# of unique ICD9 codes: ", len(types))
print("Avg. # of codes per visit:", round(num_codes / num_visits, 2))
print("Max # of codes per visit:", max_codes_in_visit)

# Class balance of the heart-failure label (labels are summed, so 1 = HF).
print("Number of HF patients:", sum(hfs))
print("Number of normal patients:", len(hfs) - sum(hfs))
print("Ratio of HF patients: %.2f" % (sum(hfs) / len(hfs)))

# Show the patient stored at index 14 as a worked example.
example_label, example_visits = hfs[14], seqs[14]
print("\nPatient at index 14:")
print("Heart Failure status:", example_label)
for visit, visit_codes in enumerate(example_visits):
    # NOTE(review): this header line repeats the heart-failure label for every
    # visit rather than anything visit-specific — confirm that is intended.
    print("Visits and diagnosis:", example_label)
    print(f"\t{visit}-th visit id:", visit)
    print(f"\t{visit}-th visit diagnosis labels:", visit_codes)

Table 1: Basic statistics of MIMIC-III:
# of patients: 5447
# of visits: 11902
Avg. # of visits per patient: 2.19
# of unique ICD9 codes:  4512
Avg. # of codes per visit: 11.32
Max # of codes per visit: 39
Number of HF patients: 1280
Number of normal patients: 4167
Ratio of HF patients: 0.23

Patient at index 14:
Heart Failure status: 1
Visits and diagnosis: 1
	0-th visit id: 0
	0-th visit diagnosis labels: [250, 157, 251, 252, 5, 11, 253, 0, 12]
Visits and diagnosis: 1
	1-th visit id: 1
	1-th visit diagnosis labels: [107, 28, 16, 254, 255, 256, 0, 5, 11]
Visits and diagnosis: 1
	2-th visit id: 2
	2-th visit diagnosis labels: [257, 41, 258, 62, 259, 260, 261, 139, 16, 180, 59, 262, 263, 264, 265]


## 1 Build the dataset [30 points]

### 1.1 CustomDataset [5 points]

First, let us implement a custom dataset using PyTorch class `Dataset`, which will characterize the key features of the dataset we want to generate.

We will use the sequences of diagnosis codes `seqs` as input and heart failure `hfs` as output.

In [4]:
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    """Map-style dataset pairing each patient's visit sequence with a label.

    Samples are handed back exactly as stored; nothing is converted to a
    tensor inside this class.
    """

    def __init__(self, seqs, hfs):
        # x holds the inputs (visit sequences), y the targets (labels).
        self.x, self.y = seqs, hfs

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.y)

    def __getitem__(self, index):
        """Return the `(visit sequence, label)` pair stored at `index`."""
        sample = (self.x[index], self.y[index])
        return sample
        
# Wrap the loaded visit sequences (inputs) and heart-failure labels (targets).
dataset = CustomDataset(seqs, hfs)

### 1.2 Collate Function [20 points]

Note that we do not convert the data to tensors in `CustomDataset`. Instead, we will do this using a collate function `collate_fn()`.

This collate function `collate_fn()` will be called by `DataLoader` after fetching a list of samples using the indices from `CustomDataset` to collate the list of samples into batches.

For example, assume the `DataLoader` gets a list of two samples.

```
[ [ [0, 1, 2], [8, 0] ], 
  [ [12, 13, 6, 7], [12], [23, 11] ] ]
```

where the first sample has two visits `[0, 1, 2]` and `[8, 0]` and the second sample has three visits `[12, 13, 6, 7]`, `[12]`, and `[23, 11]`.

The collate function `collate_fn()` is supposed to pad them into the same shape (3, 4), where 3 is the maximum number of visits and 4 is the maximum number of diagnosis codes.

``` 
[ [ [0, 1, 2, *0*], [8, 0, *0*, *0*], [*0*, *0*, *0*, *0*]  ], 
  [ [12, 13, 6, 7], [12, *0*, *0*, *0*], [23, 11, *0*, *0*] ] ]
```

Further, the padding information will be stored in a mask with the same shape, where 1 indicates that the diagnosis code at this position is from the original input, and 0 indicates that the diagnosis code at this position is the padded value.

```
[ [ [1, 1, 1, 0], [1, 1, 0, 0], [0, 0, 0, 0] ], 
  [ [1, 1, 1, 1], [1, 0, 0, 0], [1, 1, 0, 0] ] ]
```

Lastly, we will have another diagnosis sequence in reversed time. This will be used in our RNN model for masking. Note that we only flip the true visits.

``` 
[ [ [8, 0, *0*, *0*], [0, 1, 2, *0*], [*0*, *0*, *0*, *0*]  ], 
  [ [23, 11, *0*, *0*], [12, *0*, *0*, *0*], [12, 13, 6, 7] ] ]
```

And a reversed mask as well.

```
[ [ [1, 1, 0, 0], [1, 1, 1, 0], [0, 0, 0, 0] ], 
  [ [1, 1, 0, 0], [1, 0, 0, 0], [1, 1, 1, 1], ] ]
```

We need to pad the sequences to the same length so that we can do batch training on GPU. We also need this mask so that, during training, we can ignore the padded values, as they do not contain any information.

In [5]:
def collate_fn(data):
    """
    Collate a list of samples into one padded batch.

    Every patient is padded to the shape (max # visits, max # diagnosis codes),
    where both maxima are taken over the samples in `data`. Padded positions
    hold the value 0 in `x` and False in `masks`; because 0 is also written for
    padding, `masks` is what distinguishes real codes from padding.

    Arguments:
        data: a list of `(visit sequence, label)` samples fetched from `CustomDataset`

    Outputs:
        x: a tensor of shape (# patients, max # visits, max # diagnosis codes) of type torch.long
        masks: a tensor with the same shape as `x` of type torch.bool; True marks a real code
        y: a tensor of shape (# patients) of type torch.float

    NOTE: the time-reversed copies (`rev_x`, `rev_masks`) that a bi-directional
        RNN would need are not produced here; per the original author's note,
        gram_hf.py's padMatrix() does not collate the data that way either.
    """
    sequences, labels = zip(*data)

    y = torch.tensor(labels, dtype=torch.float)

    num_patients = len(sequences)
    max_num_visits = max(len(patient) for patient in sequences)
    max_num_codes = max(len(visit) for patient in sequences for visit in patient)

    batch_shape = (num_patients, max_num_visits, max_num_codes)
    x = torch.zeros(batch_shape, dtype=torch.long)
    masks = torch.zeros(batch_shape, dtype=torch.bool)

    for i_patient, patient in enumerate(sequences):
        for j_visit, visit in enumerate(patient):
            n_codes = len(visit)
            if n_codes == 0:
                # Nothing to copy; the row stays fully padded.
                continue
            # One slice assignment per visit instead of one tensor write per code.
            x[i_patient, j_visit, :n_codes] = torch.tensor(visit, dtype=torch.long)
            masks[i_patient, j_visit, :n_codes] = True

    return x, masks, y

In [6]:
'''
AUTOGRADER CELL. DO NOT MODIFY THIS.
'''

from torch.utils.data import DataLoader

# Pull a single batch of 10 samples through `collate_fn` to check its outputs.
loader = DataLoader(dataset, batch_size=10, collate_fn=collate_fn)
loader_iter = iter(loader)
# `collate_fn` returns three tensors in this notebook, so the five-way
# unpacking of the reference check is kept only as a commented-out line.
#x, masks, rev_x, rev_masks, y = next(loader_iter)
x, masks, y = next(loader_iter)

#assert x.dtype == rev_x.dtype == torch.long
assert x.dtype == torch.long
assert y.dtype == torch.float
assert masks.dtype == torch.bool
#assert masks.dtype == rev_masks.dtype == torch.bool

print(x.shape)
#print(rev_x.shape)
print(masks.shape)
#print(rev_masks.shape)
# NOTE(review): the reference shape check is disabled; the recorded output for
# this batch is (10, 3, 19) rather than (10, 3, 24), presumably because a
# different dataset is loaded here — confirm.
#assert x.shape == rev_x.shape == masks.shape == rev_masks.shape == (10, 3, 24)
assert y.shape == (10,)



torch.Size([10, 3, 19])
torch.Size([10, 3, 19])


Now we have `CustomDataset` and `collate_fn()`. Let us split the dataset into training and validation sets.

In [7]:
from torch.utils.data.dataset import random_split

# Fraction of the whole dataset that goes into the training split; the
# remainder becomes the validation split. Other ratios tried: 0.07, 0.21, 0.28.
TRAIN_FRACTION = 0.14
split = int(len(dataset) * TRAIN_FRACTION)

# Author's note on the ratio sweep:
#   I modified RNN/rnn.ipynb to calculate AUC for a given train dataset ratio.
#   You can change the train ratio here:
#       split = int(len(dataset)*0.2)
#   We need AUC for 10%,20%,…,100%.
#
#   I trained the model using 7%,14%,…,70% of the train dataset, with 20% as
#   test dataset and the rest as valid dataset. So
#       int(len(dataset)*0.07)
#       int(len(dataset)*0.14)
#       int(len(dataset)*0.21)
#       …
#       int(len(dataset)*0.70)
#   might be better to compare apples to apples.

lengths = [split, len(dataset) - split]
train_dataset, val_dataset = random_split(dataset, lengths)

print("Length of train dataset:", len(train_dataset))
print("Length of val dataset:", len(val_dataset))

Length of train dataset: 762
Length of val dataset: 4685


### 1.3 DataLoader [5 points]

Now, we can load the dataset into the data loader.

In [8]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset, collate_fn, batch_size=32):
    '''
    Build the data loaders for the training and validation datasets.

    Only the training loader shuffles its samples; the validation loader keeps
    the dataset order.

    Arguments:
        train_dataset: dataset used for training (e.g. a split of `CustomDataset`)
        val_dataset: dataset used for validation
        collate_fn: function that turns a list of samples into one batch
        batch_size: number of samples per batch for both loaders (default 32)

    Outputs:
        train_loader, val_loader: train and validation dataloaders
    '''
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
    )
    val_loader = DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
    )
    return train_loader, val_loader

# Build the loaders over the train/validation split, batching with `collate_fn`.
train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

## 2 Naive RNN [35 points] 

Let us implement a naive bi-directional RNN model.

<img src="img/bi-rnn.jpg" width="600"/>

Remember from class that, first of all, we need to transform the diagnosis code for each visit of a patient to an embedding. To do this, we can use `nn.Embedding()`, where `num_embeddings` is the number of diagnosis codes and `embedding_dim` is the embedding dimension.

Then, we can construct a simple RNN structure. Each input is this multi-hot vector. At the 0-th visit, this has $\boldsymbol{X}_0$, and at t-th visit, this has $\boldsymbol{X}_t$.

Each input is then mapped to a hidden state $\boldsymbol{\overleftrightarrow{h}}_t$. The forward hidden state $\boldsymbol{\overrightarrow{h}}_t$ is determined by $\boldsymbol{\overrightarrow{h}}_{t-1}$ and the corresponding current input $\boldsymbol{X}_t$.

Similarly, we will have another RNN to process the sequence in the reverse order, so that the hidden state $\boldsymbol{\overleftarrow{h}}_t$ is determined by $\boldsymbol{\overleftarrow{h}}_{t+1}$ and $\boldsymbol{X}_t$.

Finally, once we have the $\boldsymbol{\overrightarrow{h}}_T$ and $\boldsymbol{\overleftarrow{h}}_{0}$, we will concatenate the two vectors as the feature vector and train a NN to perform the classification.

Now, let us build this model. The forward steps will be:

    1. Pass the sequence through the embedding layer;
    2. Sum the embeddings for each diagnosis code up for a visit of a patient;
    3. Pass the embeddings through the RNN layer;
    4. Obtain the hidden state at the last visit;
    5. Do 1-4 for both directions and concatenate the hidden states.
    6. Pass the hidden state through the linear and activation layers.

### 2.1 Mask Selection [20 points]

Importantly, you need to use `masks` to mask out the padding before step 2 and before step 4. So, let us first perform the mask selection.

In [9]:
def sum_embeddings_with_mask(x, masks):
    """
    Add up the code embeddings of every visit, counting only unpadded codes.

    Arguments:
        x: the embeddings of diagnosis sequence of shape (batch_size, # visits, # diagnosis codes, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        sum_embeddings: the sum of embeddings of shape (batch_size, # visits, embedding_dim)

    NOTE: implemented with tensor operations only; the notebook's check
        rejects explicit loop statements in this function.
    """
    # A trailing axis lets the mask broadcast over embedding_dim; padded codes
    # are multiplied by zero before the codes axis (dim 2) is summed away.
    keep = masks.unsqueeze(-1)
    return (x * keep).sum(dim=2)

In [10]:
'''
AUTOGRADER CELL. DO NOT MODIFY THIS.
'''

import random
import ast
import inspect


def uses_loop(function):
    """Return True if the source of `function` contains a `for`, `while` or `async for` statement."""
    # AST node types that count as a loop statement.
    loop_statements = ast.For, ast.While, ast.AsyncFor

    # Parse the function's own source text and visit every node in its AST.
    nodes = ast.walk(ast.parse(inspect.getsource(function)))
    return any(isinstance(node, loop_statements) for node in nodes)

def generate_random_mask(batch_size, max_num_visits , max_num_codes):
    """
    Build a random boolean padding mask of shape (batch_size, max_num_visits, max_num_codes).

    Each sample gets between 1 and `max_num_visits` leading visits, and each of
    those visits gets between 1 and `max_num_codes` leading True entries; every
    other position is False.
    """
    # Number of real visits drawn for each sample.
    num_visits = [random.randint(1, max_num_visits) for _ in range(batch_size)]
    num_codes = []
    for n in num_visits:
        # Code count per visit slot; slots past the n real visits stay at 0.
        num_codes_visit = [0] * max_num_visits
        for i in range(n):
            num_codes_visit[i] = (random.randint(1, max_num_codes))
        num_codes.append(num_codes_visit)
    # One all-True vector per visit slot, as long as that slot's code count.
    masks = [torch.ones((l,), dtype=torch.bool) for num_codes_visit in num_codes for l in num_codes_visit]
    # Right-pad every vector with zeros up to max_num_codes and stack them.
    masks = torch.stack([torch.cat([i, i.new_zeros(max_num_codes - i.size(0))], 0) for i in masks], 0)
    masks = masks.view((batch_size, max_num_visits, max_num_codes)).bool()
    return masks


# Sizes of the synthetic batch used for the check below.
batch_size = 16
max_num_visits = 10
max_num_codes = 20
embedding_dim = 100

# Fixed torch seed so the random embeddings are the same on every run.
torch.random.manual_seed(7)
x = torch.randn((batch_size, max_num_visits , max_num_codes, embedding_dim))
masks = generate_random_mask(batch_size, max_num_visits , max_num_codes)
out = sum_embeddings_with_mask(x, masks)

# The function must contain no loop statement and must sum away the codes axis.
assert uses_loop(sum_embeddings_with_mask) is False
assert out.shape == (batch_size, max_num_visits, embedding_dim)




In [11]:
def get_last_visit(hidden_states, masks):
    """
    Pick, for every patient, the hidden state of the last true (unpadded) visit.

    Arguments:
        hidden_states: the hidden states of each visit of shape (batch_size, # visits, embedding_dim)
        masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

    Outputs:
        last_hidden_state: the hidden state for the last true visit of shape (batch_size, embedding_dim)

    NOTE: implemented with tensor operations only; the notebook's check
        rejects explicit loop statements in this function.
    """
    # A visit counts as true when at least one of its code slots is unmasked.
    is_true_visit = masks.sum(dim=2) > 0
    # Number of true visits minus one is used as the index of the last one
    # (assumes true visits come before the padding visits — TODO confirm).
    last_index = is_true_visit.sum(dim=1) - 1
    rows = range(hidden_states.shape[0])
    return hidden_states[rows, last_index, :]


In [12]:
'''
AUTOGRADER CELL. DO NOT MODIFY THIS.
'''

# The function must contain no loop statement.
assert uses_loop(get_last_visit) is False

# Sizes of the synthetic batch used for the check below.
max_num_visits = 10
batch_size = 16
max_num_codes = 20
embedding_dim = 100

# Fixed torch seed so the random hidden states are the same on every run.
torch.random.manual_seed(7)
hidden_states = torch.randn((batch_size, max_num_visits, embedding_dim))
masks = generate_random_mask(batch_size, max_num_visits , max_num_codes)
out = get_last_visit(hidden_states, masks)

# Exactly one hidden state per sample must remain.
assert out.shape == (batch_size, embedding_dim)




### 2.2 Build NaiveRNN [15 points]

In [13]:
class NaiveRNN(nn.Module):

    """
    Single-direction GRU classifier over a patient's sequence of visits.

    Forward steps:
        1. embed every diagnosis code;
        2. sum the code embeddings of each visit, with padding masked out;
        3. pass the visit embeddings through the GRU;
        4. take the GRU output at the last true visit;
        5. map it to one probability with a linear layer and a sigmoid.

    NOTE: this class has no reverse-direction RNN; only `x` and `masks` are
        consumed, and the linear layer takes `hidden_size` input features.
    """

    def __init__(self, num_codes, embedding_dim=128, hidden_size=128):
        """
        Arguments:
            num_codes: total number of diagnosis codes (rows of the embedding table)
            embedding_dim: size of each code embedding and of the GRU input (default 128)
            hidden_size: size of the GRU hidden state and of the linear layer input (default 128)
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=num_codes, embedding_dim=embedding_dim)
        self.rnn = nn.GRU(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)
        # Author's note: the GRAM paper uses Softmax for the activation function.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, masks):
        """
        Arguments:
            x: the diagnosis sequence of shape (batch_size, # visits, # diagnosis codes)
            masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

        Outputs:
            probs: probabilities of shape (batch_size)
        """

        batch_size = x.shape[0]

        # 1. Pass the sequence through the embedding layer.
        x = self.embedding(x)
        # 2. Sum the embeddings of the diagnosis codes of each visit.
        x = sum_embeddings_with_mask(x, masks)

        # 3. Pass the visit embeddings through the RNN layer.
        output, _ = self.rnn(x)
        # 4. Obtain the hidden state at the last true visit.
        true_h_n = get_last_visit(output, masks)

        # 5. Pass the hidden state through the linear and activation layers.
        logits = self.fc(true_h_n)
        probs = self.sigmoid(logits)
        return probs.view(batch_size)
    

# Instantiate the model with one embedding row per entry in `types`.
naive_rnn = NaiveRNN(num_codes = len(types))
# Bare expression: displays the module summary as the cell output.
naive_rnn

NaiveRNN(
  (embedding): Embedding(4512, 128)
  (rnn): GRU(128, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

## 3 Model Training [35 points]

### 3.1 Loss and Optimizer [5 points]

In [14]:
# Binary cross-entropy as the loss (`criterion`), and Adam with learning rate
# 0.001 over all parameters of `naive_rnn` as the optimizer (`optimizer`).
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(naive_rnn.parameters(), lr=0.001)

### 3.2 Evaluate [10 points]

Then, let us implement the `eval_model()` function first.

In [15]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score


def eval_model(model, val_loader):

    """
    Evaluate the model on every batch of a dataloader.

    Arguments:
        model: the RNN model; it is called as `model(x, masks)` and its output
            is treated as one probability per sample
        val_loader: validation dataloader yielding `(x, masks, y)` batches

    Outputs:
        precision: overall precision score
        recall: overall recall score
        f1: overall f1 score
        roc_auc: overall roc_auc score

    Precision, recall and f1 use `average='binary'` on predictions obtained by
    thresholding the probabilities at 0.5; roc_auc uses the raw probabilities.
    """

    model.eval()
    score_batches = []
    label_batches = []
    # Inference only: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        for x, masks, y in val_loader:
            y_hat = model(x, masks)
            score_batches.append(y_hat.detach().to('cpu'))
            label_batches.append(y.detach().to('cpu'))

    # Concatenate once at the end instead of re-allocating after every batch.
    y_score = torch.cat(score_batches, dim=0)
    y_true = torch.cat(label_batches, dim=0).long()
    y_pred = (y_score > 0.5).long()

    p, r, f, _ = precision_recall_fscore_support(
        y_true.numpy(), y_pred.numpy(), average='binary'
    )
    roc_auc = roc_auc_score(y_true.numpy(), y_score.numpy())

    return p, r, f, roc_auc

In [16]:
'''
AUTOGRADER CELL. DO NOT MODIFY THIS.
'''

# Smoke test: when cells run in order, the model has not been trained yet, so
# only the scalar-ness of the four metrics is checked, not their values.
p, r, f, roc_auc = eval_model(naive_rnn, val_loader)
assert p.size == 1, "Precision should be a scalar."
assert r.size == 1, "Recall should be a scalar."
assert f.size == 1, "F1 should be a scalar."
assert roc_auc.size == 1, "ROC-AUC should be a scalar."



### 3.3 Training and evaluation [20 points]

Now let us implement the `train()` function. Note that `train()` should call `eval_model()` at the end of each training epoch to see the results on the validation dataset.

In [17]:
def train(model, train_loader, val_loader, n_epochs, loss_fn=None, opt=None):
    """
    Train the model and report validation metrics after every epoch.

    Arguments:
        model: the RNN model; it is called as `model(x, masks)`
        train_loader: training dataloader yielding `(x, masks, y)` batches
        val_loader: validation dataloader passed to `eval_model()`
        n_epochs: total number of epochs
        loss_fn: loss called as `loss_fn(y_hat, y)`; when omitted, the
            notebook-level `criterion` is used
        opt: optimizer stepped once per batch; when omitted, the notebook-level
            `optimizer` is used

    Prints the mean training loss of each epoch followed by the precision,
    recall, f1 and roc_auc returned by `eval_model()`. Returns nothing.

    NOTE: pass `opt` explicitly when training a model other than the one the
        notebook-level `optimizer` was built for; otherwise that model's
        parameters are never updated.
    """
    # Fall back to the notebook-level objects so existing four-argument calls
    # behave exactly as before.
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt

    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x, masks, y in train_loader:
            opt.zero_grad()
            y_hat = model(x, masks)
            loss = loss_fn(y_hat, y)
            loss.backward()
            opt.step()

            train_loss += loss.item()
        # Mean of the per-batch losses over the epoch.
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        p, r, f, roc_auc = eval_model(model, val_loader)
        print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}'
              .format(epoch+1, p, r, f, roc_auc))

In [18]:
# number of epochs to train the model
# NOTE(review): in the recorded run the training loss is about 0.01 by epoch 10
# while validation roc_auc stays at 0.88, so 100 epochs may be more than needed
# — confirm.
n_epochs = 100
train(naive_rnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 0.591764
Epoch: 1 	 Validation p: 0.41, r:0.03, f: 0.06, roc_auc: 0.72
Epoch: 2 	 Training Loss: 0.417939
Epoch: 2 	 Validation p: 0.63, r:0.20, f: 0.30, roc_auc: 0.79
Epoch: 3 	 Training Loss: 0.299142
Epoch: 3 	 Validation p: 0.72, r:0.29, f: 0.41, roc_auc: 0.83
Epoch: 4 	 Training Loss: 0.193046
Epoch: 4 	 Validation p: 0.73, r:0.44, f: 0.55, roc_auc: 0.85
Epoch: 5 	 Training Loss: 0.106736
Epoch: 5 	 Validation p: 0.71, r:0.54, f: 0.61, roc_auc: 0.86
Epoch: 6 	 Training Loss: 0.058417
Epoch: 6 	 Validation p: 0.73, r:0.58, f: 0.65, roc_auc: 0.87
Epoch: 7 	 Training Loss: 0.033947
Epoch: 7 	 Validation p: 0.72, r:0.62, f: 0.67, roc_auc: 0.87
Epoch: 8 	 Training Loss: 0.021183
Epoch: 8 	 Validation p: 0.71, r:0.63, f: 0.67, roc_auc: 0.87
Epoch: 9 	 Training Loss: 0.014437
Epoch: 9 	 Validation p: 0.73, r:0.65, f: 0.69, roc_auc: 0.88
Epoch: 10 	 Training Loss: 0.010510
Epoch: 10 	 Validation p: 0.73, r:0.66, f: 0.69, roc_auc: 0.88
Epoch: 11 	 Training Loss: 0

In [19]:
'''
AUTOGRADER CELL. DO NOT MODIFY THIS.
'''
# Final check on the validation set: roc_auc must exceed the 0.7 threshold.
p, r, f, roc_auc = eval_model(naive_rnn, val_loader)
print(roc_auc)
assert roc_auc > 0.7, "ROC AUC is too low on the validation set (%f < 0.7)"%(roc_auc)



0.8890676001642535
