In [2]:
# n number of subsequences of a patient's notes
# c is scaling factor and controls influence of number of subsequences 
# use c= 2
# pmax is max probability of readmission
# pmean is mean probability of readmission
from datasets import load_dataset
import os
import numpy as np
import torch
from scipy.special import softmax

In [1]:
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')
#tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
def encode(examples):
     return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)

In [4]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
model = DistilBertForSequenceClassification.from_pretrained("../../models/orig_lr4e-5/checkpoint-12000")

In [5]:
# training model on tokenized and split data
class Dataset(torch.utils.data.Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val) for key, val in self.inputs[idx].items() if key != 'text'}
        item['labels'] = torch.tensor(int(self.labels[idx]['text']))
        return item

    def __len__(self):
        return len(self.labels)

In [6]:
def probability(test_dataset):
    # generates prediction from model
    train_pred = trainer.predict(test_dataset)
    pred = train_pred.predictions
    
    # softmax each row so each row sums to 1
    prob = softmax(pred, axis = 1)
    
    # find the mean probability of readmission
    meanprob = np.mean(prob,axis=0)[1]
    
    # find the max probability of readmission
    maxprob = np.amax(prob,axis=0)[1]
    
    n = pred.shape[0]
    
    # return mean, max, shape
    return meanprob, maxprob, n

In [7]:
def prepare_data(patientID):
    # loading features and labels per patient
    input_dataset = load_dataset('text', data_files={'test': '../../data/processPatient/'+patientID})
    label_dataset = load_dataset('text', data_files={'test': '../../data/labels/'+patientID})
    
    # applying encoding function to dataset
    input_dataset = input_dataset.map(encode, batched=True)
    
    # setting dataset to testing dataset
    test_dataset = Dataset(input_dataset['test'], label_dataset['test'])
    
    return test_dataset

In [8]:
# calculating readmit probability on per patient basis
def readmit_probability(maxprob,meanprob,n):
    # c accounts for patients with many notes
    c=2
    # weight as n/c
    scaling = n/c
    denominator = 1+scaling
    numerator = maxprob + (meanprob * scaling)
    
    probability = numerator/denominator
    return probability

In [9]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score

# generating numpy array of all the real labels
def patient_labels(patients):
    labels = []
    for i in range(len(patients)):
        # taking label per patient
        with open('../../data/labels/'+ patients[i], 'r') as f:
            text = f.readline().strip()
            if text == '1':
                labels.append(1)
            elif text == '0':
                labels.append(0)
    
    label_array = np.asarray(labels)
            
    return label_array

# take in probabilities per patient array and threshold
# turn into list of labels of 0 or 1
def convert_probability(pred, threshold):
    labels= []
    for val in pred:
        if val>threshold:
            labels.append(1)
        else:
            labels.append(0)
            
    labels_array = np.asarray(labels)        
    return labels_array

# computing accuracy, f1, precision, recall, auroc
# parameters are the arrays of predicted labels, real labels, and predicted probabilities
def compute_metrics(pred_label, real_label, readmit_prob):
    labels = real_label
    preds = pred_label
    predictions = readmit_prob
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    roc = roc_auc_score(labels, predictions)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'auroc': roc,
    }

In [10]:
trainer = Trainer(
    # the instantiated 🤗 Transformers model to be trained
    model=model,
)

In [11]:
with open('../../data/splits/valid_list','r') as f:
    lines = f.read().splitlines()
    set_valid = set(lines)
valid_list = list(set_valid)

with open('../../data/splits/test_list','r') as f:
    lines = f.read().splitlines()
    set_test = set(lines)
test_list = list(set_test)

In [14]:
# empty list of scalable readmission prediction probabilities
patient_prob = []

# load valid list for testing
for i in range(len(valid_list)):
    # load the patient datset
    test_dataset = prepare_data(patients[i])
    
    # find the max and mean probability of readmission
    mean, maximum, n = probability(test_dataset)
    
    # calculate readmission probability per patient
    readmit = readmit_probability(mean,maximum,n)
    
    # add probabilities into list of all patient probabilities
    patient_prob.append(readmit)
    print(i)

Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3e19ec7f7f2644f2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-34f5df87db561b03/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-3e19ec7f7f2644f2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-19a967c3df8a5d42.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-0bf049cc091870e0/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a441e02143a1abca/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca

0


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-23a84bc2cc97cf4b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


1


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-acf0c736ef08f158/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-23a84bc2cc97cf4b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-ed2798692e7b3b2a.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-66c402bcb3f582e6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-e39417a9a212b3b6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-66c402bcb3f582e6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/

2


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-00a5a346a53920a8/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-7fb1e5d2e54bef16/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-00a5a346a53920a8/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-50f7250a16eb7cba.arrow


3


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-4661fe00984e4a30/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3c626d5124b752fc/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-4661fe00984e4a30/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-70aff9242dbc1d42.arrow


4


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-754e3fa29a22772a/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-22fd3fba09897541/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-754e3fa29a22772a/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-01fd7a37a50a8fb7.arrow


5


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-f0dbc79c5367767e/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-40dd0f0874eda788/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-f0dbc79c5367767e/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-f14cddc5e83a2351.arrow


6


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-e99a477551777f3f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-9d5434cb5decf15f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


7


Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-e99a477551777f3f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-1205204410f2c216.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-cf1b7662383386e9/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b05ef66731cacc3c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-cf1b7662383386e9/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-ad4fbfefdb34827a.arrow


8


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a1ba682b66c55cb5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


9


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-bf9a0a18d9225359/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-a1ba682b66c55cb5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-c52975535d677fb8.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-59b4cedb20efa773/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


10


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-efd5a7d5389e1ac2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-59b4cedb20efa773/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-162bf9bd5cfc3ed4.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-92bd6a01fc324d03/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-f19f7be902860b98/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-92bd6a01fc324d03/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/

11


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b935994e9c96fb2b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-90d23c92f6dc19ee/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-b935994e9c96fb2b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-4a546ee2da6da24c.arrow


12


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-80d09a866edeb796/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b506ad738cf7964c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-80d09a866edeb796/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-b23b503b2fdc2216.arrow


13


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-acd1416430e5108c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-faf12505c61e8396/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-acd1416430e5108c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-54255017f93d2ec2.arrow


14
15


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-336fd626d53448c6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-50046297087764b3/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-336fd626d53448c6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-86d5829e777c7dfb.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-001b24d98a806f38/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a97ab119623df72d/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca

16


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-7b507c8f5538b2d4/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-d93460a2446599fb/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-7b507c8f5538b2d4/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-1089764ba7c70db5.arrow


17


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-4b48db97f64c51e6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-5f11726d697d71dc/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-4b48db97f64c51e6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-8d8535421bf3b1f2.arrow


18


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-e5989d7ae20c1b8f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


19


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b9a4e3a6b0612038/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-e5989d7ae20c1b8f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-feaff7b4f1899877.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-0d6b6cee4e3b0bce/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


20


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-d8db759fbb8d9fc7/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-0d6b6cee4e3b0bce/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-182f27ea910779c4.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-e6e22c1ed585f3f5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3c74eb3c2ddfb548/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-e6e22c1ed585f3f5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/

21


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-2b7304120fe62385/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-454b2a7bd60499ee/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-2b7304120fe62385/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-0481132795c9d727.arrow


22


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-29d87898c608d7e1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-4f9963dfd0417d29/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-29d87898c608d7e1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-486fefd0decd3c82.arrow


23


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-2686f2ddea6cd5a1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3541da19909336f6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-2686f2ddea6cd5a1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-c268781ec5445ecb.arrow


24


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-afd0422b41224d5f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a6c56d292799b75d/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-afd0422b41224d5f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-699b56dca0f04a95.arrow


25


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-85777d7d996a36a2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-34bd1379253e8360/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-85777d7d996a36a2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-74ba3a0495b84369.arrow


26


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-236e036db5b52810/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-81fa62a3e76d3e55/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-236e036db5b52810/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-e026aaed2c638aed.arrow


27
28


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b110e40d1d6302a1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-171d20ccf7cbd48f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-b110e40d1d6302a1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-cbe5e050d2cdab0c.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-70e6d1204508a9ce/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-dc245fac204e1058/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca

29


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a49c51ae392c173f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


30


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a74a22896662c0c1/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-a49c51ae392c173f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-0cb904c3a9ea0d19.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-4e24bf46633b6898/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


31


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-80e47182e4701f95/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-4e24bf46633b6898/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-5c3d21cb2beb7fa9.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-330c0e83b6dcbd73/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-bcd962817bfc860f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-330c0e83b6dcbd73/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/

32


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-2aa3a11977f52b2b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-24d7da0c25f8c1e2/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-2aa3a11977f52b2b/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-2247e96ff2f8fe7f.arrow


33


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-1b9c84f7a7d61d18/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-beb4af484da2edf9/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-1b9c84f7a7d61d18/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-4efd7b5fa53e01ce.arrow


34


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-16c0b52d79d554b5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-8d9f722836cce688/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-16c0b52d79d554b5/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-8b47feeaf8a9cd6c.arrow


35


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3eb65bc408cde543/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-9d193d4465ae9e9d/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-3eb65bc408cde543/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-c9e8dd10590c50ab.arrow


36


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-9ee2ced3af1e1c12/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-2090d0c7d62de18c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-9ee2ced3af1e1c12/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-b1c6a0e1ef375def.arrow


37


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-c06caae496ab2140/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3c1811655e7a59d3/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-c06caae496ab2140/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-18a191a68f75b386.arrow


38


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-1f56d9111c4bbddf/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-b50cef8eeb0f2972/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-1f56d9111c4bbddf/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-a61457189205a745.arrow


39


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a39fc86741876a49/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-a2ec9f3cc233e501/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-a39fc86741876a49/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-8147b15b366a62ae.arrow


40


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-77d4c3216fce9901/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-095bf64a1d45e44a/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


41


Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-77d4c3216fce9901/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-7125eaa61da380c9.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-e5c3b22e1e97aa3e/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


42


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-131e36bd865abd26/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-e5c3b22e1e97aa3e/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-738a15f550bc6964.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-1e6858c5a741b366/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-c3a69fb2f541fb08/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-1e6858c5a741b366/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/

43


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-4686991cefd7ab6f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-45a98a58abd4134a/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-4686991cefd7ab6f/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-27b916e283b11874.arrow


44


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-91f9c36906576463/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-74b0ac8c43e7b1f4/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-91f9c36906576463/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-30bac97305b08030.arrow


45


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-c0437b251d1c8eb9/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-bb48ae2995231b0c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-c0437b251d1c8eb9/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-4e758aba73381101.arrow


46


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-049fe79b7d10ad5c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


47


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-bb7416fae363eb52/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-049fe79b7d10ad5c/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-1971edda54ddca4f.arrow
Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-3d3463510a3788a6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)


48


Using custom data configuration default
Reusing dataset text (/home/ubuntu/.cache/huggingface/datasets/text/default-cb194ecf4dd7e98e/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab)
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/text/default-3d3463510a3788a6/0.0.0/daf90a707a433ac193b369c8cc1772139bb6cca21a9c7fe83bdd16aad9b9b6ab/cache-19d39a13ea89e754.arrow


49


In [15]:
# turn predicted probability list into 1d numpy array
pred_prob = np.asarray(patient_prob)

# generate label array from probability list and threshold
# if probability over a certain threshold, generate a readmit label of 1
# otherwise, readmit = 0
pred_labels = convert_probability(pred_prob,0.5)

# generating actual labels of patients for valid list
real_labels = patient_labels(valid_list)

print(real_labels)
print(pred_prob)
print(pred_labels)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 1 0 0 0 1 0]
[0.77823622 0.68497595 0.16474945 0.44565125 0.76632552 0.39542726
 0.54565689 0.83436842 0.51923815 0.49727453 0.56509746 0.40367569
 0.07613089 0.54243808 0.21673481 0.37446617 0.90575721 0.42735737
 0.22505034 0.21577791 0.70185932 0.90070226 0.96163096 0.45155781
 0.12463467 0.68941407 0.92494046 0.59353308 0.60165684 0.21242536
 0.15658053 0.4126593  0.32864412 0.81731479 0.53273795 0.24218046
 0.78754893 0.31139394 0.57564662 0.26023443 0.87719549 0.13014864
 0.93622163 0.94967131 0.97418219 0.27464363 0.3415327  0.48940997
 0.9558592  0.56523283]
[1 1 0 0 1 0 1 1 1 0 1 0 0 1 0 0 1 0 0 0 1 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 1
 0 1 0 1 0 1 1 1 0 0 0 1 1]


In [16]:
# computing the metrics 
print(compute_metrics(pred_labels, real_labels,pred_prob))

{'accuracy': 0.64, 'f1': 0.47058823529411764, 'precision': 0.3076923076923077, 'recall': 1.0, 'auroc': 0.988095238095238}
