# **Setup**

### **Mount Google Drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

ModuleNotFoundError: No module named 'google.colab'

### **Install Ludwig**
<p> Install Ludwig and Ludwig's LLM related dependencies. </p>

In [None]:
!pip uninstall -y tensorflow --quiet
!pip install ludwig --quiet
!pip install ludwig[llm] --quiet
!pip install torch --quiet
!pip install torch transformers --quiet

### **Import packages**

In [4]:
from google.colab import data_table; data_table.enable_dataframe_formatter()
import numpy as np; np.random.seed(123)
import pandas as pd

import getpass
import locale; locale.getpreferredencoding = lambda: "UTF-8"
import logging
import os
import time

import re
import shutil
import yaml

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from ludwig.api import LudwigModel


os.environ["HUGGING_FACE_HUB_TOKEN"] = getpass.getpass("Input Your Huggingface READ Token:")

ModuleNotFoundError: No module named 'google.colab'

### **Write flush function**

In [None]:
def clear_cache():
  if torch.cuda.is_available():
    model = None
    torch.cuda.empty_cache()

# **Preprocessing**

### **Read in data**

In [None]:
df = pd.read_pickle("training.pkl")
df = df.fillna("")
df = df.rename(columns={'text':'notes', 'label':'output'})
df = df.astype({'output':'str'})

### **Split into train and val**

In [None]:
total_rows = len(df)
split_0_count = int(total_rows * 0.9)
split_1_count = total_rows - split_0_count

split_values = np.concatenate([
    np.zeros(split_0_count),
    np.ones(split_1_count),
])

np.random.shuffle(split_values)

df['split'] = split_values
df['split'] = df['split'].astype(int)

### **Add prompt**

In [None]:
prompt = "Acute Respiratory Distress Syndrome (ARDS) patients have bilateral lung infiltrates on chest radiographies. Based on these notes, return 'true' if the patient has ARDS, 'false' otherwise."
df['instruction'] = prompt

### **Generate summaries**

In [None]:
tokenizer=AutoTokenizer.from_pretrained('T5-base')
model=AutoModelForSeq2SeqLM.from_pretrained('T5-base', return_dict=True)
TOKEN_LIMIT = 320

def get_note(section):
  if section[1] == section[2] or section[2].startswith(section[1]):
    return section[2]
  else:
    return section[1] + section[2]

def summarize(text):
  input = re.sub(r'\s', r' ', text)
  notes = re.split(r'Note \d+: ', input)
  notes = [re.findall(r'([A-Z]{4,}: )([^A-Z]*([A-Z]{0,3}[^A-Z]+)*)', note) for note in notes]
  notes = [get_note(section) for note in notes for section in note if section[0] in ["IMPRESSION: ", "FINDINGS: "]]
  inputs = [tokenizer.encode("summarize:" + note, return_tensors="pt") for note in notes]
  outputs = [model.generate(input, max_length=TOKEN_LIMIT) for input in inputs]
  summary = [re.sub(r"<pad>|</s>", r"", tokenizer.decode(output[0])) for output in outputs]
  return summary

df['input'] = df['notes'].apply(summarize)

### **Get "bilateral" notes**

In [None]:
def get_bilateral(text):
  input = re.sub(r'\s', r' ', text)
  notes = re.split(r'Note \d+: ', input)
  notes = [re.findall(r'([A-Z]{4,}: )([^A-Z]*([A-Z]{0,3}[^A-Z]+)*)', note) for note in notes]
  return ([" ".join(note[1].split()[:320]) for note in notes[1] if "bilateral" in note[1]])

df['bilateral'] = df['notes'].apply(get_bilateral)

### **Restructure dataframe**
<p> Note that we're taking a small subsample of the false records for training, in order to combat both the data imbalance and the GPU limitations.

In [None]:
train = df[df['split'] == 0]
val = df[df['split'] == 1]

In [None]:
train = pd.concat([train[train['output'] == 'True'], train[train['output'] == 'False'].sample(frac=0.15)])

In [None]:
train_data = [(index, item, row['output'], row['instruction']) for index, row in train.iterrows() for section in [row['input'], row['bilateral']] for item in section if len(item) > 1]
train = pd.DataFrame(train_data, columns=['patient', 'input', 'output', 'instruction'])

val_data = [(index, item, row['output'], row['instruction']) for index, row in val.iterrows() for section in [row['input'], row['bilateral']] for item in section if len(item) > 1]
val = pd.DataFrame(val_data, columns=['patient', 'input', 'output', 'instruction'])

# **Fine-tuning**

### **Training**

In [None]:
model = None
clear_cache()
df_train = train
qlora_fine_tuning_config = yaml.safe_load(
"""
model_type: llm
base_model: meta-llama/Llama-2-7b-hf

input_features:
  - name: instruction
    type: text

output_features:
  - name: output
    type: text

prompt:
  template: >-
    Below is an instruction that describes a task, paired with an input
    that may provide further context. Write a response that appropriately
    completes the request.

    ### Instruction: {instruction}

    ### Input: {input}

    ### Response:

generation:
  temperature: 0.0000001
  max_new_tokens: 10

adapter:
  type: lora
  r: 4

quantization:
  bits: 4

trainer:
  type: finetune
  epochs: 1
  batch_size: 1
  eval_batch_size: 1
  gradient_accumulation_steps: 16
  learning_rate: 0.00001
  optimizer:
    type: adam
    params:
      eps: 1.e-8
      betas:
        - 0.9
        - 0.999
      weight_decay: 0
  learning_rate_scheduler:
    warmup_fraction: 0.03
    reduce_on_plateau: 0
"""
)

model = LudwigModel(config=qlora_fine_tuning_config, logging_level=logging.INFO)
results = model.train(dataset=df_train)
filename = "/content/drive/MyDrive/" + time.strftime("%m.%d.%y_%H.%M.%S", time.localtime())
shutil.copytree("results", filename, copy_function = shutil.copy)

### **Evaluation**

In [None]:
val_predictions = model.predict(val)

In [None]:
patients = {patient:False for patient in set(val['patient'])}
for row, prediction in zip(val.iterrows(), val_predictions[0]['output_response']):
  patient = row[1]['patient']
  if prediction[0].strip().lower() == "true":
    patients[patient] = True

In [None]:
true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0
for row in val.iterrows():
  patient = row[1]['patient']
  truth = row[1]['output']
  if str(patients[patient]) == truth:
    if truth == "True":
      true_positive += 1
    else:
      true_negative += 1
  else:
    if truth == "True":
      false_negative += 1
    else:
      false_positive += 1

In [None]:
if true_positive + false_positive == 0:
  precision = 0
else:
  precision = true_positive/(true_positive+false_positive)
if true_positive + false_negative == 0:
  recall = 0
else:
  recall = true_positive/(true_positive+false_negative)
if precision + recall == 0:
  f1 = 0
else:
  f1 = (2*precision*recall)/(precision+recall)
print("Precision:", precision)
print("Recall:", recall)
print("F1:", f1)

# **Generating results**

### **Preprocessing test data**

In [None]:
df = pd.read_pickle("test.pkl")
df = df.fillna("")

In [None]:
prompt = "Acute Respiratory Distress Syndrome (ARDS) patients have bilateral lung infiltrates on chest radiographies. Based on these notes, return 'true' if the patient has ARDS, 'false' otherwise."
df['instruction'] = prompt

In [None]:
tokenizer=AutoTokenizer.from_pretrained('T5-base')
model=AutoModelForSeq2SeqLM.from_pretrained('T5-base', return_dict=True)
TOKEN_LIMIT = 320

def get_note(section):
  if section[1] == section[2] or section[2].startswith(section[1]):
    return section[2]
  else:
    return section[1] + section[2]

def summarize(text):
  input = re.sub(r'\s', r' ', text)
  notes = re.split(r'Note \d+: ', input)
  notes = [re.findall(r'([A-Z]{4,}: )([^A-Z]*([A-Z]{0,3}[^A-Z]+)*)', note) for note in notes]
  notes = [get_note(section) for note in notes for section in note if section[0] in ["IMPRESSION: ", "FINDINGS: "]]
  inputs = [tokenizer.encode("summarize:" + note, return_tensors="pt") for note in notes]
  outputs = [model.generate(input, max_length=TOKEN_LIMIT) for input in inputs]
  summary = [re.sub(r"<pad>|</s>", r"", tokenizer.decode(output[0])) for output in outputs]
  return summary

df['input'] = df['text'].apply(summarize)

In [None]:
def get_bilateral(text):
  input = re.sub(r'\s', r' ', text)
  notes = re.split(r'Note \d+: ', input)
  notes = [re.findall(r'([A-Z]{4,}: )([^A-Z]*([A-Z]{0,3}[^A-Z]+)*)', note) for note in notes]
  return ([" ".join(note[1].split()[:320]) for note in notes[1] if "bilateral" in note[1]])

df['bilateral'] = df['text'].apply(get_bilateral)

In [None]:
test_data = [(index, item, row['instruction']) for index, row in train.iterrows() for section in [row['input'], row['bilateral']] for item in section if len(item) > 1]
test = pd.DataFrame(train_data, columns=['patient', 'input', 'instruction'])

### **Prediction**

In [5]:
import pickle
import pandas as pd

In [65]:
test = pd.read_pickle('valandtestfiles/final_test.pkl')

In [66]:
test

Unnamed: 0,patient,input,instruction
0,0,the mediastinum is accentuated by low lung vo...,Acute Respiratory Distress Syndrome (ARDS) pat...
1,0,concurrent pneumonia is difficult to exclude ...,Acute Respiratory Distress Syndrome (ARDS) pat...
2,0,left lower lobe is probably substantially ate...,Acute Respiratory Distress Syndrome (ARDS) pat...
3,0,bibasilar opacification most likely represent...,Acute Respiratory Distress Syndrome (ARDS) pat...
4,0,"examination is severely motion degraded, part...",Acute Respiratory Distress Syndrome (ARDS) pat...
...,...,...,...
23705,2044,"there is mild mass effect on the cord, best s...",Acute Respiratory Distress Syndrome (ARDS) pat...
23706,2044,a band of subsegmental collapse in the left l...,Acute Respiratory Distress Syndrome (ARDS) pat...
23707,2044,"a comparison made to multiple priors, most re...",Acute Respiratory Distress Syndrome (ARDS) pat...
23708,2044,the study is compared with the significantly ...,Acute Respiratory Distress Syndrome (ARDS) pat...


In [55]:
test = pd.read_pickle('../test.pkl')

In [56]:
test

0       Note 1: EXAMINATION:  Chest radiograph\n\nINDI...
1       Note 1: ADDENDUM\nAGATSTON SCORE: The total (a...
2       Note 1: EXAMINATION:  DX CHEST PORT LINE/TUBE ...
3       Note 1: EXAMINATION:  CHEST (PORTABLE AP)\n\nI...
4       Note 1: ADDENDUM  The right common femoral art...
                              ...                        
2040    Note 1: Please refer to chest CT performed sub...
2041    Note 1: EXAMINATION:  CHEST (PORTABLE AP)\n\nI...
2042    Note 1: CHEST RADIOGRAPH\n\nINDICATION:  STEMI...
2043    Note 1: CT ABDOMEN AND PELVIS WITH INTRAVENOUS...
2044    Note 1: INDICATION: ___ man with seizure, ques...
Name: text, Length: 2045, dtype: object

In [57]:
preds = pd.read_pickle('bignewsbigfiles/test_pre_222.pkl')

In [58]:
preds

[(            output_predictions  \
  0    [, false, , , , , , , , ]   
  1    [, false, , , , , , , , ]   
  2    [, false, , , , , , , , ]   
  3    [, false, , , , , , , , ]   
  4    [, false, , , , , , , , ]   
  ..                         ...   
  123  [, false, , , , , , , , ]   
  124  [, false, , , , , , , , ]   
  125  [, false, , , , , , , , ]   
  126  [, false, , , , , , , , ]   
  127  [, false, , , , , , , , ]   
  
                                    output_probabilities output_response  \
  0    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...         [false]   
  1    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...         [false]   
  2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...         [false]   
  3    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...         [false]   
  4    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...         [false]   
  ..                                                 ...             ...   
  123  [0.0, 0.0, 0.0, 0.0, 0.0, 

In [59]:
df=pd.DataFrame();
len(preds)
for i in range(len(preds)):
    df=pd.concat([df, preds[i][0]], ignore_index=True)

In [60]:
df

Unnamed: 0,output_predictions,output_probabilities,output_response,output_probability
0,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
1,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
2,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
3,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
4,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
...,...,...,...,...
23705,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
23706,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
23707,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
23708,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf


In [63]:
df['output_response_str']=df['output_response'].apply(lambda x: str(x))
df['output_response_str'].unique()

array(["['false']", "['true']", "['nobody']", "['Љ']"], dtype=object)

In [69]:
final=test.merge(df,left_index=True, right_index=True)

In [79]:
final

Unnamed: 0,patient,input,instruction,output_predictions,output_probabilities,output_response,output_probability,output_response_str,prediction
0,0,the mediastinum is accentuated by low lung vo...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
1,0,concurrent pneumonia is difficult to exclude ...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
2,0,left lower lobe is probably substantially ate...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
3,0,bibasilar opacification most likely represent...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
4,0,"examination is severely motion degraded, part...",Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
...,...,...,...,...,...,...,...,...,...
23705,2044,"there is mild mass effect on the cord, best s...",Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
23706,2044,a band of subsegmental collapse in the left l...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
23707,2044,"a comparison made to multiple priors, most re...",Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False
23708,2044,the study is compared with the significantly ...,Acute Respiratory Distress Syndrome (ARDS) pat...,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf,['false'],False


In [82]:
final.loc[final['prediction']==True]['patient'].unique()

array([   5,    8,   55,   61,   95,   99,  145,  146,  155,  156,  163,
        175,  194,  195,  205,  207,  224,  225,  230,  273,  280,  282,
        286,  299,  308,  365,  371,  375,  399,  403,  404,  422,  427,
        435,  444,  452,  457,  470,  489,  502,  503,  516,  517,  521,
        562,  564,  567,  576,  599,  615,  621,  633,  637,  641,  642,
        672,  677,  690,  702,  733,  760,  766,  779,  788,  802,  807,
        829,  839,  860,  861,  862,  902,  923,  925,  937,  942,  948,
        957,  963,  982, 1002, 1003, 1012, 1016, 1041, 1042, 1075, 1083,
       1088, 1100, 1105, 1120, 1140, 1147, 1151, 1177, 1178, 1188, 1189,
       1198, 1209, 1215, 1233, 1234, 1244, 1260, 1281, 1295, 1296, 1302,
       1303, 1316, 1329, 1386, 1388, 1399, 1406, 1427, 1438, 1443, 1448,
       1465, 1480, 1487, 1537, 1547, 1565, 1566, 1596, 1603, 1613, 1624,
       1627, 1637, 1646, 1651, 1668, 1670, 1684, 1690, 1721, 1723, 1743,
       1770, 1773, 1775, 1793, 1808, 1821, 1887, 19

In [78]:
final['prediction']=final['output_response_str'].str.contains('true')

In [91]:
patients = pd.DataFrame(range(2045))

In [111]:
patients.rename(columns={0: "patient_id"}, inplace=True)

In [113]:
patients

Unnamed: 0,patient_id,prediction
0,0,False
1,1,False
2,2,False
3,3,False
4,4,False
...,...,...
2040,2040,False
2041,2041,False
2042,2042,False
2043,2043,False


In [115]:
for i in final.loc[final['prediction']==True]['patient'].unique():
    patients.loc[patients['patient_id']==i]=True

In [116]:
patients['prediction'].value_counts()

prediction
False    1884
True      161
Name: count, dtype: int64

In [119]:
patients['prediction'].to_csv('asdf.csv', index=False, header=False)

In [34]:
s1=preds[1][0]

In [36]:
s2=preds[2][0]

In [37]:
pd.concat([s1, s2], ignore_index=True)

Unnamed: 0,output_predictions,output_probabilities,output_response,output_probability
0,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
1,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
2,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
3,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
4,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
...,...,...,...,...
251,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
252,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
253,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
254,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf


In [32]:
preds[1][0]

Unnamed: 0,output_predictions,output_probabilities,output_response,output_probability
0,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
1,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
2,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
3,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
4,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
...,...,...,...,...
123,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
124,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
125,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf
126,"[, false, , , , , , , , ]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[false],-inf


In [None]:
test_predictions = model.predict(test)

In [16]:
pd.DataFrame(preds)

Unnamed: 0,0,1
0,"output_predictions \ 0 [, fals...",results
1,"output_predictions \ 0 [, fals...",results
2,"output_predictions \ 0 [, fals...",results
3,"output_predictions \ 0 [, fals...",results
4,"output_predictions \ 0 [, fals...",results
5,"output_predictions \ 0 [, fals...",results
6,"output_predictions \ 0 [, fals...",results
7,"output_predictions \ 0 [, fals...",results
8,"output_predictions \ 0 [, fals...",results
9,"output_predictions \ 0 [, fals...",results


In [17]:
patients = {patient:False for patient in set(test['patient'])}

In [83]:
patients

{0: False,
 1: False,
 2: False,
 3: False,
 4: False,
 5: False,
 6: False,
 7: False,
 8: False,
 9: False,
 10: False,
 11: False,
 12: False,
 13: False,
 14: False,
 15: False,
 16: False,
 17: False,
 18: False,
 19: False,
 20: False,
 21: False,
 22: False,
 23: False,
 24: False,
 25: False,
 26: False,
 27: False,
 28: False,
 29: False,
 30: False,
 31: False,
 32: False,
 33: False,
 34: False,
 35: False,
 36: False,
 37: False,
 38: False,
 39: False,
 40: False,
 41: False,
 42: False,
 43: False,
 44: False,
 45: False,
 46: False,
 47: False,
 48: False,
 49: False,
 50: False,
 51: False,
 52: False,
 53: False,
 54: False,
 55: False,
 56: False,
 57: False,
 58: False,
 59: False,
 60: False,
 61: False,
 62: False,
 63: False,
 64: False,
 65: False,
 66: False,
 67: False,
 68: False,
 69: False,
 70: False,
 71: False,
 72: False,
 73: False,
 74: False,
 75: False,
 76: False,
 77: False,
 78: False,
 79: False,
 80: False,
 81: False,
 82: False,
 83: False,
 8

In [53]:
for row in zip(test.iterrows()):
    print(row[0][1])

patient                                                        0
input           the mediastinum is accentuated by low lung vo...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 0, dtype: object
patient                                                        0
input           concurrent pneumonia is difficult to exclude ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1, dtype: object
patient                                                        0
input           left lower lobe is probably substantially ate...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 2, dtype: object
patient                                                        0
input           bibasilar opacification most likely represent...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 3, dtype: object
patient                                                        0
input           examination is severely motion degraded, part..

Name: 1131, dtype: object
patient                                                       38
input           air outlines the heart and mediastinum-pneumo...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1132, dtype: object
patient                                                       38
input           soft tissue emphysema is seen in the left nec...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1133, dtype: object
patient                                                       38
input           pleural effusions and bilateral airspace opac...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1134, dtype: object
patient                                                       38
input           a small right apical pneumothorax does not ap...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1135, dtype: object
patient                                                       38
input                    

Name: 1922, dtype: object
patient                                                      116
input           there is no pneumothorax. There is no pneumot...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1923, dtype: object
patient                                                      116
input           hepatic echotexture is normal, but there is w...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1924, dtype: object
patient                                                      116
input           the right ABI was 1.19. on the left, triphasi...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1925, dtype: object
patient                                                      116
input           no evidence of arterial insufficiency to the ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 1926, dtype: object
patient                                                      116
input           the cathe

patient                                                      185
input           low lung volumes contribute to crowding of br...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 2745, dtype: object
patient                                                      185
input           mild pulmonary edema and moderate cardiomegal...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 2746, dtype: object
patient                                                      185
input           patent right internal jugular vein. hemodialy...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 2747, dtype: object
patient                                                      185
input           successful placement of a 19 cm tip-to-cuff l...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 2748, dtype: object
patient                                                      185
input               patent bilateral basilic and ce

Name: 3744, dtype: object
patient                                                      276
input           normal flow, compression and augmentation is ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 3745, dtype: object
patient                                                      276
input           no evidence of deep vein thrombosis in either...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 3746, dtype: object
patient                                                      276
input           CTA CTA CTA CTA CTA CTA CTA CTA CTA CTA CTA C...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 3747, dtype: object
patient                                                      276
input           no pulmonary embolism, no pleural effusions, ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 3748, dtype: object
patient                                                      276
input           there is 

patient                                                      365
input           multiple myeloma is a rare form of degenerati...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 4744, dtype: object
patient                                                      365
input           puncture attempted at Levels L4-L5, L5-S1 and...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 4745, dtype: object
patient                                                      365
input           lumbar puncture with attempts at L4-L5, L5-S1...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 4746, dtype: object
patient                                                      365
input           no pulmonary edema, no pneumonia, no pleural ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 4747, dtype: object
patient                                                      365
input           grade 1 anterolisthesis of L4 on L5

Name: 5733, dtype: object
patient                                                      462
input           only the left basal opacity is minimally impr...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 5734, dtype: object
patient                                                      462
input           there are no abnormally dilated loops of larg...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 5735, dtype: object
patient                                                      462
input                 normal bowel gas pattern. Mild stool load.
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 5736, dtype: object
patient                                                      462
input           the known pleural lesion on the left is bette...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 5737, dtype: object
patient                                                      462
input           linear at

Name: 6564, dtype: object
patient                                                      537
input           small amount of right pleural effusion is pre...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 6565, dtype: object
patient                                                      537
input           there is no evidence of focal liver lesions o...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 6566, dtype: object
patient                                                      537
input           the main portal vein has normalized velocity ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 6567, dtype: object
patient                                                      537
input           right central venous line tip is at the cavoa...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 6568, dtype: object
patient                                                      537
input           there is 

Name: 7406, dtype: object
patient                                                      613
input           there is moderate bibasilar bronchial wall th...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 7407, dtype: object
patient                                                      613
input                            no acute intrathoracic process.
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 7408, dtype: object
patient                                                      614
input           hematoma measuring up to 10 mm in thickness f...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 7409, dtype: object
patient                                                      614
input           hematoma thickness has increased to 10 mm com...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 7410, dtype: object
patient                                                      614
input           right sub

Name: 8260, dtype: object
patient                                                      690
input           pigtail is essentially unchanged in size, wit...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 8261, dtype: object
patient                                                      690
input           the patient was brought to the CT suite and l...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 8262, dtype: object
patient                                                      690
input           successful upsizing of the 10 ___ ___ cathete...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 8263, dtype: object
patient                                                      690
input           Previously identified right IJ line is no lon...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 8264, dtype: object
patient                                                      690
input                    

Name: 9241, dtype: object
patient                                                      770
input           moderate-to-severe pulmonary edema likely car...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 9242, dtype: object
patient                                                      770
input           a fracture of the T10 vertebral body with sma...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 9243, dtype: object
patient                                                      770
input           there is marked cardiomegaly with worsening p...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 9244, dtype: object
patient                                                      770
input           status post right IJ line placement without e...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 9245, dtype: object
patient                                                      770
input          Over last 

Name: 10227, dtype: object
patient                                                      851
input                                   no pneumothorax is seen.
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 10228, dtype: object
patient                                                      851
input           pulmonary vascular congestion and probable sm...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 10229, dtype: object
patient                                                      851
input           the size of the cardiac silhouette is unchang...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 10230, dtype: object
patient                                                      851
input           the tip of a new right internal jugular centr...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 10231, dtype: object
patient                                                      851
input               

Name: 11048, dtype: object
patient                                                      923
input           edema of colon is similar to prior allowing f...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11049, dtype: object
patient                                                      923
input           scattered ground-glass opacities throughout b...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11050, dtype: object
patient                                                      923
input           patent right internal jugular vein. catheter ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11051, dtype: object
patient                                                      923
input           the catheter terminates in the distal superio...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11052, dtype: object
patient                                                      923
input           ther

Name: 11946, dtype: object
patient                                                      999
input           clear right lung with no pleural effusions or...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11947, dtype: object
patient                                                      999
input           successful placement of a 16 ___ MIC gastroje...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11948, dtype: object
patient                                                      999
input           successful placement of a 16 ___ MIC gastroje...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11949, dtype: object
patient                                                      999
input           the nasogastric tube has been removed. no def...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 11950, dtype: object
patient                                                      999
input           ther

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




patient                                                     1405
input           pleural effusions are resolved with an unchan...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 16540, dtype: object
patient                                                     1405
input           a very small left pleural effusion is unchang...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 16541, dtype: object
patient                                                     1405
input           effusion is stable, but no appreciable right ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 16542, dtype: object
patient                                                     1405
input           a small right pneumothorax has decreased in s...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 16543, dtype: object
patient                                                     1405
input           pigtail pleural catheter remov

Name: 17491, dtype: object
patient                                                     1502
input           compared to postoperative chest radiograph __...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 17492, dtype: object
patient                                                     1502
input           atelectatic changes are seen at the left lung...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 17493, dtype: object
patient                                                     1502
input           As above As above As above As above As above ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 17494, dtype: object
patient                                                     1502
input           there is no evidence of pneumothorax. there i...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 17495, dtype: object
patient                                                     1502
input           ther

Name: 18387, dtype: object
patient                                                     1581
input           a tube with its tip approximately 2.4 cm abov...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 18388, dtype: object
patient                                                     1581
input                            New right-sided New right-sided
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 18389, dtype: object
patient                                                     1581
input           no evidence of hemorrhage, edema, mass effect...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 18390, dtype: object
patient                                                     1581
input           no obvious evidence of cerebral edema related...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 18391, dtype: object
patient                                                     1581
input           a sm

Name: 19287, dtype: object
patient                                                     1653
input           no evidence of hemorrhage in the cervical spi...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 19288, dtype: object
patient                                                     1653
input           if the patient has tenderness over the AC joi...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 19289, dtype: object
patient                                                     1653
input           mass centered within right L4-L5 neural foram...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 19290, dtype: object
patient                                                     1653
input           mass centered at the right L4-L5 neural foram...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 19291, dtype: object
patient                                                     1653
input           ther

Name: 20235, dtype: object
patient                                                     1710
input                                aspiration of thin liquids.
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 20236, dtype: object
patient                                                     1711
input           hazy opacity projecting over right mid to upp...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 20237, dtype: object
patient                                                     1711
input           opacities in the lungs as described above, be...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 20238, dtype: object
patient                                                     1711
input           there is no evidence of infarction, edema, or...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 20239, dtype: object
patient                                                     1711
input           soft

Name: 21191, dtype: object
patient                                                     1798
input           pulmonary edema has improved with the study o...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 21192, dtype: object
patient                                                     1798
input           -Multiple enlarging low-density lesions in th...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 21193, dtype: object
patient                                                     1798
input           placement of a nasogastric tube loops slightl...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 21194, dtype: object
patient                                                     1798
input           -No evidence of pulmonary embolism or aortic ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 21195, dtype: object
patient                                                     1798
input           righ

Name: 22090, dtype: object
patient                                                     1885
input           evidence of prior trauma is present in a righ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22091, dtype: object
patient                                                     1885
input           evidence of prior trauma is a consideration. ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22092, dtype: object
patient                                                     1885
input           no lymphadenopathy, mediastinal hematoma, or ...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22093, dtype: object
patient                                                     1885
input           no evidence of acute trauma. 2. Hepatic steat...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22094, dtype: object
patient                                                     1885
input           a fr

Name: 22996, dtype: object
patient                                                     1974
input           no evidence of active angiograph extravasatio...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22997, dtype: object
patient                                                     1974
input           atelectasis is a small-to-moderate sized left...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22998, dtype: object
patient                                                     1974
input           no evidence of active bleeding noted in the a...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 22999, dtype: object
patient                                                     1974
input           possibility of supervening pneumonia at the l...
instruction    Acute Respiratory Distress Syndrome (ARDS) pat...
Name: 23000, dtype: object
patient                                                     1975
input           ther

In [12]:
patients = {patient:False for patient in set(test['patient'])}
for row, prediction in zip(test.iterrows(), preds[0]['output_response']):
  patient = row[1]['patient']
  if prediction[0].strip().lower() == "true":
    patients[patient] = True

TypeError: tuple indices must be integers or slices, not str

### **Output results**

In [23]:
test_results = pd.DataFrame([str(result) for patient, result in patients.items()])
test_results.to_csv("test_result.csv", header=False, index=False)