In [None]:
import pandas as pd
from google.colab import files
# Ask the user to upload a file through Colab; the returned mapping is later
# looked up by file name when the CSV is parsed.
data_to_load = files.upload()

Saving dataset.csv to dataset.csv


In [None]:
import io

# The upload mapping is keyed by the saved file name. Colab may save a repeated
# upload under a different name (e.g. "dataset (1).csv"), so a hard-coded key
# can raise KeyError on re-run. Prefer the expected name and otherwise fall
# back to the first uploaded file.
if not data_to_load:
  raise ValueError('No file was uploaded; re-run the upload cell and choose the dataset CSV.')
csv_name = 'dataset.csv' if 'dataset.csv' in data_to_load else next(iter(data_to_load))

# Parse the uploaded bytes as CSV; the cells below read the 'abstract' and
# 'summary' columns of this frame.
dataset = pd.read_csv(io.BytesIO(data_to_load[csv_name]))
dataset

Unnamed: 0,id,abstract,summary
0,1,BACKGROUND: Xuesaitong dispersible tablet (XST...,"Aspirin, Xuesaitong dispersible tablet"
1,2,Arterial hypertension is a major risk factor f...,"Aspirin, antihypertensive drugs"
2,3,Traditional Chinese medicine (TCM)-based herba...,"Aspirin, herbs"
3,4,INTRODUCTION: The aging of the population in t...,"Aspirin, NSAIDs"
4,5,The effects of antibiotics on the intestinal f...,"Aspirin, amoxicillin"
5,6,BACKGROUND: It is unclear whether the combinat...,"Aspirin, salvianolate"


In [None]:
!pip install --upgrade pip
!pip install --disable-pip-version-check torch==1.13.1 torchdata==0.5.1 --quiet
!pip install transformers==4.27.2 datasets==2.11.0 --quiet #library from huggingface

[0m

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

In [None]:
# Checkpoint identifier; the tokenizer cell loads from the same name.
model_name = 'google/flan-t5-base'
# Load the checkpoint through AutoModelForSeq2SeqLM; later cells call model.generate(...) on it.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
# Tokenizer for the same checkpoint as the model; later cells use it to encode
# prompts as tensors and to decode generated ids back to text.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) # request the fast tokenizer implementation

In [None]:
# Separator printed between sections (and embedded in few-shot prompts): 99 dashes.
dash_line = '-' * 99


# Baseline: feed every abstract to the model unchanged, with no instruction.
# Iterating over the column values avoids dataset['abstract'][i], which is a
# label lookup and only works while the frame's index is exactly 0..n-1.
for example_number, (abstract, summary) in enumerate(
    zip(dataset['abstract'], dataset['summary']), start=1
):
  # Encode the raw abstract, generate up to 50 new tokens, decode the first sequence.
  inputs = tokenizer(abstract, return_tensors='pt')
  output = tokenizer.decode(
      model.generate(
          inputs["input_ids"],
          max_new_tokens=50,
      )[0],
      skip_special_tokens=True
  )

  print(dash_line)
  print('Example ',str(example_number))
  print(dash_line)
  print(f'INPUT PROMPT:\n{abstract}')
  print(dash_line)
  print(f'BASELINE HUMAN SUMMARY:\n{summary}')
  print(dash_line)
  print(f"MODEL GENERATION - WITHOUT PROMPT ENGINEERING: \n{output}\n")

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:
BACKGROUND: Xuesaitong dispersible tablet (XST) product has been clinically proven to be effective for treating cardio-cerebrovascular disease. Furthermore, herb-drug interactions between the XST product and drugs that are commonly co-administered, such as aspirin (ASA), must be explored to ensure safe clinical use. STUDY DESIGN AND METHODS: The current study aims to investigate whether the XST product interacts with ASA when they are administered concomitantly to ensure safety and efficacy. A ultra-high performance liquid chromatography-tandem mass spectrometry (UHPLC-MS/MS) method was developed for the simultaneous determination of ginsenoside Rg1 (Rg1), ginsenoside Rd (Rd), notoginsenoside R1 (R1) and salicylic acid (SA) in rat plasma to investigate the pharmaco

In [None]:
# Zero-shot inference with an instruction prompt ("Interactions" prompt):
# each abstract is wrapped in an instruction and an answer cue, with no examples.
# Iterating over the column values avoids dataset['abstract'][i], which is a
# label lookup and only works while the frame's index is exactly 0..n-1.
for example_number, (abstract, summary) in enumerate(
    zip(dataset['abstract'], dataset['summary']), start=1
):
  prompt = f"""
List the main drugs that interact.

{abstract}

Drug-drug interactions:
  """

  # Encode the prompt, generate up to 50 new tokens, decode the first sequence.
  inputs = tokenizer(prompt, return_tensors='pt')
  output = tokenizer.decode(
      model.generate(
          inputs["input_ids"],
          max_new_tokens=50,
      )[0],
      skip_special_tokens=True
  )

  print(dash_line)
  print('Example ',str(example_number))
  print(dash_line)
  print(f'INPUT PROMPT:\n{prompt}')
  print(dash_line)
  print(f'BASELINE HUMAN SUMMARY:\n{summary}')
  print(dash_line)
  print(f"MODEL GENERATION - ZERO SHOT LEARNING: \n{output}\n")

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:

List the main drugs that interact.

BACKGROUND: Xuesaitong dispersible tablet (XST) product has been clinically proven to be effective for treating cardio-cerebrovascular disease. Furthermore, herb-drug interactions between the XST product and drugs that are commonly co-administered, such as aspirin (ASA), must be explored to ensure safe clinical use. STUDY DESIGN AND METHODS: The current study aims to investigate whether the XST product interacts with ASA when they are administered concomitantly to ensure safety and efficacy. A ultra-high performance liquid chromatography-tandem mass spectrometry (UHPLC-MS/MS) method was developed for the simultaneous determination of ginsenoside Rg1 (Rg1), ginsenoside Rd (Rd), notoginsenoside R1 (R1) and salicylic acid (SA) in r

In [None]:
def make_prompt(example_indices_full, example_index_to_summarize):
  """Build a one-/few-shot prompt from rows of the global `dataset`.

  Args:
    example_indices_full: row keys whose abstract AND reference summary are
      written into the prompt, each block closed by the global `dash_line`.
    example_index_to_summarize: row keys whose abstract is written with the
      instruction only, leaving the answer open.

  Returns:
    All sections concatenated into a single string ('' when both inputs are empty).
  """
  sections = []

  # Solved examples: header, abstract, instruction, expected answer, separator.
  for i in example_indices_full:
    abstract = dataset['abstract'][i]
    summary = dataset['summary'][i]
    sections.append(f"""
Abstract {str(i+1)}:

{abstract}

List the drugs that interact.

{summary}
{dash_line}
""")

  # Unsolved targets: same layout, stopping right after the instruction.
  for i in example_index_to_summarize:
    abstract = dataset['abstract'][i]
    sections.append(f"""
Abstract {str(i+1)}:

{abstract}

List the drugs that interact.

""")

  return ''.join(sections)


In [None]:
# Row passed to make_prompt as the solved example (abstract plus reference summary).
one_shot_example = [0]
# Row passed to make_prompt as the abstract left unanswered; despite the name,
# this is a list of row keys, not summary text.
one_shot_summary = [5]

one_shot_prompt = make_prompt(one_shot_example, one_shot_summary)

# Show the assembled prompt for inspection.
print (one_shot_prompt)


Abstract 1:

BACKGROUND: Xuesaitong dispersible tablet (XST) product has been clinically proven to be effective for treating cardio-cerebrovascular disease. Furthermore, herb-drug interactions between the XST product and drugs that are commonly co-administered, such as aspirin (ASA), must be explored to ensure safe clinical use. STUDY DESIGN AND METHODS: The current study aims to investigate whether the XST product interacts with ASA when they are administered concomitantly to ensure safety and efficacy. A ultra-high performance liquid chromatography-tandem mass spectrometry (UHPLC-MS/MS) method was developed for the simultaneous determination of ginsenoside Rg1 (Rg1), ginsenoside Rd (Rd), notoginsenoside R1 (R1) and salicylic acid (SA) in rat plasma to investigate the pharmacokinetic interaction of XST and ASA in blood stasis model rats. RESULTS AND CONCLUSION: The ASA and XST combination noticeably altered R1 and Rg1 absorption, distribution and disposition. This study indicates tha

In [None]:
# One-shot, in-context inference: run the prepared one-shot prompt and compare
# the model's answer with the human reference for each target row.
for i in one_shot_summary:
  summary = dataset['summary'][i]

  # Encode the prompt, generate up to 50 new tokens, decode the first sequence.
  inputs = tokenizer(one_shot_prompt, return_tensors='pt')
  generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)
  output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

  print(
      dash_line,
      f'BASELINE HUMAN SUMMARY:\n{summary}',
      dash_line,
      f"MODEL GENERATION - ONE SHOT: \n{output}\n",
      sep='\n',
  )

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Aspirin, salvianolate
---------------------------------------------------------------------------------------------------
MODEL GENERATION - ONE SHOT: 
pharmacokinetics and pharmacodynamics of salvianolate and aspirin in patients with coronary heart disease ------------------------------------------------------------------



In [None]:
# Few-shot inference: build a prompt with several solved examples.
# Rows passed to make_prompt as solved examples (abstract plus reference summary).
few_shot_examples = [0, 1, 2]
# Row passed to make_prompt as the abstract left unanswered; despite the name,
# this is a list of row keys, not summary text.
few_shot_summary = [5]

few_shot_prompt = make_prompt(few_shot_examples, few_shot_summary)

# Show the assembled prompt for inspection.
print (few_shot_prompt)


Abstract 1:

BACKGROUND: Xuesaitong dispersible tablet (XST) product has been clinically proven to be effective for treating cardio-cerebrovascular disease. Furthermore, herb-drug interactions between the XST product and drugs that are commonly co-administered, such as aspirin (ASA), must be explored to ensure safe clinical use. STUDY DESIGN AND METHODS: The current study aims to investigate whether the XST product interacts with ASA when they are administered concomitantly to ensure safety and efficacy. A ultra-high performance liquid chromatography-tandem mass spectrometry (UHPLC-MS/MS) method was developed for the simultaneous determination of ginsenoside Rg1 (Rg1), ginsenoside Rd (Rd), notoginsenoside R1 (R1) and salicylic acid (SA) in rat plasma to investigate the pharmacokinetic interaction of XST and ASA in blood stasis model rats. RESULTS AND CONCLUSION: The ASA and XST combination noticeably altered R1 and Rg1 absorption, distribution and disposition. This study indicates tha

In [None]:
# Few-shot, in-context inference: run the prepared few-shot prompt and compare
# the model's answer with the human reference for each target row.
for i in few_shot_summary:
  summary = dataset['summary'][i]

  # Encode the prompt, generate up to 50 new tokens, decode the first sequence.
  inputs = tokenizer(few_shot_prompt, return_tensors='pt')
  generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)
  output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

  print(
      dash_line,
      f'BASELINE HUMAN SUMMARY:\n{summary}',
      dash_line,
      f"MODEL GENERATION - FEW SHOT: \n{output}\n",
      sep='\n',
  )

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Aspirin, salvianolate
---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT: 
Aspirin, salvianolate, and aspirin in coronary heart disease.



In [None]:
# Change conservativeness/creativity of responses.
# Exactly one configuration should be active; swap the comment markers to try another.
# generation_config = GenerationConfig(max_new_tokens=50)
# generation_config = GenerationConfig(max_new_tokens=10)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)
generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)
# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1)

# Reference answer for the row the few-shot prompt leaves open (the last entry,
# matching what the few-shot loop leaves in `summary`).
summary = dataset['summary'][few_shot_summary[-1]]

# Regenerate with the selected config. Without this step the cell would only
# re-print the previous cell's `output`, and the settings above would have no
# effect. With do_sample=True the text can differ from run to run.
inputs = tokenizer(few_shot_prompt, return_tensors='pt')
output = tokenizer.decode(
    model.generate(
        inputs["input_ids"],
        generation_config=generation_config,
    )[0],
    skip_special_tokens=True
)

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}')
print(dash_line)
print(f"MODEL GENERATION - FEW SHOT: \n{output}\n")


---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
Aspirin, salvianolate
---------------------------------------------------------------------------------------------------
MODEL GENERATION - FEW SHOT: 
Aspirin, salvianolate, and aspirin in coronary heart disease.

