In [1]:
import torch
import os, json

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Running using GPU")
else:
    device = torch.device("cpu")
    print("Running using CPU")

MAIN_DIR = ".."

Running using GPU


In [2]:
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

# Llama-7B

## Load in Model Weights

In [3]:
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
model_path_7b = os.path.join(MAIN_DIR, "pretrained", "llama", "7B_HF")
model_path_13b = os.path.join(MAIN_DIR, "pretrained", "llama", "13B_HF")
model_path_30b = os.path.join(MAIN_DIR, "pretrained", "llama", "30B_HF")

# model = LlamaForCausalLM.from_pretrained(model_path_7b)
# tokenizer = LlamaTokenizer.from_pretrained(model_path_7b)

# model = LlamaForCausalLM.from_pretrained(model_path_13b)
# tokenizer = LlamaTokenizer.from_pretrained(model_path_13b)

model = LlamaForCausalLM.from_pretrained(model_path_30b)
tokenizer = LlamaTokenizer.from_pretrained(model_path_30b)

generation_configs = GenerationConfig(
    temperature = 0.5,
    top_p = 0.75,
    num_beams = 5,
    max_new_tokens = 101,
    min_new_tokens = 100
)

llm_pipeline = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    generation_config = generation_configs
)

llm = HuggingFacePipeline(pipeline=llm_pipeline)

## Benchmark on Short Prompt

In [38]:
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from typing import Dict, Optional

In [51]:
def count_token_from_prompt(
    prompt: str,
    tokenizer: PreTrainedTokenizerBase,
    output_text: Optional[str] = None
) -> Dict:
    input_tokens = tokenizer(prompt, return_tensors="pt")
    input_token_count = len(input_tokens["input_ids"][0])
    if output_text:
        output_tokens = tokenizer(output_text, return_tensors="pt")
        output_token_count = len(output_tokens["input_ids"][0])
    else:
        output_token_count = 0
    
    total_token_count = input_token_count + output_token_count
                    
    return {"prompt_tokens": input_token_count, "generated_tokens": output_token_count, "total_tokens": total_token_count}

In [63]:
# system_prompt = """
# You are a helpful and honest AI Assistant.
# =====================
# TASK:
# Your task is to answer questions provided by human.
# """

# human_prompt = "Question: {question}"

# SHORT_PROMPT = ChatPromptTemplate.from_messages(
#     [
#         SystemMessagePromptTemplate.from_template(system_prompt),
#         HumanMessagePromptTemplate.from_template(human_prompt)
#     ]
# )

# template_question = "Give a brief introduction of Singapore history."
# print(SHORT_PROMPT.format(question = template_question))

prompt = """
You are a helpful and honest AI Assistant.
=====================
TASK:
Your task is to answer questions provided by human.
=====================
Question: {question}

Answer: {prefix}
"""

SHORT_PROMPT = PromptTemplate.from_template(
    template = prompt,
)

template_question = "Give a brief introduction of Singapore history."
prefix = "Singapore history starts in year 1819 when"

print(SHORT_PROMPT.format(question=template_question, prefix=prefix))


You are a helpful and honest AI Assistant.
TASK:
Your task is to answer questions provided by human.
Question: Give a brief introduction of Singapore history.

Answer: Singapore history starts in year 1819 when



In [84]:
llama_7b_qa_chain = LLMChain(
    llm=llm,
    prompt=SHORT_PROMPT,
    verbose=True
)

In [79]:
%timeit
answers = []
response = llama_7b_qa_chain({"question": template_question, "prefix": prefix})
answers.append(response)
print(response["text"])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a helpful and honest AI Assistant.
TASK:
Your task is to answer questions provided by human.
Question: Give a brief introduction of Singapore history.

Answer: Singapore history starts in year 1819 when
[0m

[1m> Finished chain.[0m
\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stam


In [80]:
print(answers[0]["text"])

\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stamford Raffles
\end{itemize}

arrived in Singapore.
\begin{itemize}
\item Sir Stam


In [81]:
count_token_from_prompt(
    SHORT_PROMPT.format(question=template_question, prefix=prefix),
    tokenizer=tokenizer,
    output_text=answers[0]["text"]
    )

{'prompt_tokens': 65, 'generated_tokens': 102, 'total_tokens': 167}

## Benchmark on Long Prompt

In [11]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader

In [12]:
document_folder = os.path.join(MAIN_DIR, "llm-workshops", "SGH", "example2")
document_paths = [os.path.join(document_folder, file_path) for file_path in os.listdir(document_folder)] # Generate the list of paths to all doc

all_meetings = []
for document_path in document_paths:
    all_meetings.extend(TextLoader(document_path, encoding = "utf8").load()) # TextLoader Module will read the content of the text files 

print("Number of loaded document:", len(all_meetings))

Number of loaded document: 4


In [13]:
first_meeting = all_meetings[0]
print(first_meeting.page_content)


Doctor: Good morning, my name is Dr. Anderson. How can I assist you today?

Patient: Good morning, Doctor. I've been experiencing some abdominal pain lately, and I've noticed some blood in my stool. I'm quite worried about it.

Doctor: I understand. It's good that you've come to see me. Can you describe the pain you are feeling?

Patient: It's a kind of sharp pain that comes and goes. It's mainly in the lower part of my abdomen.

Doctor: And can you tell me how long this has been going on?

Patient: I'd say for about two weeks now.

Doctor: Okay, I see. I understand that this can be concerning. We'll get to the bottom of this. Now, I'm going to ask you a few questions to help me understand your situation better. May I know your age, please?

Patient: I'm 55 years old.

Doctor: Thank you. Are you currently taking any medication or do you have any known allergies?

Patient: No, I'm not on any medication. As for allergies, I'm allergic to penicillin.

Doctor: Noted. And have you previous

In [28]:
summarize_prompt = """You are an honest and detail-oriented AI Medical Assistant. If you do not know the answers, just say I don't know, do not make up an answer.
=========================
TASK: Your task is to summarize the conversation between the doctor and the patient for doctors' future refence for treatments/recommendations.
Note that your summary should be concise while including details on patients and treatments.
========================
OUTPUT INSTRUCTION: Your summary should include, if applicable:
- Patient's details such as age, gender, ethnicity and family member history
- Living habit such as eating, drinking, smoking
- Previous medical conditions, treatment, medication
- Current Symptoms & Treatment recommendation.

Keep your summary in fewer than 300 words.
=======================
CONVERSATION:
{text}
=======================
SUMMARY:
"""

custom_summarize_prompt = PromptTemplate.from_template(
    summarize_prompt
)

custom_summarize_chain = load_summarize_chain(
    llm = llm,
    prompt = custom_summarize_prompt,
    chain_type = "stuff",
    verbose = True
    )

In [29]:
documents_str = "\n\n".join(document.page_content for document in all_meetings[:2])

prompt_str = summarize_prompt.format(text = documents_str)
len(tokenizer(prompt_str)["input_ids"])

1986

In [30]:
custom_summary_all = custom_summarize_chain(all_meetings[:2]) # Send API request & extract content of API response



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an honest and detail-oriented AI Medical Assistant. If you do not know the answers, just say I don't know, do not make up an answer.
TASK: Your task is to summarize the conversation between the doctor and the patient for doctors' future refence for treatments/recommendations.
Note that your summary should be concise while including details on patients and treatments.
OUTPUT INSTRUCTION: Your summary should include, if applicable:
- Patient's details such as age, gender, ethnicity and family member history
- Living habit such as eating, drinking, smoking
- Previous medical conditions, treatment, medication
- Current Symptoms & Treatment recommendation.

Keep your summary in fewer than 300 words.
CONVERSATION:

Doctor: Good morning, my name is Dr. Anderson. How can I assist you today?

Patient: Good morning, Doctor. I've been experiencing some abd

In [32]:
print(custom_summary_all["output_text"])

Doctor: Good morning, my name is Dr. Anderson. How can I assist you today?

Patient: Good morning, Doctor. I've been experiencing some abdominal pain lately, and I've noticed some blood in my stool. I'm quite worried about it.

Doctor: I understand. It's good that you've come to see me. Can you describe the pain you are feeling?

Patient: It
