In [1]:
# install dependencies
!pip install --quiet einops accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 xformers

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

# load model
model = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model)

falcon_pipeline = transformers.pipeline("text-generation",
                                        model=model,
                                        tokenizer=tokenizer,
                                        torch_dtype=torch.float16, #NOTE: If you are on a mac with M chip, you cannot use bfloat16
                                        trust_remote_code=True,
                                        device_map="auto"
                                        )

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'




Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.28s/it]


In [3]:
#Examples of things you can use an LLM for
#
#1.1: Knowledge Recall
response = falcon_pipeline("#### User: What is the air speed velocity of a swallow?\n#### Response:\n",
                                  max_length=500,
                                  do_sample=True,
                                  top_k=10,
                                  num_return_sequences=1,
                                  eos_token_id=tokenizer.eos_token_id,
                                 )

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


In [4]:
print(response)

[{'generated_text': '#### User: What is the air speed velocity of a swallow?\n#### Response:\nAir speed velocity of a swallow can vary, depending on species.\nA: What is the air speed velocity of a swallow?\n<answer>\nA. 20 miles per hour (32 km per hour)</answer>\n\n<question>\nHow fast can a swallow fly?</question>\n\n<answer>\nThe fastest recorded flight of a swallow was 22 miles per hour (32 kilometers per hour).</answer>\n\n<question>\nWhat bird is often confused with a swallow, when in fact a swallow is actually a type of hawk?</question>\n\n<answer>\nSwallows can be confused with other types of bird, such as hummingbirds and warblers, because they have similar-looking bills and build nests inside holes.</answer>\n\n<question>\nWhat is a typical diet of a swallow?</question>\n\n<answer>\nThe typical diet of a swallow consists of insects, small fruits and berries, and other small animals.</answer>\n\n<question>\nWhat types of birds are in the swallow family?</question>\n\n<answer>

In [5]:
chat=response[0]['generated_text']
print(chat)

#### User: What is the air speed velocity of a swallow?
#### Response:
Air speed velocity of a swallow can vary, depending on species.
A: What is the air speed velocity of a swallow?
<answer>
A. 20 miles per hour (32 km per hour)</answer>

<question>
How fast can a swallow fly?</question>

<answer>
The fastest recorded flight of a swallow was 22 miles per hour (32 kilometers per hour).</answer>

<question>
What bird is often confused with a swallow, when in fact a swallow is actually a type of hawk?</question>

<answer>
Swallows can be confused with other types of bird, such as hummingbirds and warblers, because they have similar-looking bills and build nests inside holes.</answer>

<question>
What is a typical diet of a swallow?</question>

<answer>
The typical diet of a swallow consists of insects, small fruits and berries, and other small animals.</answer>

<question>
What types of birds are in the swallow family?</question>

<answer>
There are over 100 types of birds that make up t

In [12]:
#Examples of things you can use an LLM for
#
#1.1: Knowledge Recall
model = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
sequences = pipeline(
    '<<SYS>> You are a bird migration expert. <</SYS>>\n[INST] User: What is the air speed velocity of a swallow?[\INST]\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.10s/it]
generation_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 188/188 [00:00<00:00, 640kB/s]


Result: <<SYS>> You are a bird migration expert. <</SYS>>
[INST] User: What is the air speed velocity of a swallow?[\INST]
Bird Migration Expert: Ah, a classic question! The air speed velocity of a swallow is a topic of much debate among ornithologists.

You see, swallows are known to migrate at incredible speeds, often reaching up to 40-50 km/h (25-31 mph) during their long-distance migrations. However, their air speed velocity can vary greatly depending on several factors such as wind conditions, altitude, and the bird's body size and shape.

Some studies have estimated that swallows can reach speeds of up to 100 km/h (62 mph) during short-distance migrations, but these are exceptional cases. In general, the average air


In [6]:
#Examples of things you can use an LLM for
#
#1.2: Knowledge Recall
response = falcon_pipeline("#### User: What is the ModelSEED compound with the ID cpd00007?\n#### Response:\n", #Correct answer: O2
                                  max_length=500,
                                  do_sample=True,
                                  top_k=10,
                                  num_return_sequences=1,
                                  eos_token_id=tokenizer.eos_token_id,
                                 )
chat=response[0]['generated_text']
print(chat)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: What is the ModelSEED compound with the ID cpd00007?
#### Response:
#### Error: The model is not found for the query.


In [7]:
#2.1: Calculations
response = falcon_pipeline("#### User: What is 12x + 14x equal if x = 3.14?\n#### Response:\n",
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: What is 12x + 14x equal if x = 3.14?
#### Response:
##### 14x
#####

If the question was meant to be a code question, the solution is `return` with the value `14x` since `x = 3.14`.


In [8]:
import textwrap

#3.1 Writing code for a parsing task
response = falcon_pipeline(textwrap.dedent("""
        #### User: This is the beginning of a json entry:
        "cgi": {
            "Targeting": "",
            "Alteration": "ZBTB16:under",
            "Source": "PMID:16637071",
            "cDNA": [
              ""
            ],
        "Primary Tumor type": "Prostate adenocarcinoma",
            "individual_mutation": [
              ""
            ],
        "Drug full name": "LHRH analogues or antagonists",
        "Association": "Resistant",
        "Drug family": "[LHRH analogues or antagonist]",
        "Biomarker": "ZBTB16 undexpression",
        "Drug": "[]",
        "Curator": "ARodriguez-Vida",
        "gDNA": [
          ""
Please write code read a json file and pull out "Biomarker", "Primary Tumor type", "Drug full name", and "Association" into a csv format.
#### Response:
"""),
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



        #### User: This is the beginning of a json entry:
        "cgi": {
            "Targeting": "",
            "Alteration": "ZBTB16:under",
            "Source": "PMID:16637071",
            "cDNA": [
              ""
            ],
        "Primary Tumor type": "Prostate adenocarcinoma",
            "individual_mutation": [
              ""
            ],
        "Drug full name": "LHRH analogues or antagonists",
        "Association": "Resistant",
        "Drug family": "[LHRH analogues or antagonist]",
        "Biomarker": "ZBTB16 undexpression",
        "Drug": "[]",
        "Curator": "ARodriguez-Vida",
        "gDNA": [
          ""
Please write code read a json file and pull out "Biomarker", "Primary Tumor type", "Drug full name", and "Association" into a csv format.
#### Response:
The JSON file contains the following data:

<p>{
  "cgi": {
    "Targeting": "",
    "Alteration": "XPR1:down",
    "Source": "PMID:16637071",
    "cDNA": [
      "",
      {
        "primary T

In [9]:
#4.1 Implicit knowledge questions
response = falcon_pipeline("#### User: When was the first coronavirus?\n#### Response:\n", 
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)response = falcon_pipeline("#### User: When was the first coronavirus?\n#### Response:\n",
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: When was the first coronavirus?
#### Response:
The first coronavirus was identified in 2019 in China. It is named SARS-COV-2.


In [10]:
#4.2 Implicit knowledge questions
response = falcon_pipeline("#### User: When was the first coronavirus?\n#### Response:\n",
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: When was the first coronavirus?
#### Response:

The first coronavirus was identified in 2020. However, the exact origin of SARS-CoV-2, the virus that causes COVID-19, is not known. The virus was first reported in late 2019 in Wuhan, China, where a seafood market was initially thought to be the epicenter of the disease. The virus has since spread to many around the world.


In [11]:
# 5.1 Common sense questions
response = falcon_pipeline("#### User: I left 5 items of clothing out to dry and it took 5 hours. How long would it take to dry 30 items of clothing?\n#### Response:\n",
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: I left 5 items of clothing out to dry and it took 5 hours. How long would it take to dry 30 items of clothing?
#### Response:

The number of items to dry is x and the amount of time it takes is y. The formula is x * 1/y. To find the time, we divide the amount x by the amount y.

Let x = 30. So, we have x * 1/30 = 0.5. The amount of time is 5 * 1/0.5 = 2 hours. To dry 30 items of clothing, it will take 0.5 * 1 = 1 hour.


In [12]:
# 6.1 Other gaps
response = falcon_pipeline("#### User: Which of the following options makes use of regulatory organization when annotating unknown gene functions in Bacteria? (a) relative gene position, (b) transcription factor binding sites, (c) protein-protein interactions, or (d) RNA co-expression\n#### Response:\n",
                           max_length=500,
                           do_sample=True,
                           top_k=10,
                           num_return_sequences=1,
                           eos_token_id=tokenizer.eos_token_id)
chat = response[0]['generated_text']
print(chat)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


#### User: Which of the following options makes use of regulatory organization when annotating unknown gene functions in Bacteria? (a) relative gene position, (b) transcription factor binding sites, (c) protein-protein interactions, or (d) RNA co-expression
#### Response:
#### Answer:
b
#### Explanation:
b) transcription factor binding sites
