In [1]:
import torch
import transformers

from transformers import LlamaForCausalLM, LlamaTokenizer

In [2]:
model_dir = "/data/yingfei/models/llm/llama2/llama/llama-2-7b-chat-hf"
model = LlamaForCausalLM.from_pretrained(model_dir)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
tokenizer = LlamaTokenizer.from_pretrained(model_dir)

In [4]:
pipeline = transformers.pipeline(
    "text-generation",

    model=model,

    tokenizer=tokenizer,

    torch_dtype=torch.float16,

    device_map="auto",

)

In [5]:
sequences = pipeline(
    'I have tomatoes, basil and cheese at home. What can I cook for dinner?\n',

    do_sample=True,

    top_k=10,

    num_return_sequences=1,

    eos_token_id=tokenizer.eos_token_id,

    max_length=400,
)

for seq in sequences:
    print(f"{seq['generated_text']}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


I have tomatoes, basil and cheese at home. What can I cook for dinner?

Answer: With the ingredients you have, you can make a delicious Tomato and Basil Pasta. Here's a simple recipe you can follow:

Ingredients:

* 1 lb pasta (spaghetti or linguine work well)
* 2 cups tomato sauce (homemade or store-bought)
* 1/4 cup fresh basil leaves, chopped
* 1/4 cup grated Parmesan cheese
* Salt and pepper to taste
* Olive oil for greasing the pan

Instructions:

1. Bring a large pot of salted water to boil and cook the pasta according to the package instructions.
2. While the pasta cooks, heat a tablespoon of olive oil in a large skillet over medium heat.
3. Add the chopped basil to the skillet and sautÃ© for 1-2 minutes until fragrant.
4. Add the tomato sauce to the skillet and stir to combine with the basil.
5. Drain the cooked pasta and add it to the skillet. Toss the pasta in the tomato sauce until well coated.
6. Transfer the pasta and sauce to a serving dish and sprinkle the Parmesan chees

In [20]:
### test the cancer drug sensitivity prompt
sequences = pipeline(
    'Think step by step and decide in a single word reflecting the drug sensitivity of the drug on the cell line with given mutations: [Sensitive/Resistant], [Reasoning].\nDrug and cell line mutations: \nThe drug is YK-4-279. The drug SMILES structure is COC1=CC=C(C=C1)C(=O)CC2(C3=C(C=CC(=C3NC2=O)Cl)Cl)O. Drug target is RNA helicase A. Drug target pathway is Other.\nThe mutations of the cell line are NOTCH1, NOTCH3, PIK3R1, PPP2R1A, TP53, TSC2, WHSC1L1.\nDrug Sensitivity: ?',

    do_sample=True,

    top_k=10,

    num_return_sequences=1,
    
    eos_token_id=tokenizer.eos_token_id,

    max_length=400,
)

for seq in sequences:
    print(f"{seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Think step by step and decide in a single word reflecting the drug sensitivity of the drug on the cell line with given mutations: [Sensitive/Resistant], [Reasoning].
Drug and cell line mutations: 
The drug is YK-4-279. The drug SMILES structure is COC1=CC=C(C=C1)C(=O)CC2(C3=C(C=CC(=C3NC2=O)Cl)Cl)O. Drug target is RNA helicase A. Drug target pathway is Other.
The mutations of the cell line are NOTCH1, NOTCH3, PIK3R1, PPP2R1A, TP53, TSC2, WHSC1L1.
Drug Sensitivity: ?
Reasoning:?

Please help me solve this problem by providing the correct answer for each drug and cell line mutation.


In [21]:
### test the cancer drug recommendation prompt
sequences = pipeline(
    'Think step by step and decide the best drug option for the cell line with given mutations: [Drug Name], [Reasoning].\
Drug 1: The drug is BRYOSTATIN-1. The drug SMILES structure is CCCC=CC=CC(=O)OC1C(=CC(=O)OC)CC2CC(OC(=O)CC(CC3CC(C(C(O3)(CC4CC(=CC(=O)OC)CC(O4)C=CC(C1(O2)O)(C)C)O)(C)C)OC(=O)C)O)C(C)O. Drug target is Unknown. Drug target pathway is Unknown.\
Drug 2: The drug is KIN001-135. The drug SMILES structure is COC1=C(C=C2C(=C1)N=CN2C3=CC(=C(S3)C#N)OCC4=CC=CC=C4S(=O)(=O)C)OC. Drug target is IKK. Drug target pathway is Other, kinases.\
Drug 3: The drug is GSK2606414. The drug SMILES structure is CN1C=C(C2=C(N=CN=C21)N)C3=CC4=C(C=C3)N(CC4)C(=O)CC5=CC(=CC=C5)C(F)(F)F. Drug target is PERK. Drug target pathway is Metabolism.\
The mutations of the cell line are NOTCH1, NOTCH3, PIK3R1, PPP2R1A, TP53, TSC2, WHSC1L1.\
Best drug option: ?',

    do_sample=True,

    top_k=10,

    num_return_sequences=1,

    eos_token_id=tokenizer.eos_token_id,

    max_length=1000,
)

for seq in sequences:
    print(f"{seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Think step by step and decide the best drug option for the cell line with given mutations: [Drug Name], [Reasoning].Drug 1: The drug is BRYOSTATIN-1. The drug SMILES structure is CCCC=CC=CC(=O)OC1C(=CC(=O)OC)CC2CC(OC(=O)CC(CC3CC(C(C(O3)(CC4CC(=CC(=O)OC)CC(O4)C=CC(C1(O2)O)(C)C)O)(C)C)OC(=O)C)O)C(C)O. Drug target is Unknown. Drug target pathway is Unknown.Drug 2: The drug is KIN001-135. The drug SMILES structure is COC1=C(C=C2C(=C1)N=CN2C3=CC(=C(S3)C#N)OCC4=CC=CC=C4S(=O)(=O)C)OC. Drug target is IKK. Drug target pathway is Other, kinases.Drug 3: The drug is GSK2606414. The drug SMILES structure is CN1C=C(C2=C(N=CN=C21)N)C3=CC4=C(C=C3)N(CC4)C(=O)CC5=CC(=CC=C5)C(F)(F)F. Drug target is PERK. Drug target pathway is Metabolism.The mutations of the cell line are NOTCH1, NOTCH3, PIK3R1, PPP2R1A, TP53, TSC2, WHSC1L1.Best drug option: ?Answer: Based on the information provided, the best drug option for the cell line with the given mutations would be KIN001-135.KIN001-135 targets IKK, which is invo

In [23]:
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
model.cuda()
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

def chat_with_llama(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    input_ids = input_ids.to('cuda')
    output = model.generate(input_ids, max_length=256, num_beams=4, no_repeat_ngram_size=2)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

prompt = "Think step by step and decide in a single word reflecting the drug sensitivity of the drug on the cell line with given mutations: [Sensitive/Resistant], [Reasoning].\nDrug and cell line mutations: \nThe drug is YK-4-279. The drug SMILES structure is COC1=CC=C(C=C1)C(=O)CC2(C3=C(C=CC(=C3NC2=O)Cl)Cl)O. Drug target is RNA helicase A. Drug target pathway is Other.\nThe mutations of the cell line are NOTCH1, NOTCH3, PIK3R1, PPP2R1A, TP53, TSC2, WHSC1L1.\nDrug Sensitivity: ?"
response = chat_with_llama(prompt)
print("Llama:", response)

RuntimeError: The NVIDIA driver on your system is too old (found version 11010). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver.