In [1]:
import transformers
import torch
import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset
from transformers import DataCollatorForLanguageModeling
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
from transformers import pipeline
import re

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Local directory that AutoModelForCausalLM.from_pretrained reads the model from.
model_path = "./fine_tuned_model_lora_10"


In [3]:
# Example question: tokenized as-is for the direct `generate` call and later
# substituted into the "{user_query}" slot of the chat prompt.
user_query = "Tell me the deep learning tools that know about drug discovery"

In [4]:
# The tokenizer is fetched by its Hub id rather than from `model_path`.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Encode the raw query as PyTorch tensors; the "input_ids" and
# "attention_mask" entries of the result are what `generate` is given.
tokenized_inputs = tokenizer(text=user_query, return_tensors="pt")

In [5]:
# Load the causal-LM weights from the local `model_path` directory.
model = AutoModelForCausalLM.from_pretrained(model_path)

# Sample a continuation of the raw (un-templated) query.
# `pad_token_id` is passed explicitly so `generate` does not have to fall back
# to the EOS id and emit the "Setting `pad_token_id` to `eos_token_id`" warning.
# The tokenizer's pad token was set to its EOS token when it was created.
tokenized_outputs = model.generate(
    tokenized_inputs["input_ids"],
    attention_mask=tokenized_inputs["attention_mask"],
    pad_token_id=tokenizer.pad_token_id,
    max_new_tokens=150,
    do_sample=True,
    top_k=50,
    top_p=0.95,
)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [6]:
# Disabled: decode the token ids returned by `model.generate` and print the text.
# output = tokenizer.batch_decode(tokenized_outputs, skip_special_tokens=True)
# print(output)

In [7]:
# Text-generation pipeline built around the model and tokenizer loaded earlier.
READER_LLM = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Output shape: at most 300 new tokens, prompt text not echoed back.
    max_new_tokens=300,
    return_full_text=False,
    # Decoding: sampling at a low temperature with a mild repetition penalty.
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.1,
)

Device set to use cuda:0


In [8]:
# Two-message chat (system + user). The user message carries a literal
# "{user_query}" placeholder that is filled in after the chat template is rendered.
prompt_in_chat_format = [
    dict(
        role="system",
        # Adjacent literals spell out the prompt's line breaks, leading
        # indentation and trailing spaces explicitly.
        content=(
            "You are an expert in drug discovery and development. \n"
            "        Your task is to explain technologies around drug discovery and new artificial intelligence tools that were introduced to this. \n"
            "        Give a small comprehensive answer. Response should be concise and relevant to the question asked.\n"
            "        "
        ),
    ),
    dict(
        role="user",
        content="Please provide an analysis of the following: {user_query}",
    ),
]


In [9]:
# Render the chat messages into a single prompt string (tokenize=False) and
# request the generation prompt so the text ends where the assistant's reply starts.
PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)

# Substitute only the exact "{user_query}" placeholder. str.format would treat
# every brace in the rendered template as a replacement field, so it raises
# (or silently collapses "{{" / "}}") as soon as the system prompt or the
# chat template itself contains one.
final_prompt = PROMPT_TEMPLATE.replace("{user_query}", user_query)

In [10]:
# Run the pipeline on the finished prompt and keep the text of its first result.
# NOTE(review): if the rendered chat template already begins with a BOS marker,
# the pipeline's own tokenization may prepend a second one — confirm.
reader_outputs = READER_LLM(final_prompt)
answer = reader_outputs[0]["generated_text"]
print(answer)

Deep learning has revolutionized the field of drug discovery by providing AI-powered tools that can analyze vast amounts of data, identify patterns, and make predictions with unprecedented accuracy. Here are some of the key deep learning-based tools used in drug discovery:

1. **Convolutional Neural Networks (CNNs)**: CNNs are widely used for image-based screening of large datasets, such as protein structures, which helps identify potential lead compounds.
2. **Recurrent Neural Networks (RNNs)**: RNNs are applied to text-based data, like molecular sequences, to predict the behavior of molecules and their interactions.
3. **Generative Adversarial Networks (GANs)**: GANs generate novel compounds based on existing ones, helping researchers to design new leads with minimal experimental effort.
4. **Transfer Learning**: This technique leverages pre-trained models from one domain to tackle another, enabling the transfer of knowledge across different types of data, such as chemical structure 