In [2]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os

# Hugging Face access token, read from the HF_TOKEN environment variable so the
# secret never lives in the notebook. A token that was previously committed in
# this cell should be treated as leaked and revoked.
# When the variable is unset this is None and the loaders below receive None;
# presumably they then fall back to a cached `huggingface-cli login` - confirm.
HF_TOKEN = os.environ.get("HF_TOKEN")

In [9]:
# Checkpoint identifier passed to the tokenizer and model loaders.
model_id = "meta-llama/Llama-2-7b-chat-hf"

# Quantization settings handed to the model loader: 4-bit loading with the
# "nf4" quant type, double quantization enabled, bfloat16 as compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [11]:
# Load the tokenizer and the quantized causal LM for `model_id`.
# `token` is the current keyword for the access token; `use_auth_token` is
# deprecated in recent transformers releases.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},  # map the whole model (root module "") to device 0
    token=HF_TOKEN,
)

Downloading shards: 100%|██████████| 2/2 [02:52<00:00, 86.12s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:21<00:00, 10.80s/it]


In [12]:
# System prompt used by get_prompt() when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful, and knowledgeable dermatology assistant.\n"
    "You provide accurate, precise, and safe answers related to dermatological diseases and treatments.\n"
    "Always base your answers on the provided context. If you don't have enough context, say:\n"
    '"I don\'t have enough information based on the provided context."\n'
    "Cite URLs whenever possible to support your responses.\n"
)


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Wrap an instruction and a system prompt in [INST] / <<SYS>> markers.

    Args:
        instruction: Text placed after the system block (the user turn).
        new_system_prompt: Text placed between the <<SYS>> markers; defaults
            to DEFAULT_SYSTEM_PROMPT.

    Returns:
        The concatenated prompt string:
        "[INST]<<SYS>>\\n" + system prompt + "\\n<</SYS>>\\n\\n" + instruction + "[/INST]".
    """
    pieces = [
        "[INST]",
        "<<SYS>>\n",
        new_system_prompt,
        "\n<</SYS>>\n\n",
        instruction,
        "[/INST]",
    ]
    return "".join(pieces)

In [4]:
from bs4 import BeautifulSoup
import requests

def scrape_pubmed(keyword, timeout=10):
    """Search PubMed for ``keyword`` and scrape the first page of results.

    For every result on the search page the article page is fetched as well
    so that its abstract can be extracted.

    Args:
        keyword: Search term. It is sent as the ``term`` query parameter so
            spaces and special characters are URL-encoded by requests.
        timeout: Seconds to wait on each HTTP request before giving up;
            without a timeout a stalled connection would block forever.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``authors``,
        ``snippet`` and ``abstract`` (missing pieces are empty strings).
        Returns an empty list if the search page cannot be fetched or parsed.
    """
    base_url = "https://pubmed.ncbi.nlm.nih.gov"

    def _text(parent, selector):
        # Stripped text of the first match, or '' when nothing matches.
        tag = parent.select_one(selector)
        return tag.text.strip() if tag else ''

    try:
        # One session for the search page and all article pages.
        with requests.Session() as session:
            response = session.get(f"{base_url}/", params={"term": keyword}, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            results = []
            for element in soup.select('.docsum-content'):
                title_tag = element.select_one('.docsum-title')
                title = title_tag.text.strip() if title_tag else ''
                link = f"{base_url}{title_tag['href']}" if title_tag and title_tag.has_attr('href') else ''
                abstract = ''

                if link:
                    # Best effort: a failed article fetch leaves the abstract
                    # empty but keeps the search-page metadata.
                    try:
                        article_response = session.get(link, timeout=timeout)
                        article_response.raise_for_status()
                        article_soup = BeautifulSoup(article_response.text, 'html.parser')
                        abstract = _text(article_soup, '.abstract-content')
                    except Exception as e:
                        print(f"Error fetching abstract for {link}: {e}")

                results.append({
                    'title': title,
                    'link': link,
                    'authors': _text(element, '.full-authors'),
                    'snippet': _text(element, '.docsum-snippet'),
                    'abstract': abstract
                })

            return results
    except Exception as e:
        print(f"Error scraping PubMed: {e}")
        return []

# Scrape PubMed for the term "psoriasis"; `pubmed_data` is the list of result
# dicts that later cells turn into Document objects.
pubmed_data = scrape_pubmed("psoriasis")


In [19]:
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# One Document per scraped article that has a non-empty abstract: the abstract
# becomes the page content, the remaining fields are carried as metadata.
documents = [
    Document(
        page_content=record["abstract"],
        metadata={field: record[field] for field in ("title", "link", "authors", "snippet")},
    )
    for record in pubmed_data
    if record.get("abstract")
]

In [20]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

In [22]:
# Embed the documents and store them in the on-disk Chroma directory.
# The embedding model is named explicitly rather than relying on the library
# default: the Ollama section of this notebook writes to the same
# "./dermatology_db" directory with this model, and both writers must produce
# vectors from the same model for the stored collection to stay usable.
db = Chroma.from_documents(
    documents=documents,
    embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    persist_directory="./dermatology_db"
)

# User-turn template; {context} and {question} are left as placeholders to be
# filled in when the prompt is formatted.
instruction = """\
Given the context that has been provided:
{context}
Answer the following question:
{question}"""

# System prompt passed to get_prompt() in place of its default.
system_prompt = (
    "You are an expert in dermatology.\n"
    "You will be given a context to answer questions from.\n"
    "Be precise in your answers and provide citations when possible.\n"
    'If you are unsure, say "I don\'t have enough information based on the provided context."\n'
)

  embedding=HuggingFaceEmbeddings(),


In [30]:
from langchain.chains import ConversationalRetrievalChain

class DermatologyBot:
    """Bundles a memory, a prompt and a retriever into a conversational chain.

    Note: this notebook defines a class with this name in more than one cell;
    whichever definition ran last is the one in effect.
    """

    def __init__(self, memory, prompt, retriever):
        """Store the three collaborators used by create_chat_bot()."""
        self.memory, self.prompt, self.retriever = memory, prompt, retriever

    def create_chat_bot(self, max_new_tokens=512):
        """Build a ConversationalRetrievalChain on top of the HF pipeline.

        Args:
            max_new_tokens: Forwarded to create_pipeline().

        Returns:
            The chain created by ConversationalRetrievalChain.from_llm,
            configured to return source documents alongside the answer.
        """
        llm = HuggingFacePipeline(pipeline=create_pipeline(max_new_tokens))
        chain_options = {
            "llm": llm,
            "retriever": self.retriever,
            "memory": self.memory,
            "return_source_documents": True,
            # The custom prompt is applied to the document-combining step.
            "combine_docs_chain_kwargs": {"prompt": self.prompt},
        }
        return ConversationalRetrievalChain.from_llm(**chain_options)


In [23]:
from langchain import PromptTemplate

# Render the chat template with this notebook's instruction and system prompt,
# then expose it as a PromptTemplate with the two placeholders it contains.
template = get_prompt(instruction, new_system_prompt=system_prompt)
prompt = PromptTemplate(input_variables=["context", "question"], template=template)
from langchain.memory import ConversationBufferWindowMemory

# Windowed chat memory (k=5) stored under "chat_history".
# output_key tells the memory which chain output to record: a chain built with
# return_source_documents=True returns both "answer" and "source_documents",
# and without an explicit output_key the memory cannot choose between them and
# raises when saving the turn.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    k=5,
    return_messages=True,
    output_key="answer"
)

# Retriever with default search settings (a later cell rebinds this name
# with search_kwargs={"k": 5}).
retriever = db.as_retriever()

  memory = ConversationBufferWindowMemory(


In [24]:
def create_pipeline(max_new_tokens=512):
    """Create a "text-generation" pipeline around the notebook's model.

    Args:
        max_new_tokens: Generation length limit forwarded to the pipeline.

    Returns:
        Whatever ``pipeline(...)`` returns for the module-level ``model`` and
        ``tokenizer`` with temperature 0.7.
    """
    generation_settings = {
        "max_new_tokens": max_new_tokens,
        "temperature": 0.7,
    }
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_settings,
    )

from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain

class DermatologyBot:
    """Bundles a memory, a prompt and a retriever into a conversational chain.

    The chain is built with return_source_documents=True because the query
    cell reads result["source_documents"] to format citations. The memory
    passed in therefore has to be configured with output_key="answer" so it
    knows which of the two chain outputs to store.
    """

    def __init__(self, memory, prompt, retriever):
        """Store the memory, prompt template and retriever for later use."""
        self.memory = memory
        self.prompt = prompt
        self.retriever = retriever

    def create_chat_bot(self, max_new_tokens=512):
        """Build a ConversationalRetrievalChain on top of the HF pipeline.

        Args:
            max_new_tokens: Forwarded to create_pipeline().

        Returns:
            The chain created by ConversationalRetrievalChain.from_llm; its
            results include "source_documents" next to "answer".
        """
        hf_pipe = create_pipeline(max_new_tokens)
        llm = HuggingFacePipeline(pipeline=hf_pipe)
        qa = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=self.retriever,
            memory=self.memory,
            # Needed by the citation step; matches the other definition of
            # this class in the notebook.
            return_source_documents=True,
            combine_docs_chain_kwargs={"prompt": self.prompt}
        )
        return qa

In [26]:
# Rebind `retriever`, this time passing search_kwargs={"k": 5}; this replaces
# the retriever created earlier without search arguments.
retriever = db.as_retriever(search_kwargs={"k": 5})
def format_citations(docs):
    """Render one "- title: link" line per retrieved document.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping.

    Returns:
        The lines joined with newlines. A missing "title" is shown as
        "Unknown Title" and a missing "link" as "No Link Available"; an
        empty input yields an empty string.
    """
    return "\n".join(
        f"- {entry.metadata.get('title', 'Unknown Title')}: "
        f"{entry.metadata.get('link', 'No Link Available')}"
        for entry in docs
    )


In [31]:
# Wire the memory, prompt template and retriever into the bot, then build the
# conversational retrieval chain with the default max_new_tokens.
dermatology_bot = DermatologyBot(memory=memory, prompt=prompt, retriever=retriever)
bot = dermatology_bot.create_chat_bot()

Device set to use cuda:0


In [None]:
# Ask one question and print the answer followed by the cited sources.
query = "What are the treatments for psoriasis?"
# `invoke` is the supported way to run a chain; calling the chain object
# directly is deprecated in current LangChain releases.
result = bot.invoke({"question": query})
answer = result["answer"]
retrieved_docs = result["source_documents"]
citations = format_citations(retrieved_docs)
final_response = f"{answer}\n\nCitations:\n{citations}"
print(final_response)


In [5]:
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama

In [6]:
# Build the Document list for the Ollama section: keep only scraped articles
# with a non-empty abstract, using the abstract as content and the remaining
# scraped fields as metadata.
documents = [
    Document(
        page_content=article["abstract"],
        metadata={key: article[key] for key in ("title", "link", "authors", "snippet")},
    )
    for article in pubmed_data
    if article.get("abstract")
]


In [9]:
pip install -U langchain-ollama

Collecting langchain-ollamaNote: you may need to restart the kernel to use updated packages.

  Downloading langchain_ollama-0.2.2-py3-none-any.whl.metadata (1.9 kB)
Collecting ollama<1,>=0.4.4 (from langchain-ollama)
  Downloading ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)
Downloading langchain_ollama-0.2.2-py3-none-any.whl (18 kB)
Downloading ollama-0.4.7-py3-none-any.whl (13 kB)
Installing collected packages: ollama, langchain-ollama
Successfully installed langchain-ollama-0.2.2 ollama-0.4.7


In [11]:
from langchain_ollama import OllamaLLM
# LLM served by a local Ollama instance.
llm = OllamaLLM(base_url="http://localhost:11434", model="llama2")

# Embed the documents with MiniLM and store them in the on-disk Chroma directory.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
chroma_db = Chroma.from_documents(
    documents,
    embedding=embeddings,
    persist_directory="./dermatology_db",
)

# Retrieval QA chain: retriever configured with k=5, source documents returned
# with the answer so they can be cited.
qa_chain = RetrievalQA.from_chain_type(
    return_source_documents=True,
    retriever=chroma_db.as_retriever(search_kwargs={"k": 5}),
    llm=llm,
)

In [13]:
def ask_question(query):
    """Run ``query`` through ``qa_chain`` and append a citation list.

    Args:
        query: The question text, sent to the chain under the "query" key.

    Returns:
        The chain's "result" text, a blank line, the heading "Citations:" and
        one "- title: link" line per returned source document ("No Title" /
        "No Link" stand in for missing metadata).
    """
    # `invoke` is the supported way to run a chain; calling the chain object
    # directly is deprecated in current LangChain releases.
    result = qa_chain.invoke({"query": query})
    answer = result["result"]
    source_docs = result["source_documents"]

    citations = "\n".join(
        f"- {doc.metadata.get('title', 'No Title')}: {doc.metadata.get('link', 'No Link')}"
        for doc in source_docs
    )

    return f"{answer}\n\nCitations:\n{citations}"

# Example run: ask one question through the Ollama-backed chain and print the
# answer together with its citation list.
query = "What are the treatments for psoriasis?"
response = ask_question(query)
print(response)

The treatments for psoriasis include:

1. Topical therapy: Corticosteroids, vitamin D analogues, and retinoids can be applied directly to the skin to reduce inflammation and slow down cell growth.
2. Phototherapy: Exposure to ultraviolet (UV) light, either from natural sources or through artificial UV light therapy, can help to reduce inflammation and slow down cell growth.
3. Systemic therapy: Oral or injected medications such as methotrexate, cyclosporine, and TNF-alpha inhibitors can be used to treat psoriasis. These medications work throughout the body and can have a more significant impact on the disease than topical or phototherapy.
4. Biologic therapy: This type of treatment uses genetically engineered drugs that target specific proteins involved in the immune system. Examples include adalimumab, etanercept, and ustekinumab. These medications can be effective in treating moderate to severe psoriasis.
5. Laser therapy: Exposure to specific wavelengths of laser light can help to r