In [1]:
!pip -q install langchain
!pip -q install huggingface_hub
!pip -q install  git+https://github.com/huggingface/transformers # need to install from github
!pip -q install duckduckgo-search

In [3]:
import warnings
warnings.filterwarnings("ignore")

import torch

import os
import textwrap
import re

from operator import itemgetter
from typing import List, Union

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

import langchain
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts.chat import (
    PromptTemplate,
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.prompts import StringPromptTemplate

from langchain import LLMChain
from langchain.llms import HuggingFaceHub, HuggingFacePipeline
from langchain.schema.runnable import ConfigurableField
from langchain.schema import AgentAction, AgentFinish
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.tools import DuckDuckGoSearchRun
from langchain.memory import ConversationBufferWindowMemory

# Keep a token that is already exported in the environment; only fall back to the
# placeholder when none is set. Never commit a real token in this cell.
os.environ.setdefault('HUGGINGFACEHUB_API_TOKEN', "your_api_token")

In [4]:
# Pick the CUDA device when PyTorch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


## Set up the LLM: Mistral-7B-OpenOrca

In [5]:
# Hugging Face Hub repository that provides both the model weights and the tokenizer.
repo_id = "Open-Orca/Mistral-7B-OpenOrca"

# torch_dtype="auto" is a model-loading option, so it is passed to the model only.
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype="auto")
# A tokenizer has no tensor dtype, so no torch_dtype argument is passed here.
tokenizer = AutoTokenizer.from_pretrained(repo_id)

Downloading (…)lve/main/config.json:   0%|          | 0.00/623 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.69k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Text-generation pipeline built from the model and tokenizer loaded above and
# placed on the selected device. The tokenizer's EOS token id doubles as the pad id.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    max_length=2048,
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the pipeline so it can be used as the LLM of a LangChain chain.
llm = HuggingFacePipeline(pipeline=pipe)

## Setup Custom Agent: DocAI

- Access WebMD using DuckDuckGo
- Keep a conversation memory so that follow-up questions have context

### Tools

In [7]:
search = DuckDuckGoSearchRun()


def duck_wrapper(input_text):
    """Search DuckDuckGo for ``input_text`` with the query scoped to webmd.com.

    Returns whatever ``search.invoke`` returns for the site-restricted query.
    """
    webmd_query = f"site:webmd.com {input_text}"
    return search.invoke(webmd_query)


# The only tool exposed to the agent: the WebMD-scoped search defined above.
tools = [
    Tool(
        name="Search WebMD",
        description="useful for when you need to answer medical and pharmalogical questions",
        func=duck_wrapper,
    ),
]

### Prompt

In [8]:
# Prompt text for the agent. Placeholders:
#   {tools}, {tool_names}, {agent_scratchpad} - filled in by CustomPromptTemplate.format
#   {input}, {history}                        - declared as input variables when the
#                                               template is instantiated ({history} is
#                                               presumably supplied by the executor's
#                                               memory - confirm)
# The "Final Answer:" and "Action:" / "Action Input:" markers are what
# CustomOutputParser.parse looks for in the LLM output.
template_with_history = """Answer the following questions as best you can, but speaking as compasionate medical professional. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a compasionate medical professional when giving your final answer. If the condition is serious advise they speak to a doctor.

Previous conversation history:
{history}

New question: {input}
{agent_scratchpad}"""


class CustomPromptTemplate(StringPromptTemplate):
    """String prompt that injects the tool list and prior agent steps into ``template``."""

    # Template text; rendered with ``str.format`` in ``format``.
    template: str
    # Tools whose names and descriptions are written into the prompt.
    tools: List[Tool]

    def format(self, **kwargs) -> str:
        """Render the template.

        ``kwargs`` must contain ``intermediate_steps``, an iterable of
        ``(action, observation)`` pairs; it is removed and replaced by the
        derived ``agent_scratchpad``, ``tools`` and ``tool_names`` values
        before the remaining keys are passed to ``str.format``.

        Raises:
            KeyError: if ``intermediate_steps`` is not supplied.
        """
        steps = kwargs.pop("intermediate_steps")

        # Replay each earlier step as: the action's log, then its observation,
        # then a fresh "Thought: " cue for the model to continue from.
        scratchpad_parts = []
        for action, observation in steps:
            scratchpad_parts.append(action.log)
            scratchpad_parts.append(f"\nObservation: {observation}\nThought: ")
        kwargs["agent_scratchpad"] = "".join(scratchpad_parts)

        tool_lines = (f"{tool.name}: {tool.description}" for tool in self.tools)
        kwargs["tools"] = "\n".join(tool_lines)
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        return self.template.format(**kwargs)

In [9]:
# Instantiate the custom prompt with the template text and the tool list.
# "intermediate_steps" is consumed inside CustomPromptTemplate.format.
prompt_with_history = CustomPromptTemplate(
    input_variables=["input", "intermediate_steps", "history"],
    tools=tools,
    template=template_with_history,
)

### Chain

In [10]:
# Chain that formats the custom prompt and sends it to the wrapped pipeline LLM.
llm_chain_with_history = LLMChain(
    prompt=prompt_with_history,
    llm=llm,
)

### Output Parser

In [15]:
def wrap_text(text, width=90):
    """Wrap every newline-separated line of ``text`` to ``width`` columns.

    Each line is passed to ``textwrap.fill`` on its own and the results are
    joined with newlines again, so the existing line breaks are kept.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
    
class CustomOutputParser(AgentOutputParser):
    """Convert raw LLM text into the agent's next step."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: Text produced by the LLM for the current step.

        Returns:
            ``AgentFinish`` carrying the wrapped text that follows the last
            "Final Answer:" marker when that marker is present; otherwise an
            ``AgentAction`` with the tool name and tool input extracted from
            the "Action:" / "Action Input:" lines.

        Raises:
            ValueError: if the text contains neither a final answer nor an
                "Action:" / "Action Input:" pair.
        """
        if "Final Answer:" in llm_output:
            output = llm_output.split("Final Answer:")[-1].strip()
            return AgentFinish(
                return_values={"output": wrap_text(output)},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        # Strip every kind of surrounding whitespace (not only spaces) before
        # removing quotes: with re.DOTALL the capture can end in a newline, which
        # would otherwise stay in the tool input and keep the closing quote attached.
        action_input = match.group(2).strip().strip('"')
        return AgentAction(tool=action, tool_input=action_input, log=llm_output)


output_parser = CustomOutputParser()

### Agent

In [16]:
# Agent that runs the LLM chain, parses each completion with the custom output
# parser, and may only call the tools listed in `tools`. Generation is cut at
# the "\nObservation:" marker.
agent_with_history = LLMSingleActionAgent(
    llm_chain=llm_chain_with_history,
    output_parser=output_parser,
    allowed_tools=[tool.name for tool in tools],
    stop=["\nObservation:"],
)

In [17]:
# Executor that drives the agent with its tools and a windowed conversation
# memory (k=2); verbose tracing is switched off.
docAI = AgentExecutor.from_agent_and_tools(
    agent=agent_with_history,
    tools=tools,
    memory=ConversationBufferWindowMemory(k=2),
    verbose=False,
)

## Ask DocAI

In [18]:
# Opening question of the conversation.
# NOTE(review): "spained" looks like a typo for "sprained"; it is left untouched
# because the recorded output below echoes the question text.
response = docAI.run("How can I treat a spained ankle?")
print(response)

To treat a spained ankle, follow the RICE method: Rest, Ice, Compression, and Elevation.
This can help with swelling and pain. If the condition is severe, consult a doctor.


In [19]:
# Follow-up that does not name the injury; docAI was built with a
# ConversationBufferWindowMemory, and the recorded answer refers to the ankle.
response = docAI.run("What meds could I take?")
print(response)

Over-the-counter pain relievers and anti-inflammatory medications like ibuprofen can help
with a sprained ankle. However, if the condition is severe, consult a doctor.


In [20]:
# Second follow-up, again without naming the injury in the question itself.
response = docAI.run("How long will it take to heal?")
print(response)

The healing time for a sprained ankle can vary, but it generally takes 1-2 weeks for
initial recovery and up to 4-21 days for a soft callus to form around the injured area.
It's important to follow the RICE method and consult a doctor if the condition is severe.


In [21]:
# Third follow-up: asks about swapping ibuprofen (named in an earlier recorded answer)
# for paracetamol.
response = docAI.run("Can I take paracetamol instead of ibuprofen?")
print(response)

Both paracetamol and ibuprofen can be taken for pain relief, but it is important to
consider the potential side effects and consult a doctor if you are unsure. Ibuprofen is
an NSAID and may cause stomach pain, nausea, bleeding, ulcers, gas, bloating, diarrhea, or
constipation, while paracetamol has fewer side effects. Always follow the recommended
dosage and consult a healthcare professional if you have any concerns.
