In [1]:

from torch import cuda, bfloat16
import transformers

# Hugging Face Hub id of the chat checkpoint used throughout the notebook.
model_id = 'meta-llama/Llama-2-70b-chat-hf'

# Device string used later when tensors are moved by hand; the model itself is
# placed by `device_map='auto'` below.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items
# NOTE(review): no token is passed explicitly here, so access to the checkpoint
# presumably relies on credentials already configured in the environment.
model_config = transformers.AutoConfig.from_pretrained(
    model_id
)

# `trust_remote_code=True` was dropped: it allows code shipped in the Hub repo
# to run locally and is not needed for an architecture built into transformers.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto'
)
# inference only: switch off training-time behaviour such as dropout
model.eval()
print(f"Model loaded on {device}")

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/dev/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
ERROR: /home/ubuntu/dev/bin/python: undefined symbol: cudaRuntimeGetVersion
CUDA SETUP: libcudart.so path is None
CUDA SETUP: Is seems that your cuda installation is not in your path. See https://github.com/TimDettmers/bitsandbytes/issues/85 for more information.
CUDA SETUP: CUDA version lower than 11 are currently not supported for LLM.int8(). You will be only to use 8-bit optimizers and quantization routines!!
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 00
CUDA SETUP: Loading binary /home/ubuntu/dev/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Loading checkpoint shards: 100%|██████████| 15/15 [00:27<00:00,  1.83s/it]


Model loaded on cuda:0


In [2]:

# Load the tokenizer published under the same Hub id as the model (`model_id`).
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

In [3]:
# Text markers that should cut a generation short.
stop_list = ['\nHuman:', '\n```\n']

# Encode every marker into its list of token ids; the bare name on the last
# line makes the notebook display the result.
# NOTE(review): the ids displayed for this cell all begin with 1 and 29871,
# which look like tokens the tokenizer prepends (presumably BOS and a leading
# space piece). A tail comparison against generated ids may therefore never
# match -- confirm, and consider encoding without special tokens.
stop_token_ids = list(map(lambda marker: tokenizer(marker)['input_ids'], stop_list))
stop_token_ids


[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [4]:
import torch

# Turn every stop sequence into a LongTensor on `device` so it can be compared
# with generated ids. The cell rebinds its own input, so entries that are
# already tensors (i.e. the cell was run before) are only moved/cast instead of
# being passed through the legacy LongTensor constructor again.
stop_token_ids = [
    ids.to(device=device, dtype=torch.long)
    if isinstance(ids, torch.Tensor)
    else torch.LongTensor(ids).to(device)
    for ids in stop_token_ids
]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [5]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the output ends with a stop sequence.

    The stop sequences are read from the module-level ``stop_token_ids`` list
    each time the criterion is called. Only the first row of ``input_ids`` is
    inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True if the tail of ``input_ids[0]`` equals any stop sequence.

        Args:
            input_ids: token ids produced so far; row 0 is the one checked.
            scores: unused, accepted to match the ``StoppingCriteria`` call signature.
            **kwargs: unused.

        Returns:
            True when generation should stop, False otherwise.
        """
        generated = input_ids[0]
        for stop_ids in stop_token_ids:
            n_stop = len(stop_ids)
            # A stop sequence that is empty or longer than what has been
            # generated cannot match; skipping it also keeps torch.eq from
            # receiving two operands of different sizes.
            if n_stop == 0 or generated.shape[0] < n_stop:
                continue
            if torch.eq(generated[-n_stop:], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [6]:
# Build the text-generation pipeline around the already loaded model and
# tokenizer; generation settings are passed here as keyword arguments.
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    # the custom stopping criteria is currently disabled (line kept for reference)
    #stopping_criteria=stopping_criteria,  # without this model rambles during chat
    # NOTE(review): no `do_sample` argument is passed -- confirm that the
    # temperature below actually takes effect with the default decoding mode.
    temperature=0.01,  # 'randomness' of outputs; kept very low here
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)
 

In [9]:
res = generate_text("Explain to me the difference between nuclear fission and fusion.")
print(res[0]["generated_text"])

Explain to me the difference between nuclear fission and fusion.
Nuclear fission is a process in which an atomic nucleus splits into two or more smaller nuclei, releasing a large amount of energy in the process. This occurs when an atom's nucleus is bombarded with a high-energy particle, such as a neutron. The resulting nuclei are typically smaller and lighter than the original nucleus, and the excess energy is released as radiation. Fission is the process used in nuclear power plants to generate electricity.
Nuclear fusion, on the other hand, is the process by which two or more atomic nuclei combine to form a single, heavier nucleus. This process also releases a large amount of energy, but it requires the nuclei to be brought together at extremely high temperatures and pressures, typically found in the core of stars. Fusion is the process that powers the sun and other stars.
The key difference between fission and fusion is the direction of the energy release. In fission, the energy is

In [7]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline; this `llm`
# object is what gets passed to the agent further down.
llm = HuggingFacePipeline(pipeline=generate_text)

In [8]:
llm(prompt="Explain to me the difference between nuclear fission and fusion.")

"\nNuclear fission is a process in which an atomic nucleus splits into two or more smaller nuclei, releasing a large amount of energy in the process. This occurs when an atom's nucleus is bombarded with a high-energy particle, such as a neutron. The resulting nuclei are typically smaller and lighter than the original nucleus, and the excess energy is released as radiation. Fission is the process used in nuclear power plants to generate electricity.\nNuclear fusion, on the other hand, is the process by which two or more atomic nuclei combine to form a single, heavier nucleus. This process also releases a large amount of energy, but it requires the nuclei to be brought together at extremely high temperatures and pressures, typically found in the core of stars. Fusion is the process that powers the sun and other stars.\nThe key difference between fission and fusion is the direction of the energy release. In fission, the energy is released outward from the nucleus, while in fusion, the ene

In [474]:
from langchain.tools import BaseTool
from math import pi
from typing import Union


class CircumferenceTool(BaseTool):
    """LangChain tool that converts a circle's radius into its circumference."""

    name = "circumference"
    description = "use this tool when you need to calculate a circumference using the radius of a circle"

    def _run(self, radius: Union[int, float]):
        """Return the circumference for ``radius`` as a float.

        ``radius`` is passed through ``float()`` first, so a numeric string
        is accepted as well.
        """
        diameter = float(radius) * 2.0
        return diameter * pi

    def _arun(self, radius: Union[int, float]):
        """Asynchronous variant; always raises because it is not implemented."""
        raise NotImplementedError("This tool does not support async")

In [475]:
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Checkpoint used for image captioning.
hf_model = "Salesforce/blip-image-captioning-large"

# Use the GPU when torch can see one, otherwise stay on the CPU.
# Note: `device` and `model` are rebound here; the first cell of the notebook
# assigned both names for the Llama checkpoint.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the BLIP preprocessor and captioning model, then move the model to `device`.
processor = BlipProcessor.from_pretrained(hf_model)
model = BlipForConditionalGeneration.from_pretrained(hf_model)
model = model.to(device)

In [476]:
img_url = "https://images.unsplash.com/photo-1664990594745-2a84bd70d8b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=2743&q=80"


In [478]:
from langchain.tools import BaseTool
import requests
from PIL import Image
desc = (
    "use this tool when given the URL of an image that you'd like to be "
    "described. It will return a simple caption describing the image."
)

class ImageCaptionTool(BaseTool):
    """LangChain tool that downloads an image and captions it.

    Uses the module-level ``processor``, ``model`` and ``device`` objects for
    preprocessing and caption generation.
    """

    name = "Image captioner"
    description = desc

    def _run(self, url: str):
        """Download the image at ``url`` and return a caption for it.

        Args:
            url: address of the image to describe.

        Returns:
            The decoded caption prefixed with ``"Inside the image "``.

        Raises:
            requests.RequestException: if the download times out, fails to
                connect, or the server answers with an error status.
        """
        from io import BytesIO

        # A timeout keeps a stalled server from blocking the agent forever, and
        # raise_for_status() surfaces HTTP errors instead of handing an error
        # page to PIL.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # response.content is the fully downloaded, content-decoded body
        # (the raw socket stream is not decoded).
        image = Image.open(BytesIO(response.content)).convert('RGB')
        # preprocess the image
        inputs = processor(image, return_tensors="pt").to(device)
        # generate the caption
        out = model.generate(**inputs, max_new_tokens=20)
        # get the caption
        caption = processor.decode(out[0], skip_special_tokens=True)
        caption = "Inside the image " + caption
        return caption
    
    def _arun(self, query: str):
        """Asynchronous variant; always raises because it is not implemented."""
        raise NotImplementedError("This tool does not support async")

In [479]:
from langchain.tools import BaseTool, StructuredTool, Tool, tool

# Instantiate the two custom tools defined above.
calcu = CircumferenceTool()
image_cap = ImageCaptionTool()
# Expose each tool's `run` method to the agent as a `Tool`, registered under
# the names "circumference" and "caption".
# NOTE(review): the caption tool is registered here as "caption", while the
# ImageCaptionTool class itself declares the name "Image captioner".
tools = [
    Tool.from_function(
        func=calcu.run,
        name="circumference",
        description="use this tool when you need to calculate a circumference using the radius of a circle. it receives a floating point number and returns the ressult in floating point format "
    ),
    Tool.from_function(
        func=image_cap.run,
        name="caption",
        description="use this tool when given the URL of an image that you'd like to be described."
        
    ),
]

In [480]:
from langchain.memory import ConversationBufferWindowMemory
from langchain.agents import load_tools

# Conversation memory handed to the agent. History is exposed under the
# "chat_history" key as message objects, and the agent's "output" is what gets
# stored. k=5 presumably limits the window to the last five exchanges -- confirm.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history", k=5, return_messages=True, output_key="output"
)


In [481]:

from langchain.agents import AgentOutputParser
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS
from langchain.output_parsers.json import parse_json_markdown
from langchain.schema import AgentAction, AgentFinish

class OutputParser(AgentOutputParser):
    """Parser that maps raw LLM text onto an agent action or a final answer."""

    def get_format_instructions(self) -> str:
        """Return the format instructions imported from the conversational chat prompt."""
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> AgentAction | AgentFinish:
        """Interpret ``text`` as a JSON blob with "action" and "action_input".

        A "Final Answer" action ends the run; any other action is returned as
        a tool call. If anything goes wrong while reading the JSON, the raw
        text itself is returned as the final output.
        """
        try:
            payload = parse_json_markdown(text)
            tool_name = payload["action"]
            tool_input = payload["action_input"]
            if tool_name != "Final Answer":
                # the agent asked for a tool
                return AgentAction(tool_name, tool_input, text)
            # the agent declared it is done
            return AgentFinish({"output": tool_input}, text)
        except Exception:
            # the reply was not the expected JSON (often the case when the
            # agent is already finished), so the text is used as the answer
            return AgentFinish({"output": text}, text)

    @property
    def _type(self) -> str:
        """Identifier string for this parser."""
        return "conversational_chat"

# initialize output parser for agent
parser = OutputParser()

In [482]:
from langchain.agents import initialize_agent

# Initialize the conversational agent with the tool list, the wrapped LLM and
# the conversation memory defined above.
# Note: the custom `parser` is not wired in -- the `agent_kwargs` line that
# would pass it is commented out.
agent = initialize_agent(
    agent="chat-conversational-react-description",
    tools=tools,
    llm=llm,
    verbose=True,
    early_stopping_method="generate",
    memory=memory,
    #agent_kwargs={"output_parser": parser}
)

In [None]:
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['input', 'chat_history', 'agent_scratchpad'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, A

In [483]:

# Llama 2 chat prompt delimiters: [INST] ... [/INST] wraps an instruction and
# <<SYS>> ... <</SYS>> wraps the system prompt.
B_INST, E_INST = "[INST]", "[/INST]"
# The system tags previously read "<>" -- the "<SYS>" / "</SYS>" text was
# missing, so the system prompt was not delimited in the format the chat model
# was trained on.
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

In [506]:
# System message for the agent: the instructions plus few-shot examples,
# wrapped in the B_SYS / E_SYS delimiters. Braces are doubled, presumably so the
# prompt template treats them as literal braces (the model replies shown in the
# outputs use single braces).
# NOTE(review): the few-shot turns look like leftovers from editing -- user
# turns are labelled "User: 1.0", "User: 3.0", "User4:", and the bare numbers
# used as follow-up turns ("2.0", "16.0") do not match the figures quoted in the
# answers that follow them. The tool-usage example also passes a bare number
# (1.2) while the examples pass quoted strings.
# NOTE(review): the text advertises only the "circumference" tool, although the
# `tools` list passed below also contains "caption".
sys_msg = B_SYS + """Assistant is a expert JSON builder designed to assist with a wide range of tasks.

Assistant is able to respond to the User and use tools using JSON strings that contain "action" and "action_input" parameters.

All of Assistant's communication is performed using this JSON format.

Assistant can also use tools by responding to the user with tool use instructions in the same "action" and "action_input" JSON format. the Only tools available to Assistant are:
- "circumference": use this tool ONLY when you need to calculate a circumference using the radius of a circle.
  - To use the calculator tool, Assistant should write like so:
    ```json
    {{"action": "circumference",
      "action_input": 1.2 }}
    ```

Here are some previous conversations between the Assistant and User:

User: 1.0 how are you?
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "I'm good thanks, how are you?"}}
```

User: 3.0  what is the circumference of a circle that has a radius of 7.81?
Assistant: ```json
{{"action": "circumference",
 "action_input": "7.81" }}
```
User4: where is the capital of Iran?
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "The capital of Iran is Tehran"}}
```
User: 2.0
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "The circumference of a circle with a radius of 7.81 is approximately 49.07."}}
```
User: Thanks could you tell me the circumference of a circle that has a radius of 4 mm?
Assistant: ```json
{{"action": "circumference",
 "action_input": "4" }}
```
User: 16.0
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "It looks like the answer is 25.132741228718345!"}}
```
User: 16.0
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "It looks like  the circumference is 25.132741228718345!"}}
```

Here is the latest conversation between Assistant and User.""" + E_SYS
# Build a fresh prompt from the custom system message and the tool list, then
# swap it into the agent's LLM chain in place of the default prompt.
new_prompt = agent.agent.create_prompt(
    system_message=sys_msg,
    tools=tools
)
agent.agent.llm_chain.prompt = new_prompt
     

In [507]:
# Instruction wrapped in the [INST] ... [/INST] delimiters, followed by the
# user's input placeholder.
instruction = B_INST + " Respond to the following in JSON with 'action' and 'action_input' values " + E_INST
human_msg = instruction + "\nUser: {input}"

# Overwrite the template of the prompt message at index 2.
# NOTE(review): index 2 is presumably the human message template of the chat
# prompt -- confirm against the printed prompt before relying on the position.
agent.agent.llm_chain.prompt.messages[2].prompt.template = human_msg

In [508]:
agent.memory.clear()

In [439]:
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['input', 'chat_history', 'agent_scratchpad'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='<>\nAssistant is a expert JSON builder designed to assist with a wide range of tasks.\n\nAssistant is able to respond to the User and use tools using JSON strings that contain "action" and "action_input" parameters.\n\nAll of Assistant\'s communication is performed using this JSON format.\n\nAssistant can also use tools by responding to the user with tool use instructions in the same "action" and "action_input" JSON format. The only tools available to Assistant are:\n- "caption": use this tool only when given the URL of an image that you need to be described the image. \n  - To use the caption tool, Assistant should write like so:\n    ```json\n        {{"action": "caption",\n          "action_input": https://images.pexels.com/photos/

In [340]:
from math import pi
(4 * 2) * pi

25.132741228718345

In [496]:
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['input', 'chat_history', 'agent_scratchpad'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='<>\nAssistant is a expert JSON builder designed to assist with a wide range of tasks.\n\nAssistant is able to respond to the User and use tools using JSON strings that contain "action" and "action_input" parameters.\n\nAll of Assistant\'s communication is performed using this JSON format.\n\nAssistant can also use tools by responding to the user with tool use instructions in the same "action" and "action_input" JSON format. Tools available to Assistant are:\n\n- "circumference": use this tool only when you need to calculate a circumference using the radius of a circle.\n  - To use the calculator tool, Assistant should write like so:\n    ```json\n    {{"action": "circumference",\n      "action_input": 1.2 }}\n    ```\n\nHere are some 

In [509]:
agent("hey how are you today?")



[1m> Entering new AgentExecutor chain...[0m




[32;1m[1;3m

Assistant: ```json
{"action": "Final Answer",
 "action_input": "I'm doing well, thanks for asking! How about you?"}
```[0m

[1m> Finished chain.[0m


{'input': 'hey how are you today?',
 'chat_history': [],
 'output': "I'm doing well, thanks for asking! How about you?"}

In [510]:
agent("where is capital of the UK?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Assistant: ```json
{"action": "Final Answer",
 "action_input": "The capital of the UK is London"}
```[0m

[1m> Finished chain.[0m


{'input': 'where is capital of the UK?',
 'chat_history': [HumanMessage(content='hey how are you today?', additional_kwargs={}, example=False),
  AIMessage(content="I'm doing well, thanks for asking! How about you?", additional_kwargs={}, example=False)],
 'output': 'The capital of the UK is London'}

In [511]:
agent("can you calculate the circumference of a circle that has a radius of 5.5")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m?
Assistant: ```json
{"action": "circumference",
 "action_input": "5.5" }
```[0m
Observation: [36;1m[1;3m34.55751918948772[0m
Thought:[32;1m[1;3m

AI:?

Assistant: ```json
{"action": "Final Answer",
 "action_input": "The circumference of a circle with a radius of 5.5 is approximately 34.55751918948772."}
```[0m

[1m> Finished chain.[0m


{'input': 'can you calculate the circumference of a circle that has a radius of 5.5',
 'chat_history': [HumanMessage(content='hey how are you today?', additional_kwargs={}, example=False),
  AIMessage(content="I'm doing well, thanks for asking! How about you?", additional_kwargs={}, example=False),
  HumanMessage(content='where is capital of the UK?', additional_kwargs={}, example=False),
  AIMessage(content='The capital of the UK is London', additional_kwargs={}, example=False)],
 'output': 'The circumference of a circle with a radius of 5.5 is approximately 34.55751918948772.'}

#### Calculate the circumference of a circle