In [None]:
# Required Packages
!pip install --quiet langchain langchain-community duckduckgo-search faiss-gpu pypdf sentence_transformers huggingface_hub transformers bitsandbytes accelerate

In [None]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
import torch
import transformers
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# Quantization settings handed to `from_pretrained` when the model is loaded:
# 4-bit loading, "nf4" quant type, double quantization, float16 compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

In [None]:
# Load the causal-LM checkpoint with the quantization config defined above,
# then the tokenizer that belongs to the same checkpoint.
model_id = "vilsonrodrigues/falcon-7b-instruct-sharded"

quant_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Define Huggingface Pipeline
#
# The factory is called through the module (`transformers.pipeline`) instead of
# the bare imported name: this cell rebinds `pipeline` to the pipeline object,
# so calling the bare name would stop working the second time the cell is run.
# The name `pipeline` is kept for the result because the following cell passes
# it to HuggingFacePipeline(pipeline=pipeline).
#
# NOTE(review): `max_length` limits prompt + generated tokens together, so long
# prompts (e.g. the agent prompt) leave little room for an answer — consider
# `max_new_tokens` instead; confirm against the transformers generation docs.
pipeline = transformers.pipeline(
  "text-generation",
  model=quant_model,
  tokenizer=tokenizer,
  use_cache=False,
  device_map="auto",
  truncation=True,
  max_length=256,
  do_sample=True,
  num_return_sequences=1,
  eos_token_id=tokenizer.eos_token_id,
  # The EOS token id is reused as the padding token id.
  pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Wrap the text-generation pipeline in a LangChain LLM object; the chain and
# the agent further down both take `hf_llm` as their model.
hf_llm = HuggingFacePipeline(
    pipeline=pipeline,
)

#### Basic LLM Chain

In [None]:
# Two-line prompt: the question slot, then an answer line primed with a
# step-by-step cue.
template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)

# `question` is the only placeholder in the template.
basic_qa_prompt = PromptTemplate(input_variables=["question"], template=template)

# Chain that formats the prompt and sends it to the local Hugging Face LLM.
llm_chain = LLMChain(llm=hf_llm, prompt=basic_qa_prompt)

In [None]:
# Preview the fully rendered prompt text for a sample question.
basic_qa_prompt.format(
    question="what is the meaning of life?",
)

In [None]:
# Generate response from Basic QA Chain
# `invoke` replaces the deprecated direct call `llm_chain(...)`; the input is
# keyed by the prompt's only variable, `question`.
llm_chain.invoke({"question": "What is the meaning of life?"})

#### Agent with Function Calling

In [None]:
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.agents import load_tools

In [None]:
# Windowed conversation memory: window size k=5, stored under the
# 'chat_history' key, with return_messages enabled.
# NOTE(review): no visible cell passes this object to the agent — confirm
# whether it should be wired in.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    return_messages=True,
    memory_key='chat_history',
)

In [None]:
# Tools the agent may call: only the "ddg-search" tool is loaded here.
tools = load_tools(
    ["ddg-search"],
    llm=hf_llm,
)

In [None]:
# Instantiate Agent
# Zero-shot ReAct agent over the tools above, driven by the local LLM.
# handle_parsing_errors=True makes the executor report an unparsable model
# reply back to the model as an observation and continue, instead of raising
# and aborting the run on the first malformed reply.
agent = initialize_agent(tools,
                         hf_llm,
                         agent="zero-shot-react-description",
                         verbose=True,
                         handle_parsing_errors=True)

In [None]:
# Ask the agent a question. `invoke` replaces the deprecated `run`; it returns
# a dict, so index "output" to display the final answer string as `run` did.
agent.invoke({"input": "What is the best song by linkin park?"})["output"]