## **Chatbot Running Llama and LangChain**

In [1]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q datasets loralib sentencepiece
!pip -q install bitsandbytes accelerate
!pip -q install langchain

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m79.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.4/492.4 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

### **Loading Llama 2 7B (4-bit quantized)**

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

import torch
import re

In [3]:
# Hub identifier of the sharded Llama-2 7B checkpoint used throughout the notebook.
MODEL_NAME = "TinyPixel/Llama-2-7B-bf16-sharded"

# Load the weights in 4-bit NF4 form, with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)

# Fetch the checkpoint and apply the quantization config while loading.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True, quantization_config=bnb_config)

# Switch off the `use_cache` flag on the loaded model's config.
model.config.use_cache = False

Downloading (…)lve/main/config.json:   0%|          | 0.00/626 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00014.bin:   0%|          | 0.00/981M [00:00<?, ?B/s]

Downloading (…)l-00002-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00003-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00004-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00005-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

Downloading (…)l-00006-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00007-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00008-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00009-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00010-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

Downloading (…)l-00011-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00012-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00013-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00014-of-00014.bin:   0%|          | 0.00/847M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [4]:
# Load the tokenizer published alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)okenizer_config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

### **Run Llama model**

In [5]:
def run_model(question, max_new_tokens=10):
  """Generate a continuation of ``question`` with the raw Llama model.

  Args:
    question: Prompt text, passed to the tokenizer unchanged.
    max_new_tokens: Upper bound on the number of tokens generated after the
      prompt. Defaults to 10, the value that used to be hard-coded.

  Returns:
    The decoded sequence (the prompt followed by the generated continuation,
    special tokens removed) with every newline replaced by a single space.
  """
  # Put the inputs on the device the model actually lives on instead of
  # assuming "cuda:0".
  inputs = tokenizer(question, return_tensors="pt").to(model.device)
  response = model.generate(**inputs, max_new_tokens=max_new_tokens)
  result = tokenizer.decode(response[0], skip_special_tokens=True)

  # Flatten the text onto one line for compact display.
  return result.replace("\n", " ")

In [6]:
# Smoke test of the raw model (no prompt template). The call is the cell's last
# expression, so its return value is shown as the cell output.
question = "What is the capital of England?"
run_model(question)



'What is the capital of England?  nobody knows. What is the capital of'

### **Run Llama with LangChain**

In [7]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,  # length budget covers prompt tokens plus generated tokens
    do_sample=True,  # temperature / top_p only take effect when sampling is enabled
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2,
)

# Expose the pipeline through LangChain's LLM interface so it can be used in chains.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [8]:
# PromptTemplate and LLMChain are already imported in the notebook's import
# cell, so they are not imported a second time here.

# Instruction-style prompt; `{instruction}` is the single placeholder that is
# filled with the user's question.
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

Answer:"""

prompt = PromptTemplate(template=template, input_variables=["instruction"])

In [9]:
# Combine the prompt template with the local pipeline-backed LLM into one chain.
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

In [10]:
# Same question as the raw-model smoke test, this time sent through the chain.
question = "What is the capital of England?"

# Run the chain on the question and print the text it returns.
print(llm_chain.run(question))

 London, United Kingdom



In [11]:
# Code-generation prompt.
question = "Write a Python function to calculate the factorial of a number."

# Run the chain on the question and print the text it returns.
print(llm_chain.run(question))

 \begin{code}
def fact(n):
    if n == 0 or n==1 : return 1;
    else :return (fact(n-1)*n)
\end{code}


In [12]:
from pprint import pprint

In [13]:
# Open-ended explanatory prompt.
question = "Explain about diabetes"

# pprint shows the returned string's repr wrapped across lines, which keeps
# long answers readable in the cell output.
pprint(llm_chain.run(question))

(' Diabetes mellitus, commonly referred to as just "diabetes", is a group of '
 'metabolic diseases in which there are high blood sugar levels over a '
 'prolonged period. Symptoms often include frequent urination, increased '
 'thirst and increased appetite. If left untreated, long-term complications '
 'can include cardiovascular disease, stroke, chronic kidney failure, foot '
 'ulcers, and damage to eyes.\n'
 '\n'
 'Diabetes occurs either when pancreas does not produce enough insulin or when '
 'body cannot effectively use the insulin produced. This leads to high blood '
 'glucose level. There are two main types of diabetes - type 1 (insufficient '
 'production) and type 2 (effective utilization). Type I accounts for only 5% '
 'cases while rest 90% fall under category II.\n'
 '\n'
 'Type I diabetics require daily administration of insulin through injection '
 'or pump whereas those with type II may be able to control their condition by '
 'proper')


In [14]:
# Factual question about a person.
question = "who is Ronaldo?"

# pprint shows the returned string's repr wrapped across lines.
pprint(llm_chain.run(question))

('\n'
 'Ronaldo is a Brazilian professional footballer who plays as a forward for '
 'Italian club Juventus and captains the Portugal national team. Often '
 'considered the best player in the world and widely regarded as one of the '
 "greatest players of all time, he has won five Ballons d'Or (the first player "
 'to win three consecutive ones), four UEFA Champions Leagues, seven league '
 'titles, five FIFA Club World Cups, one European Championship and two Copa '
 'América trophies among several other honours. He holds many records; '
 'including most goals scored in Serie A (120) and the all-time top scorer for '
 'his club level appearances with 438 goals for Real Madrid, becoming the only '
 'player ever to achieve such milestone while playing exclusively for one '
 'single club. His total goal tally for club competitions stands at over 750 '
 'goals across nearly 900 official games during his career.\n')


In [15]:
# Conversational (small-talk) prompt.
question = "what do you do in your free time?"

# pprint shows the returned string's repr wrapped across lines.
pprint(llm_chain.run(question))

(' I like to play video games and watch movies with my friends on weekends, '
 'but during school days i usually spend most of my time studying for exams or '
 'doing homework assignments.')


In [16]:
# Conversational follow-up prompt. Each chain run receives only this question.
# NOTE(review): the wording ("good do you like ...") looks like a typo; left
# unchanged because the string is sent verbatim to the model.
question = "good do you like painting too?"

# Run the chain on the question and print the text it returns.
print(llm_chain.run(question))

 I love to paint! It's my favorite hobby and it helps me relax after work or school. 😊


In [17]:
# Conversational preference prompt.
# NOTE(review): the wording ("prefer have") looks like a typo; left unchanged
# because the string is sent verbatim to the model.
question = "what brand of phone do you prefer have?"

# pprint shows the returned string's repr wrapped across lines.
pprint(llm_chain.run(question))

(' I like Samsung phones because they are very reliable and easy to use, but '
 "if it's not available in my country then I would go for Apple or Huawei. \n")


### **Chatbot**

In [None]:
# Interactive chat loop. Every turn is sent through llm_chain on its own; only
# the current question is passed in, so earlier turns are not part of the prompt.
# Stop with an empty line, "exit" or "quit", or by interrupting the kernel
# while the prompt is waiting for input.
while True:
  try:
    question = input("User: ")
  except (EOFError, KeyboardInterrupt):
    # Input stream closed or the cell was interrupted at the prompt: leave the
    # loop cleanly instead of ending the cell with a traceback.
    break
  if question.strip().lower() in {"", "exit", "quit"}:
    break
  print("Bot: ", llm_chain.run(question))

User: hi
Bot:  

```python
def get_name(self):
    return self._name
```
User: how are you?
Bot:   I am fine, thank you! How about yourself?

User: what do you do in your free time?
Bot:   I like to play video games and watch movies with my friends on weekends, but during school days i usually spend most of my time studying for exams or doing homework assignments.
User: what brand of phone do you prefer?
Bot:   I like Samsung phones because they have great features and are very reliable. 

User: who is Lionel Messi?
Bot:  
Lionel Andrés "Leo" Messi (Spanish pronunciation: [ˈljoŋxel ðeˈmesi]; born 24 June 1987) is an Argentine professional footballer who plays as a forward for Spanish club Barcelona and captains both his country's national team.[3] Often considered the best player in the world[4][5] and rated by many commentators, former players,[6] and football experts as one of the greatest players of all time,[7][8][9] he has won five Ballons d'Or, four UEFA Champions Leagues, nine L