# Traditional without AI-ML (Rule based chatbot)

In [17]:
import random

# Define some basic rules
rules = {
    "hi": ["Hello!", "Hi there!", "Greetings!"],
    "how are you": ["I'm good, thanks!", "I'm doing well, how about you?", "Not bad."],
    "bye": ["Goodbye!", "See you later!", "Farewell!"],
    "default": ["I'm not sure how to respond.", "Could you please rephrase that?", "I didn't understand."],
}


# Function to get the bot's response
def get_response(user_input):
    # Convert input to lowercase for case-insensitive matching
    user_input_lower = user_input.lower()

    # Check if there's a rule for the user input
    if user_input_lower in rules:
        return random.choice(rules[user_input_lower])
    else:
        return random.choice(rules["default"])

# Start chatting
print("Hello! I'm your simple chatbot. Type 'bye' to exit.")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'bye':
        print("Bot: Goodbye!")
        break
    response = get_response(user_input)
    print("Bot:", response)


Hello! I'm your simple chatbot. Type 'bye' to exit.
You: hi
Bot: Hi there!
You: how are you
Bot: I'm doing well, how about you?
You: What can you say more
Bot: Could you please rephrase that?
You: What is your name
Bot: I'm not sure how to respond.
You: bye
Bot: Goodbye!


# Using AI-ML with Intent classification

In [18]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch

# Load pre-trained model and tokenizer
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Define intent labels
intent_labels = ["greeting", "farewell", "question", "unknown"]

# Function to classify intent
def classify_intent(user_input):
    inputs = tokenizer(user_input, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=1).item()
    intent = intent_labels[predictions]
    return intent

# Start chatting
print("Hello! I'm your intent-based chatbot. Type 'bye' to exit.")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'bye':
        print("Bot: Goodbye!")
        break
    intent = classify_intent(user_input)

    # Implement different responses based on intent
    if intent == "greeting":
        print("Bot: Hello! How can I help you?")
    elif intent == "farewell":
        print("Bot: Goodbye! Have a great day.")
    elif intent == "question":
        print("Bot: I'm sorry, I don't have the information you're looking for.")
    else:
        print("Bot: I'm not sure how to respond.")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hello! I'm your intent-based chatbot. Type 'bye' to exit.
You: what do you do?
Bot: Hello! How can I help you?
You: what is the colour of indian flag
Bot: Goodbye! Have a great day.
You: bye
Bot: Goodbye!


# Using AI to response user queries

In [19]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Function to generate a response
def generate_response(prompt, max_length=50):
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    output = model.generate(input_ids, max_length=max_length, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Start chatting
print("Hello! I'm your chatbot using transformers. Type 'bye' to exit.")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'bye':
        print("Bot: Goodbye!")
        break
    response = generate_response(user_input)
    print("Bot:", response)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Hello! I'm your chatbot using transformers. Type 'bye' to exit.
You: who is the current president of india?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Bot: who is the current president of india?

I don't know. I'm not sure. But I do know that there's a lot of people in the country who are very concerned about what's going on. And I think that's
You: what is your name?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Bot: what is your name?

I don't have a name. I'm not sure if it's my real name or something else. It's just that I have no idea what it is. So I guess I'll have to wait and see
You: bye
Bot: Goodbye!


# Using langchain

With the help of we can use latest LLM models for this tutorial we are using tiiuae/falcon-7b-instruct model from huggingface. We will cover langchain in detail in upcoming tutorial in detail.

Falcon-7B-Instruct is a 7B parameters causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets. It is made available under the Apache 2.0 license. Model Card: https://huggingface.co/tiiuae/falcon-7b-instruct

We will be using this model via huggingface api for free. You can get your api key from: https://huggingface.co/settings/tokens

In langchain tutorial we will also check out other ways of using these models.

In [1]:
!pip install langchain --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m776.4 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m396.4/396.4 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
!pip install langchain-community langchain-core

Collecting langchain-community
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.2.16-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (

In [9]:
import os
from langchain import HuggingFaceHub, PromptTemplate, LLMChain

os.environ['API_KEY'] = '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'

In [10]:
model_id = 'tiiuae/falcon-7b-instruct'

falcon_llm = HuggingFaceHub(huggingfacehub_api_token=os.environ['API_KEY'],
                            repo_id=model_id,
                            model_kwargs={"temperature":0.8,"max_new_tokens":2000})

In [11]:
template = """

You are an AI assistant that provides helpful answers to user queries.

{question}

"""
prompt = PromptTemplate(template=template, input_variables=['question'])

In [12]:
falcon_chain = LLMChain(llm=falcon_llm,
                        prompt=prompt,
                        verbose=True)

In [13]:
print(falcon_chain.run("What are the colors in the Rainbow?"))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m

You are an AI assistant that provides helpful answers to user queries.

What are the colors in the Rainbow?

[0m

[1m> Finished chain.[0m


You are an AI assistant that provides helpful answers to user queries.

What are the colors in the Rainbow?

- Red
- Orange
- Yellow
- Green
- Blue
- Purple
- Indigo
- Turquoise
- Teal
- Cyan
- Magenta
- Violet
User 


In [16]:
print(falcon_chain.run("Who is the current president of India?"))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m

You are an AI assistant that provides helpful answers to user queries.

Who is the current president of India?

[0m

[1m> Finished chain.[0m


You are an AI assistant that provides helpful answers to user queries.

Who is the current president of India?

<strong>The current president of India is Ram Nath Kovind. He assumed office on July 25, 2017, following the retirement of Dr. Pranab Mukherjee. He is a former Governor of Bihar. President Kovind was born on December 1, 1950, in a small town named Makhdumpal in Uttarakhand.</strong>
