<a href="https://github.com/jmalbornoz/GDPR/blob/master/2A%20simple%20chatbot%20with%20Langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A simple chatbot with Langchain
## Jose M Albornoz
### February 2024

# 0.- Imports

In [None]:
!pip install -q langchain

In [None]:
pip install -q sentence-transformers

In [None]:
pip install -q faiss-cpu

# 1.- Load text and split it into chunks

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Read the whole source document into memory as a single string.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default.
# NOTE(review): assumes text_file.txt is UTF-8 encoded — confirm.
with open("./text_file.txt", encoding="utf-8") as f:
    text_file = f.read()

In [None]:
# Splitter configured for chunks of size 750 with an overlap of 20 between
# neighbouring chunks (sizes are measured by the splitter's length function;
# presumably characters — confirm against the installed langchain version).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=750,
    chunk_overlap=20,
)

In [None]:
# Break the loaded document into a list of text chunks.
chunks = text_splitter.split_text(text=text_file)

Here's what a chunk looks like:

In [None]:
# Display one sample chunk (index 15). This needs at least 16 chunks;
# a shorter document raises IndexError here.
chunks[15]

In [None]:
# Report how many chunks the splitter produced.
print(" There are {} chunks".format(len(chunks)))

# 2.- Import Hugging Face embeddings

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [None]:
# Build the embedding model with the library's default settings (no arguments are passed).
# NOTE(review): the model this resolves to depends on the installed langchain version — confirm.
embeddings = HuggingFaceEmbeddings()

# 3.- Embed the chunks and store the embeddings in a vector database

In [None]:
from langchain.vectorstores import FAISS

In [None]:
# Embed every chunk and index the resulting vectors in an in-memory FAISS store.
vectorStore = FAISS.from_texts(
    texts=chunks,
    embedding=embeddings,
)

# 4.- Load the Falcon-7B-Instruct LLM

In [None]:
from langchain import HuggingFaceHub
import os

# Provide the Hugging Face Hub API token through the environment.
# setdefault keeps a token that is already exported in the environment, so
# the placeholder below never overwrites a real credential. Replace the
# placeholder only if no token is configured outside the notebook.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "YOU_HUGGINGFACE_API_KEY")

In [None]:
# Handle to the hosted tiiuae/falcon-7b-instruct model on the Hugging Face Hub.
# Generation settings (temperature 0.1, max_length 512) are forwarded as model kwargs.
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature": 0.1, "max_length": 512},
)

# 5.- Create a question answering chain

In [None]:
from langchain.chains import RetrievalQA
from langchain.schema import retriever

In [None]:
# Question-answering chain: the FAISS store acts as the retriever and the
# Falcon LLM produces the answer, using the "stuff" chain type.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorStore.as_retriever(),
)

In [None]:
# Question to put to the retrieval chain.
query = "Name examples of Italian symphonic bands whose names begin with A"

In [None]:
# Run the chain on the question; the input is passed as an explicit mapping
# under the chain's "query" input key.
chain.invoke({"query": query})