### Import Dependencies
---

In [1]:
import pandas as pd
import faiss

from pathlib import Path
from transformers import pipeline
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

### Import File Structure
---

In [2]:
file_path = ('./data/customers-100.csv')

In [3]:
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,Index,Customer Id,First Name,Last Name,Company,City,Country,Phone 1,Phone 2,Email,Subscription Date,Website
0,1,DD37Cf93aecA6Dc,Sheryl,Baxter,Rasmussen Group,East Leonard,Chile,229.077.5154,397.884.0519x718,zunigavanessa@smith.info,2020-08-24,http://www.stephenson.com/
1,2,1Ef7b82A4CAAD10,Preston,Lozano,Vega-Gentry,East Jimmychester,Djibouti,5153435776,686-620-1820x944,vmata@colon.com,2021-04-23,http://www.hobbs.com/
2,3,6F94879bDAfE5a6,Roy,Berry,Murillo-Perry,Isabelborough,Antigua and Barbuda,+1-539-402-0259,(496)978-3969x58947,beckycarr@hogan.com,2020-03-25,http://www.lawrence.com/
3,4,5Cef8BFA16c5e3c,Linda,Olsen,"Dominguez, Mcmillan and Donovan",Bensonview,Dominican Republic,001-808-617-6467x12895,+1-813-324-8756,stanleyblackwell@benson.org,2020-06-02,http://www.good-lyons.com/
4,5,053d585Ab6b3159,Joanna,Bender,"Martin, Lang and Andrade",West Priscilla,Slovakia (Slovak Republic),001-234-203-0635x76146,001-199-446-3860x3486,colinalvarado@miles.net,2021-04-17,https://goodwin-ingram.com/


In [4]:
loader = CSVLoader(file_path=file_path)
docs = loader.load_and_split()

In [5]:
# Initialize Hugging Face embeddings from langchain_huggingface
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [6]:
# Generate an initial FAISS index with the correct embedding dimension
# We create a dummy embedding to find the dimension size
dummy_embedding = embedding_model.embed_query(" ")
dimension = len(dummy_embedding)
index = faiss.IndexFlatL2(dimension)

In [7]:
# Initialize the vector store
vector_store = FAISS(
    embedding_function=embedding_model,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [8]:
# Add the documents to the FAISS vector store
vector_store.add_documents(documents=docs)

['2ef2bb75-93e4-4f14-b7b5-be584b652152',
 '8d33acc0-78fe-4392-81b8-a5f922cdd5b1',
 '684b63e7-db9d-4e55-b97f-1995d489c457',
 'bb55834e-41b5-468b-baf8-e2de963f802a',
 '64373394-538e-4504-9215-12168613ec97',
 'f174eb74-a231-471f-b495-cfbddc410183',
 '3aacaba4-b83c-4c44-8df7-20d45d5039c0',
 '455c751e-e480-4edb-9ca8-9841b33b4221',
 '32d1e390-167b-438b-a5ff-719f5444777a',
 '661e7204-845a-4108-8134-dae90cc034d9',
 'be17e9df-5463-4371-bfbe-7887e7cc2287',
 '0a83e6c7-d7e6-4b44-843e-f34e2ac79140',
 'eae60e95-d157-451c-9501-d8cb804cde6e',
 '9c2ed292-850c-441b-bf0d-6e979b542a47',
 '53468532-5fe3-44cd-8cbc-7331f8d396ae',
 '053ed2c1-1d2f-4c5a-ab80-370f484c7ecd',
 '2f521196-cd2f-4583-a4d7-d3af3014f0c5',
 '0e4d8b84-b9fc-4d1e-90c5-cc3e795f20a6',
 'd6958b8a-6256-4c9a-b5db-ab232982190c',
 '3e3bd6b5-24d2-435a-b572-a8f03fd20839',
 '07c63f64-a50a-4c4a-af28-ea586317b09b',
 '397c23fd-4b8a-4236-a164-2bf525b678e6',
 '87c86b90-7f24-4657-9dd8-a37c54b98cda',
 'fab230f4-e631-45e9-961e-f2314d37cf00',
 'e3009b0e-0c10-

In [9]:
retriever = vector_store.as_retriever()

In [10]:
# Initialize the Hugging Face pipeline for question-answering
hf_pipeline = pipeline("text2text-generation", model="google/flan-t5-small")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [11]:
# Wrap the pipeline in HuggingFacePipeline to make it compatible with LangChain
llm = HuggingFacePipeline(pipeline=hf_pipeline)

  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [12]:
# Set up system prompt
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
    
])

In [13]:
# Create the question-answer chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [15]:
answer = rag_chain.invoke({"input": "which company does sheryl Baxter work for?"})
answer['answer']



'Rasmussen Group City'

In [16]:
answer = rag_chain.invoke({"input": "which country does sheryl Baxter live in?"})
answer['answer']



'Chile'