#### Samples source: [LangChain Docs](https://python.langchain.com/docs/versions/migrating_chains/retrieval_qa/)

In [None]:
# Bedrock model identifier handed to ChatBedrock further down; change it here.
# Other example IDs:
#   anthropic.claude-3-5-sonnet-20240620-v1:0
#   anthropic.claude-3-5-haiku-20241022-v1:0
#   meta.llama3-8b-instruct-v1:0
MODEL_ID = "meta.llama3-8b-instruct-v1:0"

In [None]:
import boto3
from pprint import pprint

### Creación de in-memory Vector Store usando URL

In [None]:
# Load docs
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_aws import BedrockEmbeddings
from langchain_aws import ChatBedrock

# Fetch the article that will be indexed.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Break the loaded documents into chunks (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
all_splits = text_splitter.split_documents(data)

# Bedrock runtime client, handed to the Titan embeddings wrapper.
bedrock = boto3.client("bedrock-runtime")
titan_embeddings = BedrockEmbeddings(
    client=bedrock,
    model_id="amazon.titan-embed-text-v2:0",
)

# Embed the chunks and index them in FAISS.
vectorstore = FAISS.from_documents(all_splits, titan_embeddings)

# Chat model selected by MODEL_ID; temperature is pinned to 0.
llm = ChatBedrock(
    model_id=MODEL_ID,
    model_kwargs={"temperature": 0},
)

### LCEL: Query Vector Store

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate

# Question-answering prompt. The template expects two variables:
# `question` (the user's query) and `context` (the retrieved text).
prompt = PromptTemplate.from_template("""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
""")


def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line ("\\n\\n");
        an empty string when ``docs`` is empty.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# LCEL pipeline. The leading mapping receives the user question and builds the
# prompt variables:
#   - "context": the question goes through the retriever, then format_docs
#   - "question": RunnablePassthrough forwards the question unmodified
# The mapping output then flows through prompt -> llm -> string parser.
qa_chain = {
    "context": vectorstore.as_retriever() | format_docs,
    "question": RunnablePassthrough(),
} | prompt | llm | StrOutputParser()

response = qa_chain.invoke("What are autonomous agents?")
print(response)

---
#### Cómo usar el CSVLoader de LangChain
> Más info en: [LangChain Docs](https://python.langchain.com/docs/integrations/document_loaders/csv/)

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Read the sample ecommerce CSV. The "subcategory_1" column is passed as
# source_column and "," is used as the field delimiter.
loader = CSVLoader(
    file_path="./data/ecomm_sample_etl_output/Mixed_data_Arts_Dolls_Surveillance.csv",
    source_column="subcategory_1",
    csv_args={"delimiter": ","},
)

data = loader.load()

In [None]:
# Show what the CSV loader returned (rendered as this cell's output).
data

### Crea FAISS Vector Store con los docs de eComm, usando CSVLoader

In [None]:
# Load docs
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_aws import BedrockEmbeddings
from langchain_aws import ChatBedrock
from langchain_community.document_loaders.csv_loader import CSVLoader

# Read the local ecommerce CSV; "subcategory_1" is passed as source_column.
loader = CSVLoader(
    file_path="./data/ecomm_sample_etl_output/Mixed_data_Arts_Dolls_Surveillance.csv",
    source_column="subcategory_1",
    csv_args={"delimiter": ","},
)
data = loader.load()

# Open question from the author: do we need chunking here?
# The splitter is applied regardless (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
all_splits = text_splitter.split_documents(data)

# Bedrock runtime client, handed to the Titan embeddings wrapper.
bedrock = boto3.client("bedrock-runtime")
titan_embeddings = BedrockEmbeddings(
    client=bedrock,
    model_id="amazon.titan-embed-text-v2:0",
)

# Embed the chunks and index them in FAISS (replaces the earlier `vectorstore`).
vectorstore = FAISS.from_documents(all_splits, titan_embeddings)

In [None]:
# Chat model for the ecommerce examples, selected by MODEL_ID.
# NOTE(review): max_tokens=None is forwarded inside model_kwargs; confirm the
# chosen Bedrock model accepts a null max_tokens, otherwise drop the key.
llm = ChatBedrock(
    model_id=MODEL_ID,
    model_kwargs={"temperature": 0, "max_tokens": None},
)

### Vamos a pausar rápido, para ver el chunking interno

In [None]:
# Load docs
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.csv_loader import CSVLoader

# Reload the ecommerce CSV so the chunking step can be inspected on its own.
loader = CSVLoader(
    file_path="./data/ecomm_sample_etl_output/Mixed_data_Arts_Dolls_Surveillance.csv",
    source_column="subcategory_1",
    csv_args={"delimiter": ","},
)
data = loader.load()

# Open question from the author: do we need chunking here?
# Same splitter settings as before (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
all_splits = text_splitter.split_documents(data)


In [None]:
# Pretty-print every chunk produced by the splitter.
pprint(all_splits)

In [None]:
# Alternative view (disabled): first 1000 characters of the first chunk's text.
# pprint(all_splits[0].page_content[:1000])
# Metadata attached to the second chunk.
pprint(all_splits[1].metadata)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Chat prompt that asks the model to reformat the loader output.
# - system message: describes the task and shows the target layout
#   ("=== Source: ... ===" headers followed by "--- Row: N ---" field listings)
# - human message: carries the raw text through the `input` template variable
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a string parser. 
            Your task is to receive a string input in a specific format, and return it in a different format.
            The format you must return is like the following:

            === Source: Dolls ===

            --- Row: 0 ---
                product_id: a4c653055a9b0e7b5c1fa1cf68be9537
                product_name: Barbie Doll House
                manufacturer: Matell
                price: 34
                number_available_in_stock: 13
                number_of_reviews: 3
                average_review_rating: 3.9
                subcategory_1: Dolls
                subcategory_2: 
                subcategory_3: 
                subcategory_4: 
                category: Toys

            === Source: Art Sand ===

            --- Row: 1 ---
                product_id: 68750ff6d9a5808ed0360e48d1204215
                product_name: Security Fashion Hourglass 10 Minutes Sand Timer -Orange
                manufacturer: Generic
                price: 5.21
                number_available_in_stock: 10
                number_of_reviews: 8
                average_review_rating: 5
                subcategory_1: Art Sand
                subcategory_2: 
                subcategory_3: 
                subcategory_4: 
                category: Arts & Crafts
            
            """,
        ),
        ("human", """Below you have the input, coming from a CSVLoader generated by the LangChain document loader: 
            <input>
            {input}
            </input>
         """),
    ]
)

# Pipe the formatting prompt into the chat model and run it over all chunks.
# NOTE(review): `all_splits` is passed as a list object, so the template
# presumably receives its string representation; confirm that is intended.
chain = prompt | llm
response = chain.invoke({"input": all_splits})

In [None]:
# Pretty-print the content of the model's reply.
pprint(response.content)

### Query ecommerce data from Vector Store

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate

# Ecommerce assistant prompt. The template expects two variables:
# `question` (the customer's query) and `context` (the retrieved product text).
prompt = PromptTemplate.from_template("""You are a friendly assistant for an ecommerce store. Look for the products the customer is looking for. Ask the user if they want more details, such as reviews or price. Try upselling.
Question: {question} 
Context: {context} 
Answer:
""")


def format_docs(docs):
    """Join each document's ``page_content`` into one blank-line-separated string.

    Same behavior as the ``format_docs`` defined earlier in this notebook; it is
    redefined here so this cell does not depend on that earlier cell.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A single string with the contents separated by "\\n\\n" (empty for no docs).
    """
    separator = "\n\n"
    return separator.join(map(lambda item: item.page_content, docs))


# LCEL pipeline over the ecommerce vector store. The leading mapping receives
# the customer question and builds the prompt variables:
#   - "context": the question goes through the retriever, then format_docs
#   - "question": RunnablePassthrough forwards the question unmodified
# The mapping output then flows through prompt -> llm -> string parser.
qa_chain = {
    "context": vectorstore.as_retriever() | format_docs,
    "question": RunnablePassthrough(),
} | prompt | llm | StrOutputParser()

response = qa_chain.invoke("Do you have dolls? If so, what's the price")
print(response)