### 1. Document Loader

In [None]:
!pip install pandas
!pip install langchain-community
!pip install langchain-pinecone
!pip install langchain-openai

In [None]:
import pandas as pd

# Load the raw Q&A export.
data = pd.read_csv("./data/sample_review_data.csv")


def _clean_text_column(column):
    """Remove non-breaking spaces and surrounding whitespace from a text column.

    Non-text columns (numeric, all-NaN, ...) are returned unchanged, because the
    `.str` accessor raises AttributeError on them.
    """
    if pd.api.types.is_string_dtype(column.dtype):
        return column.str.replace("\xa0", "", regex=False).str.strip()
    return column


data = data.apply(_clean_text_column)

# Cut each answer at the trailing feedback prompt. regex=False makes the marker
# a literal string; otherwise pandas treats a multi-character pattern as a
# regular expression and the final "?" becomes a quantifier.
data['Answer'] = (
    data['Answer']
    .str.split("위 도움말이 도움이 되었나요?", n=1, regex=False)
    .str[0]
)

# Rows missing a question or an answer would yield NaN content, which is not
# usable as document text, so drop them before building the combined column.
data = data.dropna(subset=['Question', 'Answer'])
data['content'] = data['Question'] + " >>> " + data['Answer']

In [None]:
from langchain_community.document_loaders import DataFrameLoader


# Wrap the cleaned DataFrame in a loader; the "content" column supplies the
# text of each document.
loader = DataFrameLoader(
    data,
    page_content_column="content",
)
raw_documents = loader.load()

# Sanity check: first 100 characters of the first document, then its metadata.
print(
    raw_documents[0].page_content[:100],
    raw_documents[0].metadata,
    sep="\n",
)

### 2. Document Splitter

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks (size 500, overlap 150)
# before they are embedded.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=500,
)
documents = text_splitter.split_documents(raw_documents)

### 3. Upload to Vector DB

In [None]:
import os
from getpass import getpass

from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore


# Never paste API keys into the notebook. Keys already present in the
# environment are left untouched; a missing (or empty) key is requested
# interactively without echoing it to the output.
for key_name in ("PINECONE_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(key_name):
        os.environ[key_name] = getpass(f"{key_name}: ")

# Name of the Pinecone index to use (an earlier revision mentioned "qna-rag").
index_name = "playground"
embeddings = OpenAIEmbeddings()

# Connect to the index, using `embeddings` to embed documents and queries.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

# Upload step, left disabled as in the original notebook (presumably so a
# re-run does not upload the same chunks again - confirm). Uncomment to
# populate the index:
# vectorstore.add_documents(documents)

### 4. Retrieve Data From Vector DB

In [None]:
# Look up the stored chunks that are similar to the question and show the
# first returned match.
query = "미성년자도 판매 회원 등록이 가능한가요?"
docs = vectorstore.similarity_search(query)

# The search can come back empty (e.g. nothing has been uploaded to the index
# yet); report that instead of failing with an IndexError on docs[0].
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found - has the index been populated?")