## 6.0 Dependency

In [None]:
import logging

# Create the "rag" logger and set its level to DEBUG so debug messages are emitted
logger = logging.getLogger("rag")
logger.setLevel(logging.DEBUG)

# Attach a stream handler for console output. logging.getLogger returns the
# same logger object for a given name, so re-running this cell would otherwise
# stack another handler and print every message more than once. Reuse the
# handler that is already attached when there is one.
if logger.handlers:
    handler = logger.handlers[0]
else:
    handler = logging.StreamHandler()
    logger.addHandler(handler)

In [None]:
from langchain.chat_models import ChatOpenAI

# Create a ChatOpenAI instance
llm = ChatOpenAI()

# Log the instance's type as a debug message through the logger
logger.debug(type(llm))

## 6.1 Data Loaders and Splitters

In [None]:
# Dependency
# !pip install "unstructured[all-docs]"

In [None]:
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader

In [None]:
# Loader for the plain-text source file of chapter one
loader = TextLoader(file_path="files/chapter_one.txt")

## 6.1.2 Splitters

In [None]:
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter,
)

In [None]:
# Character-based splitter configured with a newline separator,
# a chunk size of 600 and an overlap of 100 between chunks
splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=100, separator="\n")

In [None]:
# Load the file, split the loaded documents with the splitter,
# and display how many chunks were produced
docs = splitter.split_documents(loader.load())
len(docs)

## 6.3 Vectors

In [None]:
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# Instantiate the OpenAI embeddings model with no explicit arguments
embedding = OpenAIEmbeddings()

In [None]:
# embed_documents needs the list of texts to embed; calling it with no
# arguments raises TypeError. Pass the text of every chunk in `docs`.
embedding.embed_documents([doc.page_content for doc in docs])

## 6.4 Vector Store

In [None]:
import os
import dotenv
import pinecone
from langchain.vectorstores.pinecone import Pinecone

# Locate a .env file (find_dotenv) and load it (load_dotenv) so that the
# os.getenv lookups for the Pinecone credentials can read its values
dotenv.load_dotenv(dotenv.find_dotenv())

In [None]:
# Initialize the Pinecone client with credentials taken from the environment
pinecone.init(
    environment=os.environ.get("PINECONE_ENV"),  # shown next to the API key in the console
    api_key=os.environ.get("PINECONE_API_KEY"),  # available at app.pinecone.io
)

In [None]:
# Create the index only when Pinecone does not already list one with this name
index_name = "open-ai"
if index_name not in pinecone.list_indexes():
    # Index is missing: announce it and create it with cosine similarity.
    # NOTE(review): dimension=1536 presumably matches the embedding vector size — confirm
    print("Create a new index")
    pinecone.create_index(name=index_name, dimension=1536, metric="cosine")

In [None]:
# Attach to the index that already exists. Alternative kept for reference,
# building the store from the split documents instead:
# docsearch = Pinecone.from_documents(docs, embedding, index_name='open-ai')
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embedding)

## 6.4.2 Caching

In [None]:
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore

In [None]:
# Local file store rooted at ./.cache/ used as the byte store for cached embeddings
cache_dir = LocalFileStore("./.cache/")

# Wrap the embedding model with a cache backed by cache_dir
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embedding, cache_dir)

# Backward-compatible alias: the name was originally misspelled as
# `cahced_embeddings`; keep it so any code using the old name still works
cahced_embeddings = cached_embeddings