In [1]:
from linecache import cache

from IPython.core.debugger import prompt
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
import nltk
import ssl
from dotenv import load_dotenv
from nltk.data import retrieve
from sympy.physics.units import temperature

load_dotenv(dotenv_path=".env")
cache_dir = LocalFileStore("./.cache/")

# SSL 인증서 무시
ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

llm = ChatOpenAI()

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)
loader = UnstructuredFileLoader("./files/ohtani.txt")

docs = loader.load_and_split(splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
)

chain.run("Which team does ohtani play baseball")

chain.run("Describe Los Angeles Angels")

[nltk_data] Downloading package punkt to /Users/leehamin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  llm = ChatOpenAI()
  loader = UnstructuredFileLoader("./files/ohtani.txt")
  embeddings = OpenAIEmbeddings()
  chain.run("Which team does ohtani play baseball")


'The Los Angeles Angels are a Major League Baseball team based in Anaheim, California. They are a member of the American League (AL) West division. The team was established in 1961 and has had various names throughout its history, including the Los Angeles Angels and the California Angels, before settling on the Los Angeles Angels in 2005. The team plays its home games at Angel Stadium of Anaheim. The Angels have had notable players throughout their history, with Shohei Ohtani being a prominent figure in recent years due to his exceptional two-way skills as both a pitcher and a hitter. Ohtani signed with the Angels in 2017, amidst initial skepticism from American media following a challenging debut MLB spring training in 2018. However, Ohtani went on to have an impressive 2018 MLB regular season, ultimately winning the AL Rookie of the Year award. His performance on both the mound and at the plate has solidified his status as a standout player for the Angels.'

In [4]:
from linecache import cache

from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

import nltk
import ssl
from dotenv import load_dotenv

load_dotenv(dotenv_path=".env")
cache_dir = LocalFileStore("./.cache/")

# SSL 인증서 무시
ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

llm = ChatOpenAI(
    temperature=0.1
)

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)
loader = UnstructuredFileLoader("./files/ohtani.txt")

docs = loader.load_and_split(splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
    ),
    ("human", "{question}"),
])

chain = (
        {"context": retriever, "question": RunnablePassthrough()} 
         | prompt
         | llm)

chain.invoke("Describe ohtani")

[nltk_data] Downloading package punkt to /Users/leehamin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


AIMessage(content='Shohei Ohtani is a Japanese professional baseball player who is known for his exceptional skills as both a pitcher and a designated hitter. He has played for the Los Angeles Angels and currently plays for the Los Angeles Dodgers. Ohtani has been compared to Babe Ruth for his rare two-way abilities. He has won multiple awards, including the American League Most Valuable Player Award. In 2023, he signed a record-breaking 10-year, $700 million contract with the Dodgers.')