In [5]:
from linecache import cache

from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
import nltk
import ssl
from dotenv import load_dotenv

load_dotenv(dotenv_path=".env")
cache_dir = LocalFileStore("./.cache/")

# SSL 인증서 무시
ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)
loader = UnstructuredFileLoader("./files/ohtani.txt")

docs = loader.load_and_split(splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)


[nltk_data] Downloading package punkt to /Users/leehamin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
vectorstore.similarity_search("where does ohtani live")

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(page_content='Shohei Ohtani is a Japanese professional baseball player who currently plays for the Los Angeles Angels in Major League Baseball (MLB). Known as one of the most unique and talented players in modern baseball, Ohtani has made history as a successful two-way player, excelling both as a pitcher and a hitter. Born on July 5, 1994, in Oshu, Iwate, Japan, he quickly rose to fame with his extraordinary athleticism and versatility, drawing comparisons to baseball legends such as Babe Ruth.\nHere’s an in-depth look at Ohtani’s career, skills, and impact:\n1. Early Career in Japan Ohtani began his professional career in Japan with the Hokkaido Nippon-Ham Fighters of Nippon Professional Baseball (NPB). From a young age, he demonstrated remarkable potential as both a pitcher and a batter. In NPB, he became known for his high-velocity fastball, reaching speeds up to 102.5 mph (165 km/h), as well as his powerful hitting. His performance in Japan gained international attention

In [26]:
from langchain.embeddings import OpenAIEmbeddings

embedder = OpenAIEmbeddings()

vector = embedder.embed_query("Hi")

[-0.036322986480640175,
 -0.0071525537020427285,
 -0.033717321155373486,
 -0.028688384432652807,
 -0.026890475618989093,
 0.034629305695597364,
 -0.012409485162835595,
 -0.007777913491865437,
 0.0019770491662491297,
 -0.0027147786061717666,
 0.024714744159695334,
 -0.0024949254895786054,
 -0.005817149896880665,
 -0.002981859320638053,
 0.006722618904747273,
 -0.003032343986197924,
 0.03379549264250042,
 -0.001487672592632281,
 0.02107984078880444,
 -0.009015605266425133,
 -0.0216530865643167,
 0.010318437929058481,
 0.006224285453199531,
 0.00710695475442829,
 -0.01225965958358404,
 0.0008386988292500364,
 0.0057878360548693246,
 -0.009895017732797776,
 -0.0030616578282092644,
 -0.02470171682027087,
 0.010774430199170421,
 -0.013731860697250676,
 -0.02453234836923758,
 -0.014122710682305185,
 0.0023874417902547415,
 -0.018891080127441177,
 0.0005988960857640726,
 -0.01128904922198252,
 0.0180963528179077,
 -0.009868961191303812,
 0.01306090242547479,
 -0.01123042153795984,
 -0.00915891