# Setup

## Importing Libraries

In [None]:
import getpass
import os
from langchain_community.document_loaders import JSONLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveJsonSplitter
from langchain_community.vectorstores import Chroma
from langchain.storage import InMemoryByteStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_community.vectorstores import Qdrant

## Importing ENV Variables

In [None]:
# Ask for the OpenAI key through getpass (input is not echoed) and publish it
# to this process's environment under OPENAI_API_KEY.
os.environ.update({"OPENAI_API_KEY": getpass.getpass()})

## Setting Up Embeddings and In-Memory Cache

In [None]:
# In-memory byte store that holds the cached embedding vectors.
store = InMemoryByteStore()

# The embedding model whose results get cached.
underlying_embeddings = OpenAIEmbeddings()

# Wrap the model with the cache; cache entries are namespaced by the
# underlying model's name.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=underlying_embeddings,
    document_embedding_cache=store,
    namespace=underlying_embeddings.model,
)

In [None]:
# Show every key currently held in the embedding cache store.
[*store.yield_keys()]

## Loading JSON Data

In [None]:
import json

# Single source of truth for the input file, shared by the loader and the
# raw parse below.
json_path = './example_data/facebook_chat.json'

# Document loader that yields one Document per message `content` value.
# Kept available for code that wants Documents rather than raw JSON.
loader = JSONLoader(
    file_path=json_path,
    jq_schema='.messages[].content',
    text_content=False
)

# RecursiveJsonSplitter.split_json / create_documents operate on the parsed
# JSON object (a dict), not on the list of Document objects returned by
# loader.load(), so json_data is parsed straight from the file.
with open(json_path, encoding="utf-8") as json_file:
    json_data = json.load(json_file)

## Splitting Data

In [None]:
# Upper bound handed to the JSON splitter as max_chunk_size.
max_chunk_size = 300
splitter = RecursiveJsonSplitter(max_chunk_size=max_chunk_size)

In [None]:
# Break the JSON payload into smaller JSON chunks.
# NOTE(review): split_json is documented to take the parsed JSON object
# (a dict) — confirm json_data has that shape.
json_chunks = splitter.split_json(json_data)

In [None]:
# Split the JSON payload and wrap each chunk in a Document for the vector store.
# NOTE(review): create_documents is documented to take a list of parsed JSON
# objects (dicts) — confirm json_data has that shape.
json_payloads = [json_data]
documents = splitter.create_documents(json_payloads)

## Creating Database

In [None]:
# Build the Chroma index through the cache-backed embedder defined above
# rather than a fresh OpenAIEmbeddings(): a new instance bypasses `store`,
# so the cache would never be populated or reused.
db = Chroma.from_documents(documents, cached_embedder)

## Testing Similarity Search

In [None]:
# Run one similarity search against the index and show the first hit's text.
query = "What times are CS100 offered"
docs = db.similarity_search(query=query)
first_hit = docs[0]
print(first_hit.page_content)