In [None]:
pip install chromadb

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
import warnings
import sqlite3 , faiss , json
import numpy as np
import os , shutil

In [None]:
warnings.filterwarnings('ignore')

In [None]:
# Directory scanned by load_docs() for the markdown files to index.
path = "data/books"
# Directory where the Chroma vector store is persisted (and reloaded from).
DB_PATH = "data/chroma"
# Single embedding model shared by store_chunks() and load_chunks(), so the
# store is built and queried with the same embeddings.
embeddings_model = OpenAIEmbeddings()

In [None]:
def load_docs():
    """Load the files under ``path`` that match the ``*.md`` glob.

    Returns:
        The loaded documents, exactly as returned by ``DirectoryLoader.load()``.
    """
    return DirectoryLoader(path, glob="*.md").load()

In [None]:
def split_in_chunks(document):
    """Split the loaded documents into overlapping chunks.

    Args:
        document: Documents to split; handed straight to ``split_documents``.

    Returns:
        The chunks produced by the splitter. The chunk count is also printed.
    """
    # 300-character chunks (measured with len) overlapping by 100, with each
    # chunk's start index requested via add_start_index.
    splitter_options = dict(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = splitter.split_documents(document)
    print(f"splitting done with {len(chunks)} chunks")
    return chunks

In [None]:
def store_chunks(chunks):
    """Rebuild the Chroma store at ``DB_PATH`` from ``chunks``.

    Any existing directory at ``DB_PATH`` is removed first, so the store is
    always recreated from scratch with exactly the chunks passed in.

    Args:
        chunks: Non-empty collection of documents to embed and index.

    Raises:
        ValueError: If ``chunks`` is empty or None. This is raised before the
            existing store is deleted, so a failed load/split step cannot
            wipe a previously built index.
    """
    if chunks is None or len(chunks) == 0:
        raise ValueError(
            "store_chunks() needs at least one chunk; existing store left untouched"
        )
    # Start from a clean directory so stale entries never mix with new ones.
    if os.path.exists(DB_PATH):
        shutil.rmtree(DB_PATH)
    db = Chroma.from_documents(chunks, embeddings_model, persist_directory = DB_PATH)
    # NOTE(review): recent Chroma releases are documented as persisting
    # automatically, which would make this call redundant - confirm against the
    # installed version before removing it.
    db.persist()
    print('databse created')

In [None]:
def load_chunks(query, k=3, min_score=0.7):
    """Retrieve context text for ``query`` from the persisted Chroma store.

    Args:
        query: Text to search the store for.
        k: Number of hits to request from the store (default 3).
        min_score: Relevance score the first hit must reach for the results
            to be used (default 0.7).

    Returns:
        The ``page_content`` of every returned hit, joined with a ``---``
        separator between blank lines, or ``None`` (after printing a notice)
        when there are no hits or the first hit scores below ``min_score``.
    """
    db = Chroma(persist_directory=DB_PATH, embedding_function=embeddings_model)
    results = db.similarity_search_with_relevance_scores(query, k=k)
    # Only the first hit is compared against the threshold; the remaining hits
    # are included regardless of their own score.
    # (assumes results are ordered best-first - TODO confirm)
    if not results or results[0][1] < min_score:
        print("no matching results")
        return None
    return '\n\n---\n\n'.join(doc.page_content for doc, _score in results)

In [None]:
# Build the vector store: load the markdown files, split them into chunks,
# then index the chunks. store_chunks() deletes any existing DB_PATH directory
# first, so re-running this cell rebuilds the store from scratch.
document = load_docs()
chunks = split_in_chunks(document)
store_chunks(chunks)

In [None]:
# Prompt skeleton: {context} and {query} are placeholders filled in later via
# ChatPromptTemplate.format().
PROMPT_TEMPLATE = """
Answer the query based on the following context:
{context}

---

Answer the query based on the above context: {query}
Don't include the mention of context in the response
"""
# Question used both for retrieval (load_chunks) and in the final prompt.
query = "alice?"

In [None]:
# Retrieve supporting text for the query, then fill the prompt template.
context = load_chunks(query)
# load_chunks() returns None when retrieval finds nothing relevant. Formatting
# the prompt anyway would inject the literal text "None" as the context, so
# stop here with a clear error instead of sending a meaningless prompt.
if context is None:
    raise ValueError(
        f"no relevant context found for query {query!r}; cannot build prompt"
    )
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context = context, query = query)

In [None]:
model = ChatOpenAI()
# invoke() is the supported chat-model entry point (predict() is deprecated).
# It returns a message object, so take .content to keep response_text a
# plain string as before.
response_text = model.invoke(prompt).content

In [None]:
# Bare last expression so the notebook displays the model's answer.
response_text