In [1]:
#import system packages
import os
import sys

#import vectorstores modules
import chromadb
# Chroma's default embedding function is sentence-transformers/all-MiniLM-L6-v2, which produces 384-dimensional vectors
from chromadb.utils import embedding_functions
from langchain.vectorstores.chroma import Chroma
from langchain_core.embeddings import Embeddings
from chromadb.api.types import EmbeddingFunction
from langchain_chroma import Chroma

#import langchain modules
from langchain_ollama import ChatOllama
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (unstructured,
                                                  UnstructuredExcelLoader,
                                                  CSVLoader,
                                                  PyMuPDFLoader,
                                                  Docx2txtLoader,
                                                 )
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Chat model handle used by the RAG chain: the "llama3" model via ChatOllama,
# configured with temperature 0.
llm = ChatOllama(model="llama3", temperature=0)

In [3]:
class ChromaEmbeddingsAdapter(Embeddings):
    """Expose a Chroma embedding function through LangChain's ``Embeddings`` API.

    The wrapped callable is invoked with a list of strings and is expected to
    return one vector per string. Vectors are converted to plain Python lists
    of floats before being returned, so callers receive the same shape
    regardless of whether the wrapped function yields lists or array-like
    objects.

    Args:
        ef: The Chroma embedding function to delegate to.
    """

    def __init__(self, ef: EmbeddingFunction):
        self.ef = ef

    def embed_documents(self, texts):
        """Embed a batch of texts.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list with one vector (list of floats) per input text; an empty
            list when ``texts`` is empty.
        """
        texts = list(texts)
        if not texts:
            # Nothing to embed; skip the call to the wrapped function entirely.
            return []
        return [self._as_float_list(vector) for vector in self.ef(texts)]

    def embed_query(self, query):
        """Embed a single query string and return its vector as a list of floats."""
        return self._as_float_list(self.ef([query])[0])

    @staticmethod
    def _as_float_list(vector):
        """Convert one embedding (list or array-like of numbers) to ``list[float]``."""
        return [float(component) for component in vector]
# Wrap Chroma's default embedding function in the LangChain-facing adapter.
default_ef = embedding_functions.DefaultEmbeddingFunction()
embeddings = ChromaEmbeddingsAdapter(default_ef)

# Collection name and the Chroma persistent client rooted at "database/".
collection_name = "ntu_documents"
client = chromadb.PersistentClient(path="database/")

In [4]:
# Folder containing the PDFs to index. Set the RAG_DATA_DIR environment
# variable to point the notebook at a different folder; when it is unset the
# original location below is used.
datapath = os.environ.get("RAG_DATA_DIR", "D:/NTU File/Semester 6/Intro to Philosophy/")
def pdf_load(file_path):
    """Load every PDF found directly inside ``file_path``.

    Args:
        file_path: Directory to scan (not recursive). A trailing path
            separator is optional.

    Returns:
        A flat list containing the documents produced by
        ``PyMuPDFLoader(path).load()`` for each PDF, visited in sorted
        filename order. Empty when the directory holds no PDF files.

    Raises:
        OSError: If ``file_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    documents = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file in sorted(os.listdir(file_path)):
        # os.path.join works whether or not file_path ends with a separator.
        path = os.path.join(file_path, file)
        # Only hand regular *.pdf files to the PDF loader; skip sub-directories
        # and unrelated files that happen to live in the same folder.
        if not (file.lower().endswith(".pdf") and os.path.isfile(path)):
            continue
        documents.extend(PyMuPDFLoader(path).load())
    return documents
# Load the documents from `datapath`; `post` holds the list returned by pdf_load.
post = pdf_load(datapath)

In [13]:
# Bind the vector store to the persistent Chroma client created earlier in the
# notebook, so the collection is stored through that client instead of a
# separate default one.
vectorstore = Chroma(
    client=client,
    collection_name=collection_name,
    embedding_function=embeddings,
)

# Index the loaded documents only when the collection is still empty.
# Re-running this cell (or the whole notebook) then reuses the existing index
# rather than adding a second copy of every document.
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(post)

retriever = vectorstore.as_retriever()

In [14]:
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [16]:
# Prompt template pulled from the LangChain hub under the "rlm/rag-prompt" handle.
prompt = hub.pull("rlm/rag-prompt")

# Inputs for the prompt: "context" is the retriever output piped through
# format_docs, and "question" is the chain input passed along unchanged.
retrieve_context = retriever | format_docs
chain_inputs = {"context": retrieve_context, "question": RunnablePassthrough()}

# Full pipeline: build inputs -> fill prompt -> call the LLM -> parse to a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

answer = rag_chain.invoke("what do you know about philosophy")
print(answer)

Based on the provided context, philosophy is a discipline that involves reflecting upon the foundations of almost everything, taking doubts seriously. It's about questioning what we know and seeking good reasons for our beliefs and judgments. Philosophy is also concerned with normativity, exploring what counts as a good reason and what criteria should be used to evaluate knowledge.
