In [1]:
from langchain import hub
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.supabase import SupabaseVectorStore
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI
from langchain.output_parsers import RegexParser

In [2]:
from utils.utils import initialize_environment_variables, initialize_subabase_client, initialize_openai_client

# Load settings from the .env file one directory up. This must run before the
# clients below are created (presumably it supplies their credentials —
# confirm in utils.utils).
initialize_environment_variables("../.env")

# Initialize Supabase Client (used for direct table queries and as the
# vector-store backend in the cells that follow)
supabase_client = initialize_subabase_client()

# Initialize OpenAI Client
# NOTE(review): openai_client is not referenced by any other cell visible in
# this notebook.
openai_client = initialize_openai_client()

In [23]:
def get_uploaded_ids(filepath, upload_table_name):
    """Return the ids of all rows in a table that were uploaded from one file.

    Queries ``upload_table_name`` through the notebook-level ``supabase_client``
    for rows whose ``metadata`` column contains ``{"source": filepath}`` and
    collects their ``id`` values.

    Args:
        filepath: Value expected under the ``"source"`` key of ``metadata``.
        upload_table_name: Name of the Supabase table to search.

    Returns:
        list: One ``id`` per matching row, in the order the API returned them
        (``None`` for a row without an ``id`` key); empty when nothing matches.
    """
    response = (
        supabase_client.table(upload_table_name)
        .select("id")
        .contains("metadata", {"source": filepath})
        .execute()
    )
    # Read the rows from the response's ``data`` attribute instead of
    # tuple-unpacking the response object: unpacking only works while the
    # response iterates as exactly two (name, value) pairs.
    rows = response.data or []
    return [row.get("id") for row in rows]

In [26]:
# Fetch the id of every row in `vector_store` whose metadata records
# docs/Pedagogy.pdf as its source. The response is unpacked into two
# (field name, value) pairs, which is why later cells read the rows
# from data[1].
data, count = (
    supabase_client
    .table('vector_store')
    .select('id')
    .contains('metadata', {'source': 'docs/Pedagogy.pdf'})
    .execute()
)

2024-01-29 23:26:00,377:INFO - HTTP Request: GET https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/vector_store?select=id&metadata=cs.%7B%22source%22%3A%20%22docs%2FPedagogy.pdf%22%7D "HTTP/1.1 200 OK"


In [27]:
# Collect the id of each returned row (the rows are the second element of `data`).
[row.get("id") for row in data[1]]

['b46b69bc-e7f8-4f84-9ff2-3c4c79efb6ac',
 'c2cd9861-fa8e-4841-91cc-845affa22881',
 '1f8dfcc4-fc5b-42de-ba94-a361d031ec23',
 '3e73d183-0a7c-4b94-abd2-11588dfca8e1',
 'd284e98e-ece0-4afc-bbb1-a20ab04cf667',
 'ad07c0e6-2179-4602-aaa8-da24ce9fd605',
 '485cda94-f4b7-4f0d-aa0a-2c418f8bc778',
 '4cfe8ba9-7b5e-4544-af47-307376120b45',
 'b683595d-3a94-428b-80ef-8098878c9df5',
 '9ac50eab-cee1-46fd-83b4-a7c0fd8393c5',
 'c03045c2-1022-4d3b-83cd-3f399b5352fe',
 'a3e8652d-dd73-4857-b1c1-30b13d6cf99f',
 '839828d8-f0dd-4fd7-b98a-1993492d161d',
 '8d70fcbc-8b4a-422a-aef7-563033178612',
 '874e3ccd-a518-41f6-96d7-9535a8dff356',
 'b87b7d77-df65-4cdf-a4be-e6b165235892',
 '51c8fd19-1ada-4613-bed2-420097e576aa',
 '3459b8c8-f61e-44b9-a932-fa810d03f021',
 'aace7419-c530-4455-8197-d1f9eb4e26d4',
 '8d63f78c-90b9-4459-99cc-891948ee9ca2',
 '81f712e9-507c-43e3-ba8b-74fb249265a4',
 '4773fbb8-ef9e-4a33-a8dc-a12ec62016d4',
 'df280eaf-4168-4a39-bb49-2ae01f24668f',
 '97d32354-7b11-4272-a72f-8177c5ea48d4',
 'de91d10b-c037-

In [28]:
# Spot-check: id of the first returned row.
data[1][0].get("id")

'b46b69bc-e7f8-4f84-9ff2-3c4c79efb6ac'

In [29]:
# Number of rows returned for this source file.
len(data[1])

32

In [5]:
# Second item unpacked from the query response; displayed to see what it holds.
count

('count', None)

In [63]:
# Sample question used by the retrieval and question-answering cells.
query = "What is education?"

In [5]:
# Embedding model shared by the vector store and the manual embed_query step.
embeddings = OpenAIEmbeddings()

In [32]:
# Vector store backed by the Supabase table "documents_new". Searches are
# issued through the "match_documents_mmr" query/RPC name rather than the
# library default.
vector_store = SupabaseVectorStore(
        client=supabase_client,
        embedding=embeddings,
        table_name="documents_new",
        query_name="match_documents_mmr",
    )

In [64]:
# Retriever using MMR search with k=3.
# NOTE(review): lambda_mult=0.25 presumably weights the selection toward
# diversity over pure similarity — confirm against the LangChain MMR docs.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 3, 'lambda_mult': 0.25})

In [65]:
# Run the MMR retriever on the sample question.
mmr_docs = retriever.get_relevant_documents(query)

2024-01-28 22:24:14,841:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-01-28 22:24:15,070:INFO - HTTP Request: POST https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/rpc/match_documents_mmr?limit=20 "HTTP/1.1 200 OK"


In [66]:
# Inspect the documents returned by the MMR retriever.
mmr_docs

[Document(page_content='11. Chazan, Barry (2022). "What is "Education"?" (https://link.springer.com/chapter/10.1007/978-\n3-030-83925-3_3). Principles and Pedagogies in Jewish Education. Springer International\nPublishing. pp. 13–21. doi:10.1007/978-3-030-83925-3_3 (https://doi.org/10.1007%2F978-3-\n030-83925-3_3). ISBN 978-3-030-83925-3. S2CID 239896844 (https://api.semanticscholar.o\nrg/CorpusID:239896844). Archived (https://web.archive.org/web/20220512120351/https://lin\nk.springer.com/chapter/10.1007/978-3-030-83925-3_3) from the original on 12 May 2022.\nRetrieved 13 May 2022.\n12. Marshall, James D. (2006). "The Meaning of the Concept of Education: Searching for the\nLost Arc" (https://www.jstor.org/stable/42589880). Journal of Thought. 41 (3): 33–37.\nISSN 0022-5231 (https://www.worldcat.org/issn/0022-5231). JSTOR 42589880 (https://www.\njstor.org/stable/42589880). Archived (https://web.archive.org/web/20220512120351/https://w\nww.jstor.org/stable/42589880) from the original on 

In [44]:
# Search the same store directly (no retriever) for comparison with the MMR
# results; each result is a tuple whose first element is the Document.
matched_docs = vector_store.similarity_search_with_relevance_scores(query)

2024-01-28 22:02:48,795:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-01-28 22:02:48,978:INFO - HTTP Request: POST https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/rpc/match_documents_mmr?limit=4 "HTTP/1.1 200 OK"


In [45]:
# Inspect the direct search results.
matched_docs

[(Document(page_content='41. UNESCO (2019). Empowering students for just societies: a handbook for secondary school\nteachers (https://unesdoc.unesco.org/ark:/48223/pf0000370901?posInSet=2&queryId=a055\n95b4-f5b2-48ce-b649-f8177b6036fe). UNESCO. ISBN 978-92-3-100340-0.\n42. Kincheloe, Joe (2008). Critical Pedagogy Primer. New York: Peter Lang.\nISBN 9781433101823.\n43. Kincheloe, Joe L.; Horn, Raymond A., eds. (2007). The Praeger Handbook of Education and\nPsychology (https://books.google.com/books?isbn=0313331235). Praeger. p. 552.\nISBN 978-0313331237.\n44. Jones, Leo. (2007). The Student-Centered Classroom. Cambridge University Press.\n45. Rogers, C. R. (1983). Freedom to Learn for the \'80s. New York: Charles E. Merrill Publishing\nCompany, A Bell & Howell Company.\n46. Pedersen, S., & Liu, M. (2003). Teachers\' beliefs about issues in the implementation of a\nstudent-centered learning environment. Educational Technology Research and\nDevelopment, 51(2), 57–76.\n47. Hannafin, M. J.

In [53]:
from langchain import hub

# Pull the "rlm/rag-prompt" template from the LangChain hub; it is the prompt
# step of the RAG chain built later.
prompt = hub.pull("rlm/rag-prompt")

In [54]:
# Show the pulled prompt to check its template text and input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [55]:
from langchain_openai import ChatOpenAI

In [56]:
# Chat model used as the generation step of the RAG chain.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [67]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Merge documents into one string.

    Takes the ``page_content`` of every item in ``docs`` and joins the pieces
    with a blank line between them. An empty input yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# RAG pipeline. The incoming question is (a) piped through the retriever and
# flattened to text by format_docs, and (b) passed through unchanged; both
# values fill the prompt, which goes to the chat model, whose reply is parsed
# into a plain string.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [68]:
# Stream the answer and print each chunk as soon as it arrives, with no
# separator between chunks so the text reads continuously.
for chunk in rag_chain.stream(query):
    print(chunk, end="", flush=True)

2024-01-28 22:24:51,115:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-01-28 22:24:51,314:INFO - HTTP Request: POST https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/rpc/match_documents_mmr?limit=20 "HTTP/1.1 200 OK"
2024-01-28 22:24:52,331:INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Education is the theory and practice of learning, encompassing the imparting of knowledge and skills in an educational context. It involves the interactions between teachers and students and is influenced by social, political, and psychological factors. Pedagogy can be seen as both a science and an art, with various approaches and methods used to achieve educational goals.

In [11]:
# Build a "map_rerank" question-answering chain on the completion-style OpenAI
# model, keeping the intermediate steps in the result.
# NOTE(review): PROMPT is not defined in any cell visible in this notebook, so
# on a fresh kernel this line would raise NameError unless it is defined
# elsewhere. It presumably came from a removed cell (the PromptTemplate and
# RegexParser imports in the first cell are otherwise unused) — restore it.
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True, prompt=PROMPT)

In [12]:
# Embed the question explicitly so the store can be searched by vector.
query_embeddings = embeddings.embed_query(query)

2024-01-28 21:03:41,017:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [13]:
# Search by the precomputed query vector, asking for k=2 results; the next
# cell reads the document from index 0 of each result.
relevant_chunks = vector_store.similarity_search_by_vector_with_relevance_scores(query_embeddings,k=2)

2024-01-28 21:03:50,734:INFO - HTTP Request: POST https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/rpc/match_documents?limit=2 "HTTP/1.1 200 OK"


In [11]:
# Keep only the first element of each search result (the document itself).
chunk_docs = [result[0] for result in relevant_chunks]

In [12]:
# Run the QA chain over the retrieved chunks for the sample question.
# .invoke() is used because calling the chain object directly (chain({...}))
# is deprecated in LangChain and emits a deprecation warning; the returned
# dict is read the same way by the following cells.
results = chain.invoke({"input_documents": chunk_docs, "question": query})

  warn_deprecated(
2024-01-27 18:04:01,009:INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


In [13]:
# Concatenate the text of every document the chain was given (no separator)
# and pair it with the chain's answer.
text_reference = "".join(doc.page_content for doc in results["input_documents"])
output = {"Answer": results["output_text"], "Reference": text_reference}

In [14]:
# Show the answer together with the reference text it was drawn from.
output

{'Answer': ' Unterricht is a German word that translates to "instruction" or "teaching." It refers to the process of imparting knowledge or skills to students in a formal setting, such as a classroom or online course. ',
 'Reference': 'Paulo Reglus Neves Freire (September 19, 1921 – May 2, 1997) was a Brazilian educator and\nphilosophe r who was a leading advoc ate of critical pedagogy . He is best know n for his influential work\nPedagogy  of the Oppressed, which is generally considered one of the founda tional texts of the critical\npedagogy m ovement.[26][27][28]\nConfucius (551–479 BCE) stated that authority has the respons ibility to provide oral and written instruction\nto the people unde r the rule, and "should do them good in every possible way."[21] One of the deepest\nteachings of Confucius may have been the superiority of personal exemplification over explicit rules of\nbehavior. His moral teachings emphasized self-cultivation, emulation of moral exemplars, and the\nattainme

In [15]:
def getanswer(query):
    """Answer a question from the two closest chunks in ``documents_new``.

    Embeds ``query``, searches the Supabase table ``documents_new`` by vector
    for k=2 results, runs the notebook-level QA ``chain`` over the retrieved
    documents, and returns the answer alongside the text it was given.

    Relies on the notebook-level ``supabase_client`` and ``chain`` objects
    being defined before the call.

    Args:
        query: The question to answer.

    Returns:
        dict: ``{"Answer": <the chain's "output_text">, "Reference": <page
        contents of the chain's input documents, concatenated without a
        separator>}``.
    """
    embeddings = OpenAIEmbeddings()
    vector_store = SupabaseVectorStore(
        client=supabase_client,
        embedding=embeddings,
        table_name="documents_new",
    )
    query_embeddings = embeddings.embed_query(query)
    relevant_chunks = vector_store.similarity_search_by_vector_with_relevance_scores(
        query_embeddings, k=2
    )
    # Each search result carries the document at index 0; keep only that.
    chunk_docs = [result[0] for result in relevant_chunks]
    # .invoke() replaces the deprecated direct call ``chain({...})``.
    results = chain.invoke({"input_documents": chunk_docs, "question": query})
    text_reference = "".join(doc.page_content for doc in results["input_documents"])
    return {"Answer": results["output_text"], "Reference": text_reference}

In [16]:
# Try the helper with a German question ("What is instruction/teaching?").
getanswer("Was ist Unterricht?")

2024-01-27 18:04:10,677:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-01-27 18:04:10,856:INFO - HTTP Request: POST https://smxwnqdpxcvclxhbvhsf.supabase.co/rest/v1/rpc/match_documents?limit=2 "HTTP/1.1 200 OK"
2024-01-27 18:04:12,146:INFO - HTTP Request: POST https://api.openai.com/v1/completions "HTTP/1.1 200 OK"


{'Answer': ' Unterricht ist ein Prozess, bei dem Wissen und Fähigkeiten vermittelt werden, um das Verständnis und die Fähigkeiten der Lernenden zu fördern. Es kann in verschiedenen Formen stattfinden, wie z.B. im Klassenzimmer, online oder durch Fernunterricht.',
 'Reference': 'Paulo Reglus Neves Freire (September 19, 1921 – May 2, 1997) was a Brazilian educator and\nphilosophe r who was a leading advoc ate of critical pedagogy . He is best know n for his influential work\nPedagogy  of the Oppressed, which is generally considered one of the founda tional texts of the critical\npedagogy m ovement.[26][27][28]\nConfucius (551–479 BCE) stated that authority has the respons ibility to provide oral and written instruction\nto the people unde r the rule, and "should do them good in every possible way."[21] One of the deepest\nteachings of Confucius may have been the superiority of personal exemplification over explicit rules of\nbehavior. His moral teachings emphasized self-cultivation, emul