In [1]:
from tqdm import tqdm
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.system import SystemMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Optional
from datetime import datetime
from enum import Enum
import pandas as pd
import time
import os
import yaml

In [2]:
# Load the multiple-choice question set from a CSV file in the working
# directory. Later cells read the "url", "text", "choices" and "answer"
# columns of its first row.
question_df = pd.read_csv('./multiple_choice_questions.csv')

In [4]:
# Fetch the web page referenced by the first question and cut its text into
# overlapping chunks (chunk_size=1000, chunk_overlap=200) for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
loader = WebBaseLoader(question_df.iloc[0]["url"])
data = loader.load()
# Only the first loaded document is used; newlines are flattened to spaces
# before the text is split.
splits = text_splitter.split_text(
    data[0].page_content.replace("\n", " ")
)

In [5]:
# Embedding model used to vectorise the text chunks (Ollama "llama3").
# NOTE(review): presumably requires a reachable Ollama server with this model
# pulled - confirm before running on a fresh machine.
embeddings = OllamaEmbeddings(model='llama3')

In [6]:
# Build a FAISS vector store from the text chunks, embedding each chunk with
# the embedding model defined above.
vectorstore = FAISS.from_texts(
    embedding=embeddings,
    texts=splits,
)

KeyError: 'question'

In [7]:
# Retrieve the five chunks most similar to the first question's text.
out = vectorstore.similarity_search(
    question_df.iloc[0]["text"],
    k=5,
)

In [8]:
# Show the retrieved documents so the quality of the retrieval can be
# inspected before they are passed to the model as references.
out

[Document(page_content="Gruda, N. (2013). “Good agricultural practices for greenhouse vegetable crops—principles for mediterranean climate areas, 12: soilless culture,” in FAO Plant Production and Protection Paper. (Rome: Food and Agriculture Organization of the United Nations).    Smith, P., Bustamante, M., Ahammad, H., Clark, H., Dong, H., Elsiddig, E. A., et al. (2014). “Agriculture, forestry and other land use (AFOLU),” in Climate Change 2014: Mitigation of Climate Change. Contribution of Working Group III to the Fifth Assessment Report of the Intergovernmental Panel on Climate Change, eds. O. Edenhofer, R. Pichs-Madruga, Y. Sokona, E. Farahani, S. Kadner, K. Seyboth,. (New York, NY: Cambridge University Press), 112. Google Scholar    Springmann, M., Clark, M., Mason-D'Croz, D., Wiebe, K., Bodirsky, B. L., Lassaletta, L., et al. (2018). Options for keeping the food system within environmental limits. Nature 562, 519–525. doi: 10.1038/s41586-018-0594-0 PubMed Abstract | CrossRef Ful

In [18]:
class Choice(BaseModel):
    # Structured-output schema for the model's reply: one required string
    # field holding the selected answer.
    # NOTE(review): documented with comments on purpose - a class docstring
    # would likely be added to the schema that the parser's format
    # instructions are built from; confirm before converting to a docstring.
    answer: str = Field(
        ...,
        description="The answer to the question",
    )

In [20]:
# JSON output parser tied to the Choice schema. It is the last stage of the
# chain, and its get_format_instructions() text is passed into the prompt
# when the chain is invoked.
parse = JsonOutputParser(pydantic_object=Choice)

In [21]:
# Prompt for answering one multiple-choice question from retrieved context.
# The template string has four placeholders, and all four are declared in
# `input_variables`. `format_instructions` was previously missing from that
# list even though the template uses it and the chain.invoke payload supplies
# it, so the declared inputs now match the template exactly.
template = PromptTemplate(
    template="{format_instructions} Which of the following is the most likely answer to the following question: {question}? Choices: {choices} References: {references}",
    input_variables=["format_instructions", "question", "choices", "references"],
)

In [11]:
# Chat model that answers the question; uses the same "llama3" model name as
# the embedding model above.
llm = ChatOllama(model="llama3")

In [12]:
from langchain_core.output_parsers import StrOutputParser

In [22]:
# Compose the pipeline: prompt template -> chat model -> JSON output parser.
chain = template | llm | parse

In [23]:
# Ask the model the first question, supplying its answer choices, the
# retrieved chunks joined into one reference string, and the parser's
# formatting instructions. The parsed result is the cell's output.
chain.invoke(
    {
        "question": question_df.iloc[0]["text"],
        "choices": question_df.iloc[0]["choices"],
        "references": " ".join(doc.page_content for doc in out),
        "format_instructions": parse.get_format_instructions(),
    }
)

{'answer': '300 times higher'}

In [17]:
# Value of the "answer" column for the first question - presumably the
# ground-truth answer, shown for comparison with the model's reply.
question_df.iloc[0]['answer']

'300 times higher'