In [3]:
from langchain.chat_models import init_chat_model
from langchain_chroma import Chroma
from langchain_nomic import NomicEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import ChatPromptTemplate
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
import os

# Load document and chunk it into smaller "sub documents"

In [4]:
# Read the source article from disk; `doc` holds whatever PyPDFLoader.load() returns.
file_path = "article/ziegler.pdf"
loader = PyPDFLoader(file_path=file_path)
doc = loader.load()

In [5]:
# Number of items returned by loader.load() for this PDF.
len(doc)

26

In [6]:
# Chunking settings for the tiktoken-based splitter: chunk size 1000, no overlap.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 0}
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(**splitter_options)

# `pages` holds the chunked sub-documents that are embedded further down.
pages = text_splitter.split_documents(doc)

# Create the LLM and the embedding model

In [7]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# API keys that must be present in the environment for the Groq chat model and
# the Nomic embeddings created in the following cells.
REQUIRED_ENV_VARS = ("GROQ_API_KEY", "NOMIC_API_KEY")

# Fail early with a readable message instead of the opaque
# "TypeError: str expected, not NoneType" that assigning a missing key back
# into os.environ would raise.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your shell or in a .env file."
    )

# Optional: stays None when the variable is not set.
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

In [8]:
# Chat model used to generate the answers, served through the Groq provider.
llm = init_chat_model(
    model="llama3-8b-8192",
    model_provider="groq",
)

In [9]:
# Embedding model shared by the cell that fills the Chroma store and the cell that re-opens it.
embeddings = NomicEmbeddings(
    model="nomic-embed-text-v1.5",
)

# Create retriever

In [None]:
%%time
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="./db")
give souce
vector_store.add_documents(documents=pages)

In [24]:
# Re-open the collection persisted under ./db, with the same embedding function, for querying.
db = Chroma(
    embedding_function=embeddings,
    persist_directory="./db",
)

In [28]:
# Wrap the Chroma store in a retriever; no search options are passed, so defaults apply.
retriever = db.as_retriever()

In [29]:
# Show the retriever's configuration (tags, backing vector store, search_kwargs).
print(retriever)

tags=['Chroma', 'NomicEmbeddings'] vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7f702a7e66d0> search_kwargs={}


# Create prompt template

In [30]:
# Prompt text with two placeholders: {context} (retrieved passages) and {question}.
# It deliberately contains no model-specific control tokens such as
# <bos>/<start_of_turn>/<end_of_turn>: those belong to Gemma's chat format, whereas
# the chat model API applies the correct chat template itself, so such tokens
# would only reach the model as literal noise.
template = """Answer the question based only on the following context and extract out a meaningful answer. \
Please write in full sentences with correct spelling and punctuation. if it makes sense use lists. \
If the context doesn't contain the answer, just respond that you are unable to find an answer. \

CONTEXT: {context}

QUESTION: {question}

ANSWER:"""

In [31]:
# Build the chat prompt from the template string, which carries the {context} and {question} placeholders.
prompt = ChatPromptTemplate.from_template(template)

# Create the RAG chain using LCEL (the answer is streamed by `ask_question` below)

In [32]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context string.

    Without this step the list of Document objects would be interpolated into
    the prompt as its Python repr (metadata, escaped newlines and all), which
    wastes context-window tokens and is harder for the model to read.
    """
    return "\n\n".join(document.page_content for document in docs)


# The incoming question is fanned out to two branches: the retriever (whose
# results are flattened to text) fills {context}, and the question itself is
# passed through unchanged to fill {question}. The filled prompt goes to the LLM.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# Function to ask questions

In [33]:
def ask_question(question, chain=None):
    """Stream the answer to ``question`` to stdout and return it as a string.

    Args:
        question: The user's question, passed unchanged to the chain.
        chain: Optional object exposing ``stream(question)`` that yields chunks
            with a ``content`` attribute. Defaults to the notebook-level
            ``rag_chain``.

    Returns:
        The full answer text, i.e. the concatenation of every streamed chunk.
    """
    # Resolve the default lazily so the global is only needed when no chain is given.
    active_chain = rag_chain if chain is None else chain

    pieces = []
    print("Answer:\n\n", end=" ", flush=True)
    for chunk in active_chain.stream(question):
        # Echo each piece as soon as it arrives so the user sees the answer build up.
        print(chunk.content, end="", flush=True)
        pieces.append(chunk.content)
    print("\n")
    return "".join(pieces)

# Example usage

In [None]:
# Simple interactive loop: keep answering questions until the user types 'quit'.
if __name__ == "__main__":
    while True:
        try:
            # Strip surrounding whitespace so that e.g. " quit " is still recognised.
            user_question = input("Ask a question (or type 'quit' to exit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closing the input stream or interrupting the prompt ends the session quietly.
            break
        if user_question.lower() == 'quit':
            break
        if not user_question:
            # Nothing was typed: ask again instead of sending an empty query to the chain.
            continue
        ask_question(user_question)

Ask a question (or type 'quit' to exit):  list the tables and its description mention in the text


Answer:

 Based on the provided context, I was able to extract the following tables and their descriptions:

1. **Table 9**: Agreement probabilities for two tasks, i.e. the probability of both individuals choosing the same sample as best, out of 4. of labelers, and pairs of authors.
2. **Table 10**: Mock sentiment continuation without a KL penalty.
3. **Table 11**: Sentiment continuation.
4. **Table 12**: Continuations for the descriptiveness continuation task, with contexts randomly sampled from BookCorpus test set.
5. **Table 13 to 15**: TL;DR summarization.
6. **Table 16 to 18**: CNN/Daily Mail summarization.

Please let me know if you would like me to extract any other information.



Ask a question (or type 'quit' to exit):  give the extract from table 16


Answer:

 I am unable to find an answer to the question because there is no Table 16 mentioned in the provided context.



Ask a question (or type 'quit' to exit):  give the extract from cnn/daily mail summarization


Answer:

 Based on the provided context, I was unable to find an answer that directly matches the question "give the extract from CNN/Daily Mail summarization". However, I can extract relevant information related to the CNN/Daily Mail summarization task.

From Table 7, we can see that the frequency with which generated summaries copy the first 3 words of the article as the first 3 words of the summary is:

* Zero-shot: 6/30
* 60k fine-tuned: 26/30
* Supervised: 8/30
* Supervised + 60k fine-tuned: 11/30

From Table 4, we can see the ROUGE evaluations of summarization models on the CNN/Daily Mail test set:

* 60k fine-tuned: 16.800 R-1, 2.884 R-2, 14.011 R-L, 11.232 R-A
* Supervised: 17.535 R-1, 3.124 R-2, 14.969 R-L, 11.877 R-A
* Supervised + 60k fine-tuned: 18.434 R-1, 3.542 R-2, 15.457 R-L, 12.478 R-A
* 60k ofﬂine fine-tuned: 16.632 R-1, 2.699 R-2, 13.984 R-L, 11.105 R-A

From Table 5, we can see the human evaluation of summarization models on the CNN/Daily Mail test set:

* 60k fine-

Ask a question (or type 'quit' to exit):  what is ROUGE evaluations?


Answer:

 Based on the provided context, ROUGE evaluations refer to a set of numerical results that measure the performance of summarization models. Specifically, ROUGE (Recall-Oriented Understudy for GIST Evaluation) is a popular evaluation metric for summarization tasks, which assesses the similarity between a generated summary and a reference summary. The ROUGE results in the context are presented in Table 4, which compares the performance of different summarization models on the CNN/Daily Mail and TL;DR datasets.



Ask a question (or type 'quit' to exit):  what measurement are used in the text?


Answer:

 Based on the provided context, the following measurements are mentioned:

1. **n-grams**: consecutive sequences of words in a single sentence in a summary, which count as novel if they do not appear consecutively in the article.
2. **Bigrams**: consecutive sequences of two words in a summary, used to illustrate the location of the longest common subsequence between the context article/post and the summary.
3. **Agreement probabilities**: the probability of both individuals choosing the same sample as best, out of 4 (Table 9).
4. **Sentiment**: the probability of both individuals choosing the same sentiment (Table 9).
5. **Descriptiveness**: the probability of both individuals choosing the same level of vividness in their descriptions (Table 9).
6. **Confidence model**: a model used to evaluate the performance of each labeler on the benchmark distribution.
7. **Rate of agreement**: the rate of agreement between authors and Scale labelers, pairs of authors, and between random r

Ask a question (or type 'quit' to exit):  give overview on the experiment done?


Answer:

 Based on the provided context, I will extract a meaningful answer to the question. 

The experiment done is a fine-tuning of language models from human preferences. The objective is to investigate how human preferences can be used to improve the performance of language models. The experiment is designed to test the effectiveness of different models in generating summaries that are more accurate and informative than the original text.

The experiment consists of several tasks, including sentiment analysis, descriptiveness evaluation, and summarization. The models are trained on a dataset of articles from CNN and Daily Mail, and are evaluated on their ability to generate summaries that accurately capture the main points of the text.

The experiment uses a combination of human labeling and machine learning algorithms to evaluate the performance of the models. Human labelers are used to evaluate the quality of the summaries generated by the models, and the results are used to fin