In [1]:
# import logging
# import sys

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore
from IPython.display import Markdown, display

In [3]:
import os
import openai

# needed to synthesize responses later
os.environ["OPENAI_API_KEY"] = "sk-..."
openai.api_key = os.environ["OPENAI_API_KEY"]

# Documents

In [4]:
# load documents
documents = SimpleDirectoryReader("/home/surya/NEU/CS5100 FAI/Project/ResearchLens/uploads").load_data()

In [5]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

In [6]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# loads BAAI/bge-small-en
# embed_model = HuggingFaceEmbedding()

# loads BAAI/bge-small-en-v1.5
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

In [7]:
from llama_index.core import Settings

Settings.embed_model = embed_model
Settings.text_splitter = text_splitter
Settings.llm = None

LLM is explicitly disabled. Using MockLLM.


In [8]:
index = VectorStoreIndex.from_documents(
    documents, embed_model=embed_model, transformations=[text_splitter]
)

disk away

In [9]:
index.storage_context.persist("./storage")

In [10]:
# # save index to disk
# index.storage_context.persist("./storage")

# storage_context = StorageContext.from_defaults(
#     persist_dir="./storage"
# )
# index = load_index_from_storage(storage_context=storage_context)

# Add a new document

In [11]:
len(documents)

14

In [12]:
index.docstore.to_dict().keys()

dict_keys(['docstore/metadata', 'docstore/data', 'docstore/ref_doc_info'])

In [14]:
index.summary

'None'

## CHeck doc exist

In [15]:
def is_exist(index, filepath):
    for node in index.docstore.docs.values():
        if node.metadata['file_path'] == filepath:
            return True
    return False

In [16]:
is_exist(index, '/home/surya/NEU/CS5100 FAI/Project/ResearchLens/uploads/test4.pdf')

False

## Insert doc

In [17]:
len(index.docstore.docs)

38

In [18]:
pdf_file  = "/home/surya/NEU/CS5100 FAI/Project/pdfreader/test2.pdf"

In [19]:
documents = SimpleDirectoryReader(input_files=[pdf_file]).load_data()

In [20]:
len(documents)

27

In [21]:
all_chunks = []

# Iterate over each document's text
for document in documents:
    # Assume each document has a text attribute containing its content
    document_text = document.text
    
    # Split the document's text into chunks
    chunks = text_splitter.split_text(document_text)
    
    # Add the chunks from this document to the list of all chunks
    all_chunks.extend(chunks)

len(all_chunks)

67

Number of documents after inserting these 27 document chunks must be:

67 + 85 = 152

In [22]:
for doc in documents:
    index.insert(doc)

In [23]:
len(index.docstore.docs)

107

close enough

## Test a query

In [24]:
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    context_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about an essay discussing Paul Grahams life."
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Based on the above documents, provide a detailed answer for the user question below."
    ),
)

In [25]:
response = chat_engine.chat("""
At a high level, existing methods instill the desired behaviors into a language model using curated
sets of human preferences representing the types of behaviors that humans find safe and helpful. This
preference learning stage occurs after an initial stage of large-scale unsupervised pre-training on
a large text dataset. While the most straightforward approach to preference learning is supervised
fine-tuning on human demonstrations of high quality responses, the most successful class of methods
is reinforcement learning from human (or AI) feedback (RLHF/RLAIF; [12, 2]). 

What does the RLHF model do?
""".strip())

In [26]:
print(response)

system: You are a chatbot, able to have normal interactions, as well as talk about an essay discussing Paul Grahams life.Here are the relevant documents for the context:
page_label: 2
file_path: /home/surya/NEU/CS5100 FAI/Project/pdfreader/test2.pdf

Figure 1: DPO optimizes for human preferences while avoiding reinforcement learning. Existing methods
for fine-tuning language models with human feedback first fit a reward model to a dataset of prompts and
human preferences over pairs of responses, and then use RL to find a policy that maximizes the learned reward.
In contrast, DPO directly optimizes for the policy best satisfying the preferences with a simple classification
objective, fitting an implicit reward model whose corresponding optimal policy can be extracted in closed form.
we will show that the RL-based objective used by existing methods can be optimized exactly with a
simple binary cross-entropy objective, greatly simplifying the preference learning pipeline.
At a high level,

In [27]:
print(response.source_nodes[0].text)

Figure 1: DPO optimizes for human preferences while avoiding reinforcement learning. Existing methods
for fine-tuning language models with human feedback first fit a reward model to a dataset of prompts and
human preferences over pairs of responses, and then use RL to find a policy that maximizes the learned reward.
In contrast, DPO directly optimizes for the policy best satisfying the preferences with a simple classification
objective, fitting an implicit reward model whose corresponding optimal policy can be extracted in closed form.
we will show that the RL-based objective used by existing methods can be optimized exactly with a
simple binary cross-entropy objective, greatly simplifying the preference learning pipeline.
At a high level, existing methods instill the desired behaviors into a language model using curated
sets of human preferences representing the types of behaviors that humans find safe and helpful. This
preference learning stage occurs after an initial stage of large-