In [None]:
# This cell installs all the necessary libraries for our project.

!pip install langchain langchain-community pypdf sentence-transformers faiss-cpu torch arxiv llama-cpp-python huggingface-hub



Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting pypdf
  Downloading pypdf-6.1.0-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting arxiv
  Downloading arxiv-2.2.0-py3-none-any.whl.metadata (6.3 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.16.tar.gz (50.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting requests<3,>=2 (from langchain)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain-communi

In [None]:
# This cell configures our project paths in Google Drive and downloads the papers.

import arxiv
import os

# Define the paths in your Google Drive.
# IMPORTANT: Google Drive must be mounted at /content/drive before running this
# cell. The folders themselves are created below if they are missing.
DRIVE_MOUNT_POINT = '/content/drive'
DRIVE_BASE_PATH = '/content/drive/MyDrive/ResearchNavigator'
DATA_PATH = os.path.join(DRIVE_BASE_PATH, 'papers')
DB_FAISS_PATH = os.path.join(DRIVE_BASE_PATH, 'vectorstore/db_faiss')
MODEL_PATH_DIR = os.path.join(DRIVE_BASE_PATH, 'models')

# Without a mounted Drive, os.makedirs would silently create these folders on
# the runtime's local disk, where they are lost when the session ends.
if not os.path.ismount(DRIVE_MOUNT_POINT):
    print(f"WARNING: {DRIVE_MOUNT_POINT} does not look like a mounted drive; "
          "files will be written to local storage instead of Google Drive.")

# Create directories if they don't exist
os.makedirs(DATA_PATH, exist_ok=True)
os.makedirs(os.path.dirname(DB_FAISS_PATH), exist_ok=True)
os.makedirs(MODEL_PATH_DIR, exist_ok=True)

print("Downloading papers from arXiv...")
# Search for and download papers
search = arxiv.Search(
    query = "cat:cs.AI AND reinforcement learning",
    max_results = 25, # Let's start with 25 papers for the first run
    sort_by = arxiv.SortCriterion.SubmittedDate
)

# Search.results() is deprecated; results are fetched through a Client instead.
client = arxiv.Client()

for result in client.results(search):
    try:
        # The last segment of the entry URL is used as the file name.
        filename = f"{result.entry_id.split('/')[-1]}.pdf"
        pdf_path = os.path.join(DATA_PATH, filename)
        if os.path.exists(pdf_path):
            print(f"Already exists: {result.title}")
            continue

        # Download under a temporary name and rename only on success, so an
        # interrupted download never leaves a truncated file under the final
        # .pdf name (which a later run would report as "Already exists").
        partial_name = f"{filename}.part"
        result.download_pdf(dirpath=DATA_PATH, filename=partial_name)
        os.replace(os.path.join(DATA_PATH, partial_name), pdf_path)
        print(f"Downloaded: {result.title}")
    except Exception as e:
        # Best effort: report the failure and continue with the next paper.
        print(f"Error downloading {result.title}: {e}")

print("\nFinished downloading papers.")


Downloading papers from arXiv...


  for result in search.results():


Downloaded: Adaptive Event-Triggered Policy Gradient for Multi-Agent Reinforcement Learning
Downloaded: Embedding Domain Knowledge for Large Language Models via Reinforcement Learning from Augmented Generation
Downloaded: Discrete Diffusion for Reflective Vision-Language-Action Models in Autonomous Driving
Downloaded: PEPS: Quantum-Inspired Reinforcement Learning for Coherent Reasoning Traces in LLMs
Downloaded: From Pheromones to Policies: Reinforcement Learning for Engineered Biological Swarms
Downloaded: An effective control of large systems of active particles: An application to evacuation problem
Downloaded: Exploration with Foundation Models: Capabilities, Limitations, and Hybrid Approaches
Downloaded: Analysis of approximate linear programming solution to Markov decision problem with log barrier function
Downloaded: bi-GRPO: Bidirectional Optimization for Jailbreak Backdoor Injection on LLMs
Downloaded: Frictional Q-Learning
Downloaded: UserRL: Training Interactive User-Centric 

In [None]:
# This cell will load the PDFs, split them into chunks, create embeddings,
# and save them to a FAISS vector database in your Google Drive.
# This step can take a few minutes.

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import time
import torch

# Model for creating embeddings. "all-MiniLM-L6-v2" is efficient and effective.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Use the Colab GPU when one is attached; otherwise fall back to the CPU so the
# cell still runs (more slowly) on a CPU-only runtime instead of failing.
EMBEDDING_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

print("Loading documents from PDF files...")
loader = PyPDFDirectoryLoader(DATA_PATH)
documents = loader.load()
print(f"Loaded {len(documents)} documents.")

print("Splitting documents into chunks...")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(documents)
print(f"Split into {len(docs)} chunks.")

# Fail early with an actionable message rather than attempting to index nothing.
if not docs:
    raise ValueError(
        f"No text chunks were produced from the PDFs in {DATA_PATH}; "
        "run the download cell first and check that the folder contains readable PDFs."
    )

print(f"Creating embeddings using the {'GPU' if EMBEDDING_DEVICE == 'cuda' else 'CPU'}... (This may take a moment)")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    model_kwargs={'device': EMBEDDING_DEVICE}
)

print("Building FAISS vector store...")
# The timer covers both embedding/indexing and writing the index to disk.
start_time = time.time()
db = FAISS.from_documents(docs, embeddings)
db.save_local(DB_FAISS_PATH)
print(f"Vector store created and saved in {time.time() - start_time:.2f} seconds.")
print(f"Database saved at: {DB_FAISS_PATH}")


Loading documents from PDF files...
Loaded 448 documents.
Splitting documents into chunks...
Split into 1982 chunks.
Creating embeddings using the GPU... (This may take a moment)


  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Building FAISS vector store...
Vector store created and saved in 5.65 seconds.
Database saved at: /content/drive/MyDrive/ResearchNavigator/vectorstore/db_faiss


In [None]:
# We will download a quantized version of a powerful model from Hugging Face.
# This model file is a few gigabytes, so this will take some time.
# The download is skipped when the file is already present in MODEL_PATH_DIR.

from huggingface_hub import hf_hub_download

MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
MODEL_FILE = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
LLM_PATH = os.path.join(MODEL_PATH_DIR, MODEL_FILE)

if not os.path.exists(LLM_PATH):
    print(f"Downloading model to: {LLM_PATH}")
    # `local_dir_use_symlinks` is deprecated in current huggingface_hub releases
    # and only triggers a warning, so it is no longer passed; `local_dir` alone
    # places the file in the target folder.
    hf_hub_download(
        repo_id=MODEL_NAME,
        filename=MODEL_FILE,
        local_dir=MODEL_PATH_DIR,
    )
    print("Model download complete.")
else:
    print(f"Model already exists at: {LLM_PATH}")


Downloading model to: /content/drive/MyDrive/ResearchNavigator/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


mistral-7b-instruct-v0.2.Q4_K_M.gguf:   0%|          | 0.00/4.37G [00:00<?, ?B/s]

Model download complete.


In [None]:
# This is the final, interactive part. We load the database and the LLM,
# then create a simple interface to ask questions.

from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA
import torch

print("Loading resources for the QA chain...")

# Use the GPU when one is attached; otherwise run on the CPU so the cell does
# not fail on a CPU-only runtime.
use_gpu = torch.cuda.is_available()

# Load the previously created database from Google Drive.
# The same embedding model that built the index must be used to query it.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    model_kwargs={'device': 'cuda' if use_gpu else 'cpu'}
)
# SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS.load_local
# unpickle the stored docstore, which can execute arbitrary code. Only load an
# index that this notebook created itself; never one from an untrusted source.
db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)

# Load the LLM, offloading layers to the GPU for speed when a GPU is available
llm = LlamaCpp(
    model_path=LLM_PATH,
    n_ctx=4096,     # Context window
    n_batch=512,    # Batch size for prompt processing
    n_gpu_layers=40 if use_gpu else 0, # Number of layers to offload to GPU
    verbose=False,
)

# Define the prompt template
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

prompt = PromptTemplate(template=custom_prompt_template,
                        input_variables=['context', 'question'])

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',  # All retrieved chunks are placed into a single prompt
    retriever=db.as_retriever(search_kwargs={'k': 3}), # Retrieve 3 most relevant chunks
    return_source_documents=True,
    chain_type_kwargs={'prompt': prompt}
)

print("\n--- Ready to Chat! ---")
# Example Query
query = "What are the main differences between Transformer and ViT architectures?"
print(f"QUERY: {query}")
result = qa_chain.invoke({"query": query})
print("\nANSWER:")
print(result['result'])

# List the source file of every retrieved chunk that was given to the LLM.
print("\nSOURCES:")
for doc in result["source_documents"]:
    print(f"- {doc.metadata.get('source', 'Unknown')}")


Loading resources for the QA chain...


llama_context: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized



--- Ready to Chat! ---
QUERY: What are the main differences between Transformer and ViT architectures?

ANSWER:

Transformer architecture and Vision Transformer (ViT) architecture are two popular deep learning architectures for natural language processing (NLP) and computer vision tasks, respectively. While both architectures share some commonalities such as the use of self-attention mechanisms for feature representation, they also have some significant differences that make each architecture uniquely suited for specific types of NLP or computer vision tasks.

One of the main differences between Transformer and ViT architectures is the way in which input data is processed and represented as features. In a Transformer architecture, input text data is first converted to a sequence of continuous vector representations using an embedding layer. These continuous vector representations are then fed into self-attention mechanisms that allow the model to dynamically learn which parts of the i