In [2]:
#!pip install arxiv

In [1]:
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.service_context import ServiceContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.ollama import Ollama
import requests
import os
import qdrant_client
from tqdm import tqdm
import arxiv
from langchain_community.embeddings import HuggingFaceEmbeddings
import yaml
from tenacity import retry, stop_after_attempt, wait_fixed

In [3]:

class Data:
    """Download research papers and ingest them into a Qdrant vector store.

    ``config`` is a mapping. ``ingest`` reads the keys ``data_path``,
    ``qdrant_url``, ``collection_name`` and ``chunk_size`` from it.
    """

    # Passed as ``timeout`` to every ``requests.get`` call so that a stalled
    # server cannot block the notebook forever.
    _REQUEST_TIMEOUT = 30

    def __init__(self, config):
        self.config = config

    @staticmethod
    def _safe_filename(title):
        """Turn a paper title into a file-name stem without spaces or '/'."""
        return title.strip().replace(" ", "_").replace("/", "_")

    def create_data_folder(self, download_path):
        """Create ``download_path`` if it is missing and report what happened."""
        if not os.path.exists(download_path):
            # exist_ok guards against the folder appearing between the check
            # above and this call.
            os.makedirs(download_path, exist_ok=True)
            print("Output folder created")
        else:
            print("Output folder already exists.")

    def download_papers(self, search_query, download_path, server="arxiv", start_date=None, end_date=None):
        """Download paper PDFs from ``server`` into ``download_path``.

        Args:
            search_query: Query string handed to the arXiv search. It is NOT
                applied when ``server`` is "medrxiv": that branch downloads
                whatever the details endpoint returns for the date range.
            download_path: Target folder; created if it does not exist.
            server: "arxiv" or "medrxiv". Anything else is reported and ignored.
            start_date: Start of the date range; required for "medrxiv".
            end_date: End of the date range; required for "medrxiv".

        Returns:
            None. Progress and failures are reported with ``print``.
        """
        self.create_data_folder(download_path)

        if server == "arxiv":
            self._download_from_arxiv(search_query, download_path)
        elif server == "medrxiv":
            self._download_from_medrxiv(download_path, start_date, end_date)
        else:
            print(f"Server '{server}' is not supported.")

    def _download_from_arxiv(self, search_query, download_path):
        """Download every arXiv search result for ``search_query``."""
        client = arxiv.Client()
        search = arxiv.Search(
            query=search_query,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )

        results = list(client.results(search))
        for paper in tqdm(results):
            # Same sanitising as the medRxiv branch: a "/" in the title would
            # otherwise be read as a sub-directory that does not exist.
            file_stem = self._safe_filename(paper.title)
            paper.download_pdf(dirpath=download_path, filename=f"{file_stem}.pdf")
            print(f"{paper.title} Downloaded.")

    def _download_from_medrxiv(self, download_path, start_date, end_date):
        """Download the PDFs listed by the medRxiv details endpoint."""
        if not start_date or not end_date:
            print("Error: 'start_date' and 'end_date' are required for medRxiv.")
            return

        # Construct the API URL. The "0" path segment is hard-coded, so only
        # one response page is ever requested.
        api_url = f"https://api.medrxiv.org/details/medrxiv/{start_date}/{end_date}/0/json"

        response = requests.get(api_url, timeout=self._REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f"Failed to retrieve data from MedRxiv API. Status code: {response.status_code}")
            return

        papers = response.json().get('collection')
        if not papers:
            print("No papers found with the given search query.")
            return

        for paper in tqdm(papers):
            title = self._safe_filename(paper['title'])
            pdf_url = f"https://www.medrxiv.org/content/{paper['doi']}.full.pdf"
            print(f"Attempting to download {title} from {pdf_url}")

            # Best effort: one failing paper must not abort the whole batch.
            try:
                pdf_response = requests.get(pdf_url, timeout=self._REQUEST_TIMEOUT)
                if pdf_response.status_code == 200:
                    pdf_path = os.path.join(download_path, f"{title}.pdf")
                    with open(pdf_path, 'wb') as pdf_file:
                        pdf_file.write(pdf_response.content)
                    print(f"{title} Downloaded to {pdf_path}.")
                else:
                    print(f"Failed to download {title}. Status code: {pdf_response.status_code}")
            except Exception as e:
                print(f"An error occurred while downloading {title}: {e}")

    def ingest(self, embedder, llm):
        """Embed the documents under ``config["data_path"]`` and store them in Qdrant.

        Args:
            embedder: Embedding model passed to the service context.
            llm: Language model passed to the service context.

        Returns:
            The ``VectorStoreIndex`` built from the loaded documents.
        """
        print("Reading pdf from the specified directory...")
        documents = SimpleDirectoryReader(self.config["data_path"]).load_data()

        print("Initializing Qdrant client...")
        client = qdrant_client.QdrantClient(url=self.config["qdrant_url"])
        vector_store = QdrantVectorStore(
            client=client,
            collection_name=self.config["collection_name"]
        )
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # Bundle the LLM, the embedder and the chunk size into a ServiceContext.
        # NOTE(review): recent llama-index releases replace ServiceContext with
        # the global Settings object - confirm against the installed version.
        service_context = ServiceContext.from_defaults(
            llm=llm, embed_model=embedder, chunk_size=self.config["chunk_size"]
        )
        print("Start embedding the documents and storing the resulting vectors in the Qdrant vector store...")
        index = VectorStoreIndex.from_documents(
            documents, storage_context=storage_context, service_context=service_context
        )
        print(
            f"Data indexed successfully to Qdrant. Collection: {self.config['collection_name']}"
        )
        return index


In [None]:
# Manually set the parameters
query = "heart failure exercise tolerance"  # The topic you are interested in
ingest = True  # Set to True if you want to ingest data
server = "medrxiv"  # Set server to "medrxiv" or "arxiv"
# Date window, written as YYYY-MM-DD. Required for medRxiv; it is useful
# because you don't want outdated papers.
start_date = "2003-07-31"
end_date = "2024-08-01"

# Set the configuration file path
config_file = "config.yml"

# Load the configuration. The encoding is explicit so the file is decoded the
# same way on every platform instead of with the locale default.
with open(config_file, "r", encoding="utf-8") as conf:
    config = yaml.safe_load(conf)

# Initialize the Data object with the configuration
data = Data(config)

@retry(wait=wait_fixed(10), stop=stop_after_attempt(3))
def download_papers_with_retry(data, search_query, download_path, server="arxiv", start_date=None, end_date=None):
    """Call ``data.download_papers`` under a retry policy.

    The call is attempted at most 3 times with a fixed 10 second wait between
    attempts. Only an exception raised by ``download_papers`` triggers a new
    attempt; nothing is returned.
    """
    download_kwargs = {
        "search_query": search_query,
        "download_path": download_path,
        "server": server,
        "start_date": start_date,
        "end_date": end_date,
    }
    data.download_papers(**download_kwargs)

@retry(stop=stop_after_attempt(3), wait=wait_fixed(10))
def ingest_data_with_retry(data, embed_model, llm):
    """Run ``data.ingest`` under a retry policy and hand back its result.

    The call is attempted at most 3 times with a fixed 10 second wait between
    attempts.

    Args:
        data: Object exposing ``ingest(embedder=..., llm=...)``.
        embed_model: Embedding model forwarded as ``embedder``.
        llm: Language model forwarded as ``llm``.

    Returns:
        Whatever ``data.ingest`` returns, so the caller can keep the index
        instead of losing it.
    """
    return data.ingest(embedder=embed_model, llm=llm)

# If a query is provided, download papers.
# The PDFs are written to config["data_path"], the folder Data.ingest reads from.
if query:
    download_papers_with_retry(data, query, config["data_path"], server=server, start_date=start_date, end_date=end_date)

# If ingest flag is set, ingest data to Qdrant vector database
if ingest:
    print("Loading Embedder...")
    # Wrap the HuggingFace embedding model named in the config for llama-index.
    embed_model = LangchainEmbedding(
        HuggingFaceEmbeddings(model_name=config["embedding_model"])
    )
    # Ollama-served LLM; model name and server URL both come from the config.
    llm = Ollama(model=config["llm_name"], base_url=config["llm_url"])
    ingest_data_with_retry(data, embed_model, llm)
    
# Note: this cell took about 13 minutes to finish.

# 3. Querying

Now that we’ve successfully loaded our data (research papers) into our vector store (Qdrant), we can begin querying it to retrieve relevant data for feeding to our LLM.

Let’s begin by crafting a function that sets up our Qdrant index, which will serve as our query engine.

Query Engine


rag.py


https://github.com/ollama/ollama/blob/main/docs/modelfile.md#build-from-llama3

To see the Modelfile of the base model, enter this command in a terminal: `ollama show --modelfile llama3`

In [19]:
### RAG helper: loads the embedder and builds the Qdrant-backed index



class RAG:
    """Build a llama-index query index on top of an existing Qdrant collection.

    Args:
        config_file: Despite its name, the already-loaded configuration
            mapping (it is indexed with string keys). The keys read here are
            ``qdrant_url``, ``embedding_model``, ``collection_name`` and
            ``chunk_size``.
        llm: The Ollama LLM handed to the service context.
    """

    def __init__(self, config_file, llm):
        self.config = config_file
        self.qdrant_client = qdrant_client.QdrantClient(
            url=self.config['qdrant_url']
        )
        self.llm = llm  # ollama llm

    def load_embedder(self):
        """Return the configured HuggingFace model wrapped for llama-index."""
        embed_model = LangchainEmbedding(
            HuggingFaceEmbeddings(model_name=self.config['embedding_model'])
        )
        return embed_model

    def qdrant_index(self):
        """Return a ``VectorStoreIndex`` backed by the configured collection."""
        # Reuse the client opened in __init__ rather than opening a second
        # connection to the same URL.
        qdrant_vector_store = QdrantVectorStore(
            client=self.qdrant_client, collection_name=self.config['collection_name']
        )

        service_context = ServiceContext.from_defaults(
            llm=self.llm, embed_model=self.load_embedder(), chunk_size=self.config["chunk_size"]
        )

        index = VectorStoreIndex.from_vector_store(
            vector_store=qdrant_vector_store, service_context=service_context
        )
        return index


### Next, we create our model using the Modelfile.

To use the Modelfile:

1. Save it as a file (e.g. Modelfile)
2. `ollama create choose-a-model-name -f <location of the file, e.g. ./Modelfile>`
3. ollama run choose-a-model-name
4. Start using the model!

ollama create ollama3_research -f Modelfile


This shows:
NAME                    ID              SIZE    MODIFIED
ollama3_research:latest 37e83b2fef40    4.7 GB  22 seconds ago

The listing reports 4.7 GB, but the new model does not actually take up that much extra space: it is just a reference to the original llama3 model.

ollama run ollama3_research


By default, Ollama runs on http://localhost:11434

Finally, we create an API endpoint using FastAPI. This endpoint will receive a query, search the documents, and return a response.

An advantage of using FastAPI is its compatibility with Pydantic, which is very helpful in structuring our code and API responses.

Let’s begin by defining two models: one for the Query and one for the Response:

pip install fastapi pydantic uvicorn pyyaml llama-index ollama qdrant-client


https://docs.llamaindex.ai/en/v0.10.17/examples/callbacks/LlamaDebugHandler.html

uvicorn app:app --host 0.0.0.0 --port 8000 --reload