In [1]:
import logging
import os

import requests
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv lookups used later in this notebook (ASK_XDD_APIKEY, ASK_XDD_URL,
# XDD_ARTICLE_ENDPOINT) can resolve.
load_dotenv()

True

Step 1: Call the Ask-xDD hybrid endpoint to get the following fields:

- `paper_id`
- `preprocessor_id`
- `hashed_text` (I treat this as the paragraph ID)

Step 2: Gather extra information from xDD

- publisher's URL

In [2]:
class USGSRetriever:
    """This is a mockup for the USGS specific retriever."""

    def query_ask_xdd(self, query: str) -> dict:
        """Query the AskXDD API and return the response."""

        ASK_XDD_APIKEY = os.getenv("ASK_XDD_APIKEY")
        ASK_XDD_URL = os.getenv("ASK_XDD_URL")
        headers = {"Content-Type": "application/json", "Api-Key": ASK_XDD_APIKEY}
        data = {
            "topic": "criticalmaas",
            "question": query,
            "top_k": 1,
        }

        response = requests.post(ASK_XDD_URL + "/hybrid", headers=headers, json=data)
        response.raise_for_status()
        paragraph = response.json()[0]
        paragraph["url"] = self.get_url(paragraph["paper_id"])
        return paragraph

    def get_url(self, paper_id: str) -> str | None:
        """Get the URL for a paper in the XDD database."""

        XDD_ARTICLE_ENDPOINT = os.getenv("XDD_ARTICLE_ENDPOINT")
        response = requests.get(f"{XDD_ARTICLE_ENDPOINT}?docid={paper_id}")
        response.raise_for_status()

        try:
            data = response.json()["success"]["data"]
            # Return the first publisher link
            for d in data:
                links = d["link"]
                for link in links:
                    if link["type"] == "publisher":
                        return link["url"]
            return links
        except Exception as e:
            logging.error(f"Error getting URL for paper {paper_id}: {e}")

In [3]:
# USGSRetriever defines no __init__, so it is constructed without arguments;
# its methods read their configuration from environment variables when called.
retriever = USGSRetriever()

In [4]:
# Example query. This performs live HTTP requests (a POST to the Ask-xDD
# /hybrid endpoint, then a GET to the xDD articles endpoint), so it needs
# network access and the environment variables loaded in the first cell.
# The cell output is the first returned paragraph dict plus a "url" key.
retriever.query_ask_xdd("Iron ore in the US.")

{'paper_id': '620e6889ad0e9c819b0c3e0f',
 'preprocessor_id': 'haystack_v0.0.2',
 'doc_type': 'paragraph',
 'topic_list': ['criticalmaas', 'dolomites', 'geoarchive'],
 'text_content': 'Assuming transportation costs of US$ 18.00 per tonne from Port Cartier to the China market, the realised price of the product at Port Cartier is: \uf0b7 Iron concentrate 64.3% Fe\nTotal Revenue FOB Port Cartier (LOM) Total Operating Costs including Royalty (LOM) Total Pre-production Capital Costs including Royalty Buy-back Total Sustaining Capital Costs (LOM) Initial Working Capital Mine Closure Costs Salvage Value\nBEFORE TAX Total Cash Flow Payback Period NPV @ 8% NPV @ 6% NPV @ 10% IRR\nAFTER TAX Total Tax Payments (LOM) Total Cash Flow Payback Period NPV @ 8% NPV @ 6% NPV @ 10% IRR\nUnits $ M $ M $ M $ M $ M $ M $ M $ M years $ M $ M $ M % $ M $ M years $ M $ M $ M %',
 'hashed_text': 'f8d910cc2e84be10b1a567dca256b5a2d61eb4c9bcaabc6326a6f5b80f650be2',
 'cosmos_object_id': None,
 'distance': -72.18213,