# End-to-end example of using Ask-xDD and text2graph_llm (location extraction)

In [3]:
import requests
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the environment so the API keys
# can be read with os.getenv in the cells below.
load_dotenv()

True

Let's say we are interested in critical minerals and their locations.

In [4]:
# The 50 mineral commodities on the USGS 2022 list of critical minerals:
# https://www.usgs.gov/news/national-news-release/us-geological-survey-releases-2022-list-critical-minerals
# Every name is lowercase so that membership checks and string matching
# treat all entries the same way.

critical_minerals = [
    "aluminum",
    "antimony",
    "arsenic",
    "barite",
    "beryllium",
    "bismuth",
    "cerium",
    "cesium",
    "chromium",
    "cobalt",
    "dysprosium",
    "erbium",
    "europium",
    "fluorspar",
    "gadolinium",
    "gallium",
    "germanium",
    "graphite",
    "hafnium",
    "holmium",
    "indium",
    "iridium",
    "lanthanum",
    "lithium",
    "lutetium",
    "magnesium",
    "manganese",
    "neodymium",
    "nickel",
    "niobium",
    "palladium",
    "platinum",
    "praseodymium",
    "rhodium",
    "rubidium",
    "ruthenium",
    "samarium",
    "scandium",
    "tantalum",
    "tellurium",
    "terbium",
    "thulium",
    "tin",
    "titanium",
    "tungsten",
    "vanadium",
    "ytterbium",
    "yttrium",
    "zinc",
    "zirconium",
]

### Get sample text from the `CriticalMAAS` dataset

In [15]:
# Ask the Ask-xDD `/hybrid` endpoint for passages on aluminum mines within the
# `criticalmaas` topic, then display the parsed JSON reply.
ASK_XDD_APIKEY = os.getenv("ASK_XDD_APIKEY")  # None when the variable is not set
ENDPOINT = "http://cosmos0001.chtc.wisc.edu:4502/hybrid"

# NOTE: requests drops headers whose value is None, so an unset key produces an
# unauthenticated request rather than a local error.
headers = {"Content-Type": "application/json", "Api-Key": ASK_XDD_APIKEY}
data = {
    "topic": "criticalmaas",
    "question": "Aluminum Mines in USA.",
    "top_k": 10,  # presumably the number of passages to return - confirm against the API docs
    "temperature": 1.0,
}

# (connect, read) timeouts in seconds: without them a stalled server would
# block the kernel indefinitely.
response = requests.post(ENDPOINT, headers=headers, json=data, timeout=(10, 120))
# Stop here on a 4xx/5xx reply instead of handing an error payload to the
# cells that index into the result.
response.raise_for_status()
response.json()

[{'paper_id': '557ca80ae138239225f86935',
  'preprocessor_id': 'haystack_v0.0.2',
  'doc_type': 'paragraph',
  'topic_list': ['criticalmaas'],
  'text_content': 'ALUMINUM, TOTAL RECOVERABLE (UG/L AS AL) ARSENIC, TOTAL RECOVERABLE (UG/L AS AS) BERYLLIUM, TOTAL RECOVERABLE (UG/L AS BE) CADMIUM TOTAL RECOVERABLE (UG/L AS CD) CHROMIUM, TOTAL RECOVERABLE (UG/L AS CR) COPPER, TOTAL RECOVERABLE (UG/L AS CU) IRON, TOTAL RECOVERABLE (UG/L AS FE) LEAD, TOTAL RECOVERABLE (UG/L AS PB) LITHIUM, TOTAL RECOVERABLE (UG/L AS LI) MANGANESE, TOTAL RECOVERABLE (UG/L AS MN) MERCURY, TOTAL RECOVERABLE (UG/L AS HG) MOLYBDENUM, TOTAL RECOVERABLE (UG/L AS MO) NICKEL, TOTAL RECOVERABLE (UG/L AS NI ) SELENIUM, TOTAL RECOVERABLE (UG/L AS SE) ZINC, TOTAL RECOVERABLE (UG/L AS ZN)\nTABLt 14. SUMMARY OF WATER-QUALITY ANALYSES, MULE CREEK SITE D (07245594) [MG/L=MILLIGRAMS PER LITER, DEC C=DEGREES CELSIUS, PCI/L=PICOCURIES PER LITER, UMHO=MICROMHOS PER CENTIMETER AT 25 DEGREES CELSIUS, UG/L=MICROGRAMS PER LITER]',
  '

In [16]:
# Position of the Ask-xDD hit that serves as the demo passage for text2graph.
# NOTE(review): presumably hand-picked because this passage names places
# (creeks, rivers, towns) - confirm before changing the query.
SAMPLE_HIT_INDEX = 2

hits = response.json()
# `response` is rebound by the text2graph cells further down; if this cell is
# re-run after them the payload is no longer the list of Ask-xDD hits.
if not isinstance(hits, list) or len(hits) <= SAMPLE_HIT_INDEX:
    raise RuntimeError(
        f"Expected a list with at least {SAMPLE_HIT_INDEX + 1} Ask-xDD hits; "
        "re-run the Ask-xDD request cell before this one."
    )
sample_text = hits[SAMPLE_HIT_INDEX]["text_content"]
sample_text

'Figure 16. Summary diagram of mass transfer of aluminum in mixing zones downstream from Mineral and Cement Creeks\nFigure 17. Summary diagram of mass transfer of iron in mixing zones downstream from Mineral and Cement Creeks\nFigure 18. Comparison of metal loads for aluminum, iron, zinc, cadmium, copper and lead at low flow (Oct., 1995) and high flow (May-June, 1996)\nFigure 28. Metal distribution profiles for Animas River: Silverton, Colo. to Aztec, New Mexico 74\nFigure 31. Downstream variation of the iron, aluminum, and copper to zinc ratios in colloids and bed sediments in the Animas River: Silverton to Aztec\nWater quality standards for metals, Colorado Lead-isotopic data from mineral deposits and mill sites, Silverton area Semi-quantitative emission spectrographic data from heavy-mineral separates,'

### Feed text to text2graph endpoint

[text2graph API documentation](http://cosmos0002.chtc.wisc.edu:4503/docs)

TODO: Improve documentation



#### GPT-4-turbo-preview

In [10]:
# Send the sample passage to the text2graph `/llm_graph` endpoint using the
# GPT-4-turbo-preview model.
#
# The endpoint defaults to a service running on localhost. To target another
# instance (e.g. http://cosmos0002.chtc.wisc.edu:4503/llm_graph), set
# TEXT2GRAPH_ENDPOINT in the environment or .env instead of editing this cell.
TEXT2GRAPH_ENDPOINT = os.getenv("TEXT2GRAPH_ENDPOINT", "http://localhost:4510/llm_graph")
TEXT2GRAPH_APIKEY = os.getenv("API_KEY")  # None when the variable is not set

# `headers` is reused as-is by the Mixtral request further down.
headers = {"Content-Type": "application/json", "Api-Key": TEXT2GRAPH_APIKEY}
data = {"text": sample_text, "model": "gpt-4-turbo-preview", "prompt_version": "v0"}
# (connect, read) timeouts in seconds; the read limit is generous to leave room
# for the model call while still preventing an indefinite hang.
response = requests.post(
    TEXT2GRAPH_ENDPOINT, headers=headers, json=data, timeout=(10, 300)
)
# Surface a 4xx/5xx reply here rather than when the result is displayed.
response.raise_for_status()

In [11]:
# Parse the GPT-4 reply and keep it under its own name before displaying it.
gpt4_graph = response.json()
gpt4_graph

{'Mineral and Cement Creeks': ['aluminum', 'iron'],
 'Animas River': ['aluminum',
  'iron',
  'zinc',
  'cadmium',
  'copper',
  'lead',
  'colloids',
  'bed sediments'],
 'Silverton, Colo. to Aztec, New Mexico': ['metal distribution profiles'],
 'Colorado': ['water quality standards for metals'],
 'Silverton area': ['Lead-isotopic data',
  'mineral deposits',
  'mill sites',
  'Semi-quantitative emission spectrographic data',
  'heavy-mineral separates']}

#### Mixtral (open-source model)

In [12]:
# Same payload as the GPT-4 request, with only the model switched to Mixtral;
# the endpoint and headers come from the GPT-4 request cell.
data = {"text": sample_text, "model": "mixtral", "prompt_version": "v0"}
# (connect, read) timeouts in seconds so a stalled service cannot hang the kernel.
response = requests.post(
    TEXT2GRAPH_ENDPOINT, headers=headers, json=data, timeout=(10, 300)
)
# Surface a 4xx/5xx reply here rather than when the result is displayed.
response.raise_for_status()

In [13]:
# Parse the Mixtral reply and keep it under its own name before displaying it.
mixtral_graph = response.json()
mixtral_graph

{'Mining and Cement Creeks': ['aluminum', 'iron'],
 'Animas River: Silverton to Aztec': ['aluminum', 'iron', 'copper', 'zinc'],
 'Silverton, Colo.': ['metals'],
 'Colorado': ['water quality standards for metals'],
 'Silverton area': ['Lead-isotopic data from mineral deposits and mill sites']}