# QA pipeline


In [9]:
from spacy import displacy
from promptedgraphs.config import Config
from promptedgraphs.extraction.entities_from_text import entities_from_text
from promptedgraphs.llms.openai_chat import LanguageModel

labels = {
    "person:name": "First and or last name of the person.",
    "person:date_of_birth": "Date of birth of the person.",
    "person:email": "Email address of the person. Must be unique.",
    "person:phone_number": "Phone number of the person.",
    "person:address": "Physical address of the person.",
    "person:profile_picture": "URL or path to the profile picture of the person.",
    "relationship:relationship_type": "Type of relationship (e.g., friend, family, colleague, spouse).",
    "person_relationship:start_date": "Start date of the relationship.",
    "person_relationship:end_date": "End date of the relationship, if applicable.",
}

question = "Where was the husband of Michelle Obama born?"

# Label Sentiment
ents = []
async for msg in entities_from_text(
    text=question,
    labels=labels,
    model=LanguageModel.GPT4,
    config=Config(),  # Reads `OPENAI_API_KEY` from .env file or environment
):
    ents.append(msg)

# Show Results using spacy.displacy
displacy.render(
    {
        "text": question,
        "ents": [e.to_dict() for e in ents],
    },
    style="ent",
    jupyter=True,
    manual=True,
)

## Notes on WikiData

Property definitions are located here:
https://www.wikidata.org/wiki/Wikidata:Database_reports/List_of_properties/all

API documentation and example endpoints:
https://doc.wikimedia.org/Wikibase/master/js/rest-api/
https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_html__title_
https://www.wikidata.org/wiki/Special:ApiSandbox#action=wbgetclaims&format=json&formatversion=2


Each property statement (claim) on an entity can carry:
 * qualifiers (extra context for the statement, such as times and places)
 * references (sources that support why the statement is true)


https://www.mediawiki.org/wiki/Wikibase/DataModel#Snaks

In [10]:
import requests
import json

# Search Wikidata for the text of one extracted entity (action=wbsearchentities).
# API sandbox example:
# https://www.wikidata.org/wiki/Special:ApiSandbox#action=wbsearchentities&format=json&search=alphabet&language=en&formatversion=2

# NOTE(review): index 1 depends on the order in which entities were extracted —
# confirm it selects the entity you actually want to look up.
q = str(ents[1].text)
url = "https://www.wikidata.org/w/api.php"
# Hand the query to requests as `params` so that characters such as "&", "#"
# or "+" in the search text are percent-encoded instead of corrupting the URL.
rs = requests.get(
    url,
    params={
        "action": "wbsearchentities",
        "format": "json",
        "search": q,
        "language": "en",
        "formatversion": 2,
    },
    timeout=10,  # a stalled connection should not hang the kernel forever
)
rs.raise_for_status()  # surface HTTP errors here rather than when parsing later

In [11]:
# Take the ID of the first search hit. Falls back to None when the response has
# no hits, instead of raising IndexError/KeyError on an empty result.
top_id = next((hit.get("id") for hit in rs.json().get("search", [])), None)
top_id

'Q212878'

In [12]:
# Fetch the item page for the hard-coded entity Q13133.
# NOTE(review): this URL serves the wiki page, not the JSON API — confirm that
# is what is wanted here.
url = "https://www.wikidata.org/wiki/Q13133"
rs = requests.get(url, timeout=10)  # timeout so a stalled connection cannot hang the kernel
rs.raise_for_status()  # fail loudly on an HTTP error status

In [13]:
# Fetch the full entity record for a (hard-coded) Wikidata ID via wbgetentities
import requests


def fetch_wikidata(params, timeout=10):
    """Send a GET request to the Wikidata ``w/api.php`` endpoint.

    Args:
        params: Mapping of query-string parameters (e.g. ``action``, ``ids``,
            ``format``); passed to ``requests.get`` so values are URL-encoded.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` of the call.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status. Errors are propagated instead of being
            swallowed: returning an error string made callers fail later with
            an unrelated ``AttributeError`` when they called ``.json()``.
    """
    url = "https://www.wikidata.org/w/api.php"
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response


# Query parameters for a wbgetentities lookup of a single item, English only
top_id = "Q76"
params = dict(action="wbgetentities", ids=top_id, format="json", languages="en")

# Call the API and decode the JSON body in one step
data = fetch_wikidata(params).json()

# Display the decoded response
data

{'entities': {'Q76': {'pageid': 205,
   'ns': 0,
   'title': 'Q76',
   'lastrevid': 2122884857,
   'modified': '2024-04-08T01:09:04Z',
   'type': 'item',
   'id': 'Q76',
   'labels': {'en': {'language': 'en', 'value': 'Barack Obama'}},
   'descriptions': {'en': {'language': 'en',
     'value': 'President of the United States from 2009 to 2017'}},
   'aliases': {'en': [{'language': 'en', 'value': 'Barack Hussein Obama II'},
     {'language': 'en', 'value': 'Barack Obama II'},
     {'language': 'en', 'value': 'Barack Hussein Obama'},
     {'language': 'en', 'value': 'Obama'},
     {'language': 'en', 'value': 'Barak Obama'},
     {'language': 'en', 'value': 'Barry Obama'},
     {'language': 'en', 'value': 'President Obama'},
     {'language': 'en', 'value': 'President Barack Obama'},
     {'language': 'en', 'value': 'BHO'},
     {'language': 'en', 'value': 'Barack'},
     {'language': 'en', 'value': 'Barack H. Obama'},
     {'language': 'en', 'value': 'Honorable Barack Obama'}]},
   'clai

In [14]:
# Look up the P26 statements directly by key. Scanning with a loop-and-break
# left `c, v` on whichever claim was iterated last when P26 was absent, which
# silently displayed the wrong property.
c = "P26"
v = data["entities"][top_id]["claims"].get(c, [])  # [] when the item has no P26 statements

c, v

('P26',
 [{'mainsnak': {'snaktype': 'value',
    'property': 'P26',
    'hash': 'f825c6521e7f5d92299c39ee36d3b82dab622a97',
    'datavalue': {'value': {'entity-type': 'item',
      'numeric-id': 13133,
      'id': 'Q13133'},
     'type': 'wikibase-entityid'},
    'datatype': 'wikibase-item'},
   'type': 'statement',
   'qualifiers': {'P580': [{'snaktype': 'value',
      'property': 'P580',
      'hash': '36e31a1f1fc64c0f17750153a36cba27351a1065',
      'datavalue': {'value': {'time': '+1992-10-03T00:00:00Z',
        'timezone': 0,
        'before': 0,
        'after': 0,
        'precision': 11,
        'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},
       'type': 'time'},
      'datatype': 'time'}],
    'P2842': [{'snaktype': 'value',
      'property': 'P2842',
      'hash': '7794f0ca5420ffdea20c6359a608482dc6e0f27d',
      'datavalue': {'value': {'entity-type': 'item',
        'numeric-id': 10703919,
        'id': 'Q10703919'},
       'type': 'wikibase-entityid'},
     

In [15]:
# Serialize the whole entity record for `top_id` to a single JSON string.
json.dumps(data["entities"][top_id])

'{"pageid": 205, "ns": 0, "title": "Q76", "lastrevid": 2122884857, "modified": "2024-04-08T01:09:04Z", "type": "item", "id": "Q76", "labels": {"en": {"language": "en", "value": "Barack Obama"}}, "descriptions": {"en": {"language": "en", "value": "President of the United States from 2009 to 2017"}}, "aliases": {"en": [{"language": "en", "value": "Barack Hussein Obama II"}, {"language": "en", "value": "Barack Obama II"}, {"language": "en", "value": "Barack Hussein Obama"}, {"language": "en", "value": "Obama"}, {"language": "en", "value": "Barak Obama"}, {"language": "en", "value": "Barry Obama"}, {"language": "en", "value": "President Obama"}, {"language": "en", "value": "President Barack Obama"}, {"language": "en", "value": "BHO"}, {"language": "en", "value": "Barack"}, {"language": "en", "value": "Barack H. Obama"}, {"language": "en", "value": "Honorable Barack Obama"}]}, "claims": {"P31": [{"mainsnak": {"snaktype": "value", "property": "P31", "hash": "ad7d38a03cdd40cdc373de0dc4e7b7fcb

In [16]:
# Fetch only the P1280 statements of `top_id` via action=wbgetclaims.
# The query is passed as `params` so requests percent-encodes each value, and a
# timeout keeps a stalled connection from hanging the kernel.
rs = requests.get(
    "https://www.wikidata.org/w/api.php",
    params={
        "action": "wbgetclaims",
        "format": "json",
        "formatversion": 2,
        "property": "P1280",
        "entity": top_id,
    },
    timeout=10,
)
rs.raise_for_status()  # fail loudly on an HTTP error status
rs.content

b'{"claims":{"P1280":[{"mainsnak":{"snaktype":"value","property":"P1280","hash":"8829f0dc3f598284792c09e2f332fed1e2c6d3ff","datavalue":{"value":"128417123","type":"string"},"datatype":"external-id"},"type":"statement","qualifiers":{"P1810":[{"snaktype":"value","property":"P1810","hash":"fb5491330919d5445fa9ca72aa697a5afef28472","datavalue":{"value":"Obama, Barack","type":"string"},"datatype":"string"}]},"qualifiers-order":["P1810"],"id":"Q76$b3b4e736-427f-8bea-b432-33c3fb237bbb","rank":"normal"}]}}'

In [17]:
# Decode the JSON body of the wbgetclaims response into Python objects.
rs.json()

{'claims': {'P1280': [{'mainsnak': {'snaktype': 'value',
     'property': 'P1280',
     'hash': '8829f0dc3f598284792c09e2f332fed1e2c6d3ff',
     'datavalue': {'value': '128417123', 'type': 'string'},
     'datatype': 'external-id'},
    'type': 'statement',
    'qualifiers': {'P1810': [{'snaktype': 'value',
       'property': 'P1810',
       'hash': 'fb5491330919d5445fa9ca72aa697a5afef28472',
       'datavalue': {'value': 'Obama, Barack', 'type': 'string'},
       'datatype': 'string'}]},
    'qualifiers-order': ['P1810'],
    'id': 'Q76$b3b4e736-427f-8bea-b432-33c3fb237bbb',
    'rank': 'normal'}]}}

In [18]:
# List every property ID that has claims on the entity. IDs are strings, so the
# order is lexicographic ("P10", "P10021", "P1003", ...), not numeric.
sorted(data["entities"][top_id]["claims"])

['P10',
 'P10021',
 'P1003',
 'P1005',
 'P1006',
 'P101',
 'P10141',
 'P1015',
 'P102',
 'P10234',
 'P10242',
 'P10291',
 'P10297',
 'P103',
 'P10302',
 'P10328',
 'P1036',
 'P10376',
 'P1038',
 'P1048',
 'P1050',
 'P10553',
 'P10565',
 'P106',
 'P10632',
 'P10660',
 'P10757',
 'P10780',
 'P108',
 'P10832',
 'P109',
 'P10916',
 'P11120',
 'P11182',
 'P11194',
 'P11249',
 'P11345',
 'P11386',
 'P1142',
 'P11496',
 'P11500',
 'P1151',
 'P11514',
 'P1153',
 'P1157',
 'P11615',
 'P11625',
 'P11646',
 'P11686',
 'P1185',
 'P11892',
 'P11928',
 'P1207',
 'P12086',
 'P12098',
 'P1213',
 'P12153',
 'P12385',
 'P12458',
 'P1258',
 'P12582',
 'P12597',
 'P1263',
 'P1266',
 'P1273',
 'P1280',
 'P1284',
 'P1296',
 'P1299',
 'P1309',
 'P1315',
 'P1340',
 'P1343',
 'P1344',
 'P1368',
 'P1375',
 'P140',
 'P1411',
 'P1412',
 'P1417',
 'P1424',
 'P1429',
 'P1430',
 'P1449',
 'P1472',
 'P1477',
 'P1559',
 'P1607',
 'P1617',
 'P166',
 'P1670',
 'P1695',
 'P1711',
 'P1712',
 'P172',
 'P1728',
 'P1741',
 '

In [20]:
from spacy import displacy
from promptedgraphs.config import Config

labels = {
    "person:name": "Name of the person.",
    "person:date_of_birth": "Date of birth of the person.",
    "person:email": "Email address of the person. Must be unique.",
    "person:phone_number": "Phone number of the person.",
    "person:address": "Physical address of the person.",
    "person:profile_picture": "URL or path to the profile picture of the person.",
    "relationship:relationship_type": "Type of relationship (e.g., friend, family, colleague, spouse).",
    "person_relationship:start_date": "Start date of the relationship.",
    "person_relationship:end_date": "End date of the relationship, if applicable.",
}

question = "Where was the husband of Michelle Obama born?"

# Label Sentiment
ents = []
async for msg in entities_from_text(
    text=question,
    labels=labels,
    config=Config(),  # Reads `OPENAI_API_KEY` from .env file or environment
):
    ents.append(msg)

# Show Results using spacy.displacy
displacy.render(
    {
        "text": question,
        "ents": [e.to_dict() for e in ents],
    },
    style="ent",
    jupyter=True,
    manual=True,
)

In [None]:
# Display the raw list of extracted entity objects for inspection.
ents