# QA pipeline


In [9]:
from spacy import displacy
from promptedgraphs.config import Config
from promptedgraphs.entity_recognition import extract_entities

labels = {
    "person:name": "First and or last name of the person.",
    "person:date_of_birth": "Date of birth of the person.",
    "person:email": "Email address of the person. Must be unique.",
    "person:phone_number": "Phone number of the person.",
    "person:address": "Physical address of the person.",
    "person:profile_picture": "URL or path to the profile picture of the person.",
    "relationship:relationship_type": "Type of relationship (e.g., friend, family, colleague, spouse).",
    "person_relationship:start_date": "Start date of the relationship.",
    "person_relationship:end_date": "End date of the relationship, if applicable."
}

question = "Where was the husband of Michelle Obama born?"

# Label Sentiment
ents = []
async for msg in extract_entities(
    text=question,
    labels=labels,
    config=Config(),  # Reads `OPENAI_API_KEY` from .env file or environment
):
    ents.append(msg)

# Show Results using spacy.displacy
displacy.render(
    {
        "text": question,
        "ents": [e.to_dict() for e in ents],
    },
    style="ent",
    jupyter=True,
    manual=True,
)

('person:name', 'Michelle Obama')

## Notes on WikiData

Property definitions are located here:
https://www.wikidata.org/wiki/Wikidata:Database_reports/List_of_properties/all

Here are some API references and an example request:
https://doc.wikimedia.org/Wikibase/master/js/rest-api/
https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_html__title_
https://www.wikidata.org/wiki/Special:ApiSandbox#action=wbgetclaims&format=json&formatversion=2


Each statement (a property claim on an entity) can also carry:
 * qualifiers (extra context, such as the times and places the statement applies to)
 * references (sources that support why the statement is true)


https://www.mediawiki.org/wiki/Wikibase/DataModel#Snaks

In [33]:
import requests
import json 
# https://www.wikidata.org/wiki/Special:ApiSandbox#action=wbsearchentities&format=json&search=alphabet&language=en&formatversion=2

# Select the search term by entity label instead of by list position: the
# number and order of extracted entities varies between runs, so a fixed
# index can pick the wrong entity or raise IndexError.
person_names = [ent.text for ent in ents if ent.label == "person:name"]
if not person_names:
    raise ValueError("No 'person:name' entity was extracted from the question.")
q = person_names[0]

# Pass the query as `params` so requests percent-encodes the search term
# (names may contain spaces, '&', '#', ...) rather than splicing it raw
# into the URL.
url = "https://www.wikidata.org/w/api.php"
rs = requests.get(
    url,
    params={
        "action": "wbsearchentities",
        "format": "json",
        "search": q,
        "language": "en",
        "formatversion": 2,
    },
    timeout=30,  # do not let a stalled connection hang the notebook
)
rs.raise_for_status()  # surface HTTP errors here, not as a JSON decode error later

In [34]:
# Decode the search response and keep the ID of the first (top-ranked) hit.
search_hits = json.loads(rs.content.decode('utf-8'))['search']
top_id = search_hits[0].get('id')
top_id

'Q13133'

In [35]:
# Fetch the human-readable Wikidata page for the entity found by the search.
# The ID comes from `top_id` rather than a hard-coded literal, so this cell
# follows whatever the search step returned.
url = f"https://www.wikidata.org/wiki/{top_id}"
rs = requests.get(url)

In [57]:
# Fetch the full Wikidata entity record for a given entity ID (wbgetentities)
import requests
 
def fetch_wikidata(params, timeout=30):
    """Send a GET request to the Wikidata API endpoint (``/w/api.php``).

    Args:
        params: Mapping of query parameters for the API call (for example
            ``action``, ``ids``, ``format``). It is passed to ``requests.get``
            as ``params`` so values are URL-encoded by requests.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` for the call.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status. Errors are deliberately allowed to propagate:
            returning a placeholder string would only make the caller fail
            later with an unrelated ``AttributeError`` on ``.json()``.
    """
    url = 'https://www.wikidata.org/w/api.php'
    response = requests.get(url, params=params, timeout=timeout)
    # Turn 4xx/5xx responses into exceptions at the point of failure.
    response.raise_for_status()
    return response
# Build the wbgetentities query for a single entity, English text only.
top_id = "Q76"
params = dict(
    action='wbgetentities',
    ids=top_id,
    format='json',
    languages='en',
)

# Call the API, then decode the JSON body into a plain dict.
response = fetch_wikidata(params)
data = response.json()

# Display the decoded payload.
data

{'entities': {'Q76': {'pageid': 205,
   'ns': 0,
   'title': 'Q76',
   'lastrevid': 1982189039,
   'modified': '2023-09-26T20:50:39Z',
   'type': 'item',
   'id': 'Q76',
   'labels': {'en': {'language': 'en', 'value': 'Barack Obama'}},
   'descriptions': {'en': {'language': 'en',
     'value': 'president of the United States from 2009 to 2017'}},
   'aliases': {'en': [{'language': 'en', 'value': 'Barack Hussein Obama II'},
     {'language': 'en', 'value': 'Barack Obama II'},
     {'language': 'en', 'value': 'Barack Hussein Obama'},
     {'language': 'en', 'value': 'Obama'},
     {'language': 'en', 'value': 'Barak Obama'},
     {'language': 'en', 'value': 'Barry Obama'},
     {'language': 'en', 'value': 'President Obama'},
     {'language': 'en', 'value': 'President Barack Obama'},
     {'language': 'en', 'value': 'BHO'},
     {'language': 'en', 'value': 'Barack'},
     {'language': 'en', 'value': 'Barack H. Obama'},
     {'language': 'en', 'value': 'Honorable Barack Obama'}]},
   'clai

In [56]:
# P26 is the Wikidata "spouse" property.
c = 'P26'

# Look the property up directly. Scanning with a loop-and-break would leave
# `c, v` bound to whichever claim came last when P26 is absent, silently
# showing the wrong property. An empty list here means "no such claim".
v = data['entities'][top_id]['claims'].get(c, [])

c, v


('P26',
 [{'mainsnak': {'snaktype': 'value',
    'property': 'P26',
    'hash': '0fbf7f3ccd911618498ffc930f15716461390ec8',
    'datavalue': {'value': {'entity-type': 'item',
      'numeric-id': 76,
      'id': 'Q76'},
     'type': 'wikibase-entityid'},
    'datatype': 'wikibase-item'},
   'type': 'statement',
   'qualifiers': {'P580': [{'snaktype': 'value',
      'property': 'P580',
      'hash': '36e31a1f1fc64c0f17750153a36cba27351a1065',
      'datavalue': {'value': {'time': '+1992-10-03T00:00:00Z',
        'timezone': 0,
        'before': 0,
        'after': 0,
        'precision': 11,
        'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},
       'type': 'time'},
      'datatype': 'time'}],
    'P2842': [{'snaktype': 'value',
      'property': 'P2842',
      'hash': '7794f0ca5420ffdea20c6359a608482dc6e0f27d',
      'datavalue': {'value': {'entity-type': 'item',
        'numeric-id': 10703919,
        'id': 'Q10703919'},
       'type': 'wikibase-entityid'},
      'data

In [45]:
# Serialize the current entity's record to a JSON string for inspection.
entity_record = data['entities'][top_id]
json.dumps(entity_record)

'{"pageid": 14753, "ns": 0, "title": "Q13133", "lastrevid": 1978456816, "modified": "2023-09-20T19:56:52Z", "type": "item", "id": "Q13133", "labels": {"en": {"language": "en", "value": "Michelle Obama"}}, "descriptions": {"en": {"language": "en", "value": "lawyer and former First Lady of the United States"}}, "aliases": {"en": [{"language": "en", "value": "Michelle LaVaughn Obama"}, {"language": "en", "value": "Michelle Robinson"}, {"language": "en", "value": "Michelle LaVaughn Robinson Obama"}, {"language": "en", "value": "First Lady Michelle Obama"}]}, "claims": {"P1280": [{"mainsnak": {"snaktype": "value", "property": "P1280", "hash": "74fe65a11973be93a7744eb5fad7b726a7257bd8", "datavalue": {"value": "162022243", "type": "string"}, "datatype": "external-id"}, "type": "statement", "id": "Q13133$9bf1d5ab-43a4-2cfe-277f-c8d6dcedeafb", "rank": "normal"}], "P735": [{"mainsnak": {"snaktype": "value", "property": "P735", "hash": "10a41aae97ab35773aac349801425770e7feee6e", "datavalue": {"va

In [53]:
# Ask wbgetclaims for a single property (P1280) of the current entity and
# show the raw response bytes.
claims_url = f"https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&formatversion=2&property=P1280&entity={top_id}"
rs = requests.get(claims_url)
rs.content

b'{"claims":{"P1280":[{"mainsnak":{"snaktype":"value","property":"P1280","hash":"74fe65a11973be93a7744eb5fad7b726a7257bd8","datavalue":{"value":"162022243","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q13133$9bf1d5ab-43a4-2cfe-277f-c8d6dcedeafb","rank":"normal"}]}}'

In [54]:
# Parse the raw response bytes into Python objects for a readable display.
raw_claims = rs.content
json.loads(raw_claims)

{'claims': {'P1280': [{'mainsnak': {'snaktype': 'value',
     'property': 'P1280',
     'hash': '74fe65a11973be93a7744eb5fad7b726a7257bd8',
     'datavalue': {'value': '162022243', 'type': 'string'},
     'datatype': 'external-id'},
    'type': 'statement',
    'id': 'Q13133$9bf1d5ab-43a4-2cfe-277f-c8d6dcedeafb',
    'rank': 'normal'}]}}

In [55]:
# List the property IDs present on the entity. Iterating a dict yields its
# keys; the sort is lexicographic on the ID strings.
entity_claims = data['entities'][top_id]['claims']
sorted(entity_claims)

['P1005',
 'P1006',
 'P10097',
 'P1015',
 'P102',
 'P10227',
 'P10297',
 'P103',
 'P10302',
 'P10328',
 'P1038',
 'P106',
 'P10660',
 'P10780',
 'P108',
 'P10832',
 'P10882',
 'P109',
 'P11194',
 'P11249',
 'P11496',
 'P11646',
 'P11683',
 'P11686',
 'P1185',
 'P1207',
 'P1258',
 'P1263',
 'P1266',
 'P1273',
 'P1280',
 'P1284',
 'P1343',
 'P1368',
 'P1375',
 'P140',
 'P1412',
 'P1417',
 'P1430',
 'P1477',
 'P1559',
 'P166',
 'P1695',
 'P1712',
 'P172',
 'P1728',
 'P1741',
 'P18',
 'P1819',
 'P1889',
 'P1890',
 'P19',
 'P1953',
 'P1971',
 'P2002',
 'P2003',
 'P2013',
 'P2019',
 'P2048',
 'P21',
 'P213',
 'P214',
 'P2163',
 'P2190',
 'P22',
 'P2267',
 'P227',
 'P2390',
 'P2435',
 'P244',
 'P2471',
 'P25',
 'P26',
 'P2600',
 'P2604',
 'P2605',
 'P2607',
 'P2611',
 'P2638',
 'P268',
 'P269',
 'P27',
 'P271',
 'P2949',
 'P2963',
 'P3056',
 'P3065',
 'P31',
 'P3106',
 'P3144',
 'P3221',
 'P3222',
 'P3305',
 'P3368',
 'P3373',
 'P3388',
 'P3417',
 'P3430',
 'P345',
 'P3479',
 'P349',
 'P3509'

In [59]:
from spacy import displacy
from promptedgraphs.config import Config
from promptedgraphs.entity_recognition import extract_entities

labels = {
    "person:name": "Name of the person.",
    "person:date_of_birth": "Date of birth of the person.",
    "person:email": "Email address of the person. Must be unique.",
    "person:phone_number": "Phone number of the person.",
    "person:address": "Physical address of the person.",
    "person:profile_picture": "URL or path to the profile picture of the person.",
    "relationship:relationship_type": "Type of relationship (e.g., friend, family, colleague, spouse).",
    "person_relationship:start_date": "Start date of the relationship.",
    "person_relationship:end_date": "End date of the relationship, if applicable."
}

question = "Where was the husband of Michelle Obama born?"

# Label Sentiment
ents = []
async for msg in extract_entities(
    text=question,
    labels=labels,
    config=Config(),  # Reads `OPENAI_API_KEY` from .env file or environment
):
    ents.append(msg)

# Show Results using spacy.displacy
displacy.render(
    {
        "text": question,
        "ents": [e.to_dict() for e in ents],
    },
    style="ent",
    jupyter=True,
    manual=True,
)

In [60]:
# Display the raw extracted entities (character offsets, label, text, reason).
ents

[EntityReference(start=14, end=21, label='person_relationship:relationship_type', text='husband', reason='relationship type'),
 EntityReference(start=25, end=39, label='person:name', text='Michelle Obama', reason='person name')]