**[PT]** Português

---

**[EN]** English

# Bloco de experiências com linked data

---

# Notebook to experiment with linked data


## Exemplo de pesquisas

---

## Query examples

### Estudantes da Universidade de Coimbra nascidos antes do século XX

---

### Students of the University of Coimbra born before the 20th century

2022-05-18 : 454 results stored in [inferences/wikidata](../inferences/wikidata/query.json)

In [5]:
# Id of Universidade de Coimbra
uc_qid = 'Q368643'

# Propriedade "educated at"
p_educated = 'P69'

# Birth
p_birth_date = 'P569'
p_birth_place = 'P19'

# Death
p_death_date = 'P570'
p_death_place = 'P20'

# country
p_country = 'P27'

# Biblioteca Nacional ID
p_bnp_id = 'P1005'


# qwikidata



In [None]:
!pip install qwikidata


In [7]:

query = """

    SELECT DISTINCT ?student 
     WHERE {
      ?student p:P69 ?educatedAt.
      ?educatedAt (ps:P69/(wdt:P279*)) wd:Q368643.
      ?student p:P569 ?birthDateStmt.
      ?birthDateStmt psv:P569 ?birthDateValue.
      ?birthDateValue wikibase:timePrecision ?precision_1.
      FILTER(?precision_1 >= 11 )
      ?birthDateValue wikibase:timeValue ?P569_1.
      FILTER(?P569_1 < "+1900-01-01T00:00:00Z"^^xsd:dateTime)
    }
    LIMIT 5

"""

In [9]:
from datetime import datetime

from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api
from qwikidata.sparql import (get_subclasses_of_item,
                              return_sparql_query_results)


# Things we would like to known about the students
claims_of_interest = ['P69','P1559','P1447','P27','P569','P570','P106','P19','P20','P1005','P102','P39','P166','P213','P214']
claims_ignored = set()
prop_labels = {}
cache = {}
results = return_sparql_query_results(query)

# https://qwikidata.readthedocs.io/en/stable/entity.html
for result in results['results']['bindings']:
    uri = result['student']['value']
    qid = uri.split("/")[-1]  # last item in the path
    result_dict = get_entity_dict_from_api(qid)
    std = WikidataItem(result_dict)

    print(std.entity_id,std.get_label())
    for aka in std.get_aliases(lang='pt'):
        print(f"{' '*2} aka:",aka)

    claim_groups = std.get_claim_groups()

    for claim_id in sorted([c for c in claim_groups.keys() if c in claims_of_interest]):
        if claim_id in prop_labels.keys():
            propLabel = prop_labels[claim_id]
        else:
            prop_dict = get_entity_dict_from_api(claim_id)
            propLabel = prop_dict['labels']['en']['value']
            prop_labels[claim_id] = propLabel

        claim_group = claim_groups[claim_id]
        print(f"{' '*2} {claim_id} {propLabel}({len(claim_group)})")
        for claim in claim_group:
            snak = claim.mainsnak
            if snak.snak_datatype == 'wikibase-item':
                qid = snak.datavalue.value["id"]
                entity = WikidataItem(get_entity_dict_from_api(qid))
                print(f"{' '*6}{qid} {entity.get_label(lang='pt')}")
            elif snak.value_datatype == 'string':
                print(f"{' '*6} {snak.datavalue.value}") 
            elif snak.value_datatype == 'time':
                svalue = snak.datavalue.value['time']
                date_value = datetime.strptime(svalue,"+%Y-%m-%dT%H:%M:%SZ")
                print(f"{' '*6} {date_value}") 
            else:
                snak_info = (claim.rank,snak.snaktype,snak.snak_datatype,snak.value_datatype)  
                print(f"{' '*6}{snak_info}")
            
            if claim_id == 'P1005': 
                bnp_id = snak.datavalue.value
                print(f"{' '*6} Publicações: http://urn.bn.pt/bibliografia/unimarc/txt?id={bnp_id}")

            for pid, quals in claim.qualifiers.items():
                prop = WikidataProperty(get_entity_dict_from_api(pid))
                for qual in quals:
                    if qual.snak.snaktype != "value":
                        continue
                    else:
                        print(f"{' '*9}{prop.get_label()} {qual.snak.datavalue}")

    print(f"{' '*3}Other properties:",set(claim_groups.keys())-set(claims_of_interest) )
    claims_ignored = claims_ignored.union(set(claim_groups.keys())-set(claims_of_interest))
    print()

print()
if len(claims_ignored) > 0:
    print("Properties present in the students and ignored in this list")
    for ignored in sorted(claims_ignored):
        if ignored in prop_labels.keys():
            propLabel = prop_labels[prop]
        else:
            prop_dict = get_entity_dict_from_api(ignored)
            propLabel = prop_dict['labels']['en']['value']
            prop_labels[prop] = propLabel  
        print(f"   {ignored:<6s}: {propLabel}")



Q61755108 Manuel Augusto Martins
   P106 occupation(1)
      Q82955 político
   P19 place of birth(1)
      Q2078801 Santa Maria Maior
   P20 place of death(1)
      Q25444 Funchal
   P27 country of citizenship(1)
      Q45 Portugal
   P569 date of birth(1)
       1867-04-10 00:00:00
   P570 date of death(1)
       1936-04-04 00:00:00
   P69 educated at(1)
      Q368643 Universidade de Coimbra
         academic major WikibaseEntityId(id=Q7748)
   Other properties: {'P21', 'P31', 'P103', 'P2671', 'P734', 'P1412', 'P735', 'P18'}

Q16917757 Francisco Correia de Mendonça
   P106 occupation(1)
      Q16533 juiz
   P19 place of birth(1)
      Q732548 Lagos
   P27 country of citizenship(1)
      Q45 Portugal
   P569 date of birth(1)
       1832-01-03 00:00:00
   P570 date of death(1)
       1899-08-04 00:00:00
   P69 educated at(1)
      Q368643 Universidade de Coimbra
   Other properties: {'P21', 'P31', 'P103', 'P2671', 'P1412', 'P735'}

Q17272838 Álvaro Rodrigues Azevedo
   aka: José Rodrig

## Lugares

---

## Places


In [None]:
place = "Soure"

In [None]:
pquery = """

SELECT DISTINCT ?location ?locationName ?instanceOf ?instanceOfLabel ?insideLabel ?inside2Label ?coordinates ?geonamesID WHERE {
                        ?location ?label '$place'@pt .  
                        ?location wdt:P17 wd:Q45 .
                        ?location rdfs:label ?locationName .
                        ?location wdt:P31 ?instanceOf .
                        ?location wdt:P625 ?coordinates .
                        ?location wdt:P131 ?inside .

                        OPTIONAL {

                          ?inside wdt:P131 ?inside2 .
                          ?location wdt:P1566 ?geonamesID .
                        }
                        FILTER(lang(?locationName) = "pt") 
                        { SERVICE wikibase:label { bd:serviceParam wikibase:language "pt". }}
 
                        }
"""

#### Encontrar um lugar na wikipedia

---

### Find a place in Wikipedia

In [None]:
!pip install mkwikidata


In [None]:
import mkwikidata
import pandas as pd

place = 'Soure'

query_result = mkwikidata.run_query(pquery, params={'place':place})
results = query_result['results']['bindings']
table = [{'name':loc['locationName']['value'],
          'type':loc['instanceOfLabel']['value'],
          'uri':loc['location']['value'],
          'inside':loc['insideLabel']['value'],
          'inside2':loc.get('inside2Label',{'value':None})['value'],
          'geonamesID':loc.get('geonamesID',{'value':None})['value'],
          
          } for loc in results]

pd.DataFrame(table).sort_values('uri')

Unnamed: 0,name,type,uri,inside,inside2,geonamesID
2,Soure,freguesia de Portugal,http://www.wikidata.org/entity/Q1977811,Soure,Região de Coimbra,8011895.0
4,Soure,freguesia de Portugal,http://www.wikidata.org/entity/Q1977811,Soure,Região de Coimbra,8011895.0
5,Soure,freguesia de Portugal,http://www.wikidata.org/entity/Q1977811,Soure,Coimbra,8011895.0
8,Soure,freguesia de Portugal,http://www.wikidata.org/entity/Q1977811,Soure,Coimbra,8011895.0
0,Soure,município de Portugal,http://www.wikidata.org/entity/Q543993,Coimbra,Portugal,2733851.0
1,Soure,vila de Portugal,http://www.wikidata.org/entity/Q543993,Coimbra,Portugal,2733851.0
3,Soure,vila de Portugal,http://www.wikidata.org/entity/Q543993,Região de Coimbra,,
7,Soure,município de Portugal,http://www.wikidata.org/entity/Q543993,Região de Coimbra,,
6,Soure,vila de Portugal,http://www.wikidata.org/entity/Q97597938,Soure,,


In [None]:
from timelinknb import current_time,current_machine, get_db
from ucalumni.config import default_db

db_spec = default_db
db = get_db(db_spec)
print(current_machine,current_time,f'db={db_spec}')

imac-jrc.local 2022-05-18 17:43:33.378466 db=('sqlite', 'fauc.db')


### Localizar os lugares mais infrequentes no FAUC

---

### Locate the less frequent places in the FAUC

In [None]:
from timelinknb.pandas import attribute_values


# Get list of different values and the number of occurences
naturalidades = attribute_values('naturalidade')
small_places = naturalidades[naturalidades['count']<10].sort_values('count', ascending=False).head(100)

Test with a small number

In [None]:
from IPython.display import display
import time

for place in small_places.index.values[:10]:
    print("Looking for:", place)
    try:
        query_result = mkwikidata.run_query(pquery, params={'place':place})
    except Exception as e:
        print(e)
    results = query_result['results']['bindings']
    if len(results) > 0:
        table = [{'name':loc['locationName']['value'],
                'type':loc['instanceOfLabel']['value'],
                'uri':loc['location']['value'],
                'inside':loc['insideLabel']['value'],
                'inside2':loc.get('inside2Label',{'value':None})['value'],
                'geonamesID':loc.get('geonamesID',{'value':None})['value'],
                
                } for loc in results]
        candidates = pd.DataFrame(table)
        display(candidates)
        geonames = candidates['geonamesID'].unique()
        for geo in geonames:
            print(f"   https://www.geonames.org/{geo}")
    else:
        print("   not found")
        
    time.sleep(1)  # respect wikidata

Looking for: Alcofra


Unnamed: 0,name,type,uri,inside,inside2,geonamesID
0,Alcofra,freguesia de Portugal,http://www.wikidata.org/entity/Q1021445,Vouzela,Viseu,8014009
1,Alcofra,freguesia de Portugal,http://www.wikidata.org/entity/Q1021445,Vouzela,Comunidade Intermunicipal Viseu Dão Lafões,8014009


   https://www.geonames.org/8014009
Looking for: Lumiar


Unnamed: 0,name,type,uri,inside,inside2,geonamesID
0,Estação Lumiar,estação subterrânea,http://www.wikidata.org/entity/Q374170,Lisboa,,
1,Estação Lumiar,estação de metropolitano,http://www.wikidata.org/entity/Q374170,Lisboa,,
2,Lumiar,freguesia de Portugal,http://www.wikidata.org/entity/Q924723,Lisboa,Lisboa,8012475.0


   https://www.geonames.org/None
   https://www.geonames.org/8012475
Looking for: Seixo de Ansiães


Unnamed: 0,name,type,uri,inside,inside2,geonamesID
0,Seixo de Ansiães,freguesia de Portugal,http://www.wikidata.org/entity/Q1011237,Carrazeda de Ansiães,Trás-os-Montes e Alto Douro,8014442
1,Seixo de Ansiães,freguesia de Portugal,http://www.wikidata.org/entity/Q1011237,Carrazeda de Ansiães,Bragança,8014442
2,Seixo de Ansiães,freguesia de Portugal,http://www.wikidata.org/entity/Q1011237,Carrazeda de Ansiães,Douro,8014442


   https://www.geonames.org/8014442
Looking for: Santa Cristina de Figueiró


Unnamed: 0,name,type,uri,inside,inside2,geonamesID
0,Santa Cristina de Figueiró,localidade,http://www.wikidata.org/entity/Q1023987,Amarante,Porto,8012656
1,Santa Cristina de Figueiró,freguesia de Portugal,http://www.wikidata.org/entity/Q1023987,Amarante,Douro Litoral,8012656
2,Santa Cristina de Figueiró,localidade,http://www.wikidata.org/entity/Q1023987,Amarante,Douro Litoral,8012656
3,Santa Cristina de Figueiró,freguesia de Portugal,http://www.wikidata.org/entity/Q1023987,Amarante,Porto,8012656


   https://www.geonames.org/8012656
Looking for: Sandelgas
   not found
Looking for: Salvaterra
   not found
Looking for: Rio Bom
   not found
Looking for: Ribeira de Homem
   not found
Looking for: Quintãs
   not found
Looking for: Proença
   not found
