In [1]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON 
import pandas as pd 
import numpy as np

In [30]:
from wikidata.client import Client

In [60]:
endpoint_url = "https://query.wikidata.org/sparql"

query = """SELECT DISTINCT ?SoccerPlayer ?SoccerPlayerLabel ?coordinates ?birthLocationLabel ?birthCountryLabel ?image
WHERE {
  ?SoccerPlayer wdt:P106 wd:Q937857; 
                wdt:P21 wd:Q6581097;  
                wdt:P569 ?birthdate; 
                wdt:P27 ?citizenship; 
                wdt:P413 ?position; 
                wdt:P18 ?image;    
                wdt:P1532 ?soccerNationality; 
                wdt:P19 ?birthLocation.
                
  
  ?SoccerPlayer wikibase:sitelinks ?sitelinks .
                
  ?birthLocation wdt:P625 ?coordinates.  
  ?birthLocation wdt:P17 ?birthCountry.
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  
} 
ORDER BY DESC (?sitelinks)
LIMIT 750"""


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


results = get_results(endpoint_url, query)

In [None]:
res

In [61]:
results['results']['bindings'][0]

{'SoccerPlayer': {'type': 'uri',
  'value': 'http://www.wikidata.org/entity/Q615'},
 'SoccerPlayerLabel': {'xml:lang': 'en',
  'type': 'literal',
  'value': 'Lionel Messi'},
 'coordinates': {'datatype': 'http://www.opengis.net/ont/geosparql#wktLiteral',
  'type': 'literal',
  'value': 'Point(-60.639444444 -32.9575)'},
 'birthLocationLabel': {'xml:lang': 'en',
  'type': 'literal',
  'value': 'Rosario'},
 'birthCountryLabel': {'xml:lang': 'en',
  'type': 'literal',
  'value': 'Argentina'},
 'image': {'type': 'uri',
  'value': 'http://commons.wikimedia.org/wiki/Special:FilePath/Lionel%20Messi%2020180626.jpg'}}

In [63]:
qnum = []
name = []  
coord = []   
blocation = []
bcountry = []
img = [] 
for res in results['results']['bindings']:  
    q = res['SoccerPlayer']['value'] 
    q = q.split('/')[-1]
    qnum.append(q) 
    name.append(res['SoccerPlayerLabel']['value'])  
    
    c = res['coordinates']['value'] 
    long, lat = c.replace('Point', '').strip("()").split() 
    c = (float(lat), float(long))
    coord.append(c) 
    img.append(res['image']['value'])   
    
    blocation.append(res['birthLocationLabel']['value'])
    bcountry.append(res['birthCountryLabel']['value'])
    

primary_dat = pd.DataFrame({
    'qnum': qnum, 
    'name': name, 
    'coordinates': coord,   
    'birth location': blocation,
    'birth country': bcountry,
    'img': img
}) 

primary_dat = primary_dat.drop_duplicates(subset=['qnum']) 

primary_dat

Unnamed: 0,qnum,name,coordinates,birth location,birth country,img
0,Q615,Lionel Messi,"(-32.9575, -60.639444444)",Rosario,Argentina,http://commons.wikimedia.org/wiki/Special:File...
1,Q11571,Cristiano Ronaldo,"(32.65, -16.916666666)",Funchal,Portugal,http://commons.wikimedia.org/wiki/Special:File...
2,Q17515,Diego Maradona,"(-34.7, -58.4)",Lanús,Argentina,http://commons.wikimedia.org/wiki/Special:File...
3,Q12897,Pelé,"(-21.694722222, -45.255277777)",Três Corações,Brazil,http://commons.wikimedia.org/wiki/Special:File...
4,Q1835,Zinedine Zidane,"(43.296666666, 5.376388888)",Marseille,France,http://commons.wikimedia.org/wiki/Special:File...
...,...,...,...,...,...,...
744,Q215812,Shay Given,"(54.835599, -7.477913)",Lifford,Ireland,http://commons.wikimedia.org/wiki/Special:File...
745,Q214513,Mark Hughes,"(53.0463, -2.9932)",Wrexham,United Kingdom,http://commons.wikimedia.org/wiki/Special:File...
746,Q219354,Ian Rush,"(53.2577, -3.4416)",St Asaph,United Kingdom,http://commons.wikimedia.org/wiki/Special:File...
747,Q356126,Gerard Deulofeu,"(41.823888888, 2.7175)",Riudarenes,Spain,http://commons.wikimedia.org/wiki/Special:File...


In [74]:
import json

In [83]:
# primary_dat.to_json('primary_data.json')

In [64]:
# primary_dat.to_csv('primary_data.csv')

In [76]:
pdr = json.loads(primary_dat.to_json())

In [65]:
primary_dat.head(20)

Unnamed: 0,qnum,name,coordinates,birth location,birth country,img
0,Q615,Lionel Messi,"(-32.9575, -60.639444444)",Rosario,Argentina,http://commons.wikimedia.org/wiki/Special:File...
1,Q11571,Cristiano Ronaldo,"(32.65, -16.916666666)",Funchal,Portugal,http://commons.wikimedia.org/wiki/Special:File...
2,Q17515,Diego Maradona,"(-34.7, -58.4)",Lanús,Argentina,http://commons.wikimedia.org/wiki/Special:File...
3,Q12897,Pelé,"(-21.694722222, -45.255277777)",Três Corações,Brazil,http://commons.wikimedia.org/wiki/Special:File...
4,Q1835,Zinedine Zidane,"(43.296666666, 5.376388888)",Marseille,France,http://commons.wikimedia.org/wiki/Special:File...
5,Q10520,David Beckham,"(51.569, 0.01)",Leytonstone,United Kingdom,http://commons.wikimedia.org/wiki/Special:File...
6,Q20110,Francesco Totti,"(41.893055555, 12.482777777)",Rome,Italy,http://commons.wikimedia.org/wiki/Special:File...
7,Q529207,Ronaldo,"(-22.908333333, -43.196388888)",Rio de Janeiro,Brazil,http://commons.wikimedia.org/wiki/Special:File...
9,Q142794,Neymar,"(-23.522777777, -46.193055555)",Mogi das Cruzes,Brazil,http://commons.wikimedia.org/wiki/Special:File...
10,Q17163,Johan Cruyff,"(52.383333333, 4.9)",Amsterdam,Netherlands,http://commons.wikimedia.org/wiki/Special:File...


In [36]:
client = Client()  # doctest: +SKIP
entity = client.get('Q615', load=True)

create hints db 

----  

**first**: define properties we wanna try and get 

properties 
- 

hint_dr = {}
for q in qnum:  
    ent = client.get(q, load=True) 
    hint_list = []
    for prop in properties: 
         val = ent.get(p) 
         elem = prop, val 
         hint_list.append(elem) 
         hint_dr[q] = hint_list 
         

In [46]:
bday = entity.get('P569') 
entity[bday]

KeyError: None

In [44]:
entity.getlist(['P2031', 'P569'])

[]

In [None]:
"""
P569 - birthdate 
P27 - citizenship 
P413 - position 
P1532 - soccerNationality 
P19 - birthLocation 
""" 

In [None]:
player = "..." 

citizenship_query = f"""SELECT ?citizenshipLabel
WHERE {{  

  wd:{player} wdt:P27 ?citizenship 

  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

}} 
"""   

birthdate_query = f"""SELECT ?birthdate
WHERE {{  

  wd:{player} wdt:P569 ?birthdate 

  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

}} 
"""  

position_query = f"""SELECT ?position
WHERE {{  

  wd:{player} wdt:P413 ?position 

  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

}} 
"""  

soccer_nat_query = f"""SELECT ?soccerNationality
WHERE {{  

  wd:{player} wdt:P1532 ?soccerNationality 

  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

}} 
""" 




In [68]:
res = []
for q in primary_dat['qnum'].values[:5]: 
    
    query = f"""SELECT ?citizenshipLabel
        WHERE {{  

          wd:{q} wdt:P27 ?citizenship 

          SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

        }} 
        """  
    
    results = get_results(endpoint_url, query)
    
    res.append(results)

In [71]:
res[0]

{'head': {'vars': ['citizenshipLabel']},
 'results': {'bindings': [{'citizenshipLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Spain'}},
   {'citizenshipLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Italy'}},
   {'citizenshipLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Argentina'}}]}}