In [186]:
import requests
import pandas as pd
import wikipedia
import re

In [187]:
def get_wikipedia_url_from_wikidata_id(wikidata_id, lang='en', debug=False, timeout=10):
    """Look up the Wikipedia URL(s) linked to a Wikidata entity.

    Sends one ``wbgetentities`` request to the Wikidata API asking for the
    entity's sitelinks together with their URLs.

    Args:
        wikidata_id: Entity id to look up, e.g. ``'Q11725'``.
        lang: Language prefix of the wanted wiki; the sitelink stored under
            the key ``f'{lang}wiki'`` is returned. Pass a falsy value
            (``None`` or ``''``) to get every sitelink instead.
        debug: When true, print the id, the request URL and the decoded
            JSON response.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            single stalled connection would block the caller indefinitely.

    Returns:
        With ``lang`` set: the percent-decoded URL string, or ``None`` when
        the entity, its sitelinks, or the sitelink for that language is
        missing.
        With ``lang`` falsy: a dict mapping each sitelink key to its
        percent-decoded URL (sitelinks without a URL are skipped), or
        ``None`` when the entity or its sitelinks are missing.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so every value is URL-encoded.
    response = requests.get(
        'https://www.wikidata.org/w/api.php',
        params={
            'action': 'wbgetentities',
            'props': 'sitelinks/urls',
            'ids': wikidata_id,
            'format': 'json',
        },
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    json_response = response.json()
    if debug:
        print(wikidata_id, response.url, json_response)

    # Any missing level (entities -> entity -> sitelinks) means "not found".
    entity = (json_response.get('entities') or {}).get(wikidata_id) or {}
    sitelinks = entity.get('sitelinks')
    if not sitelinks:
        return None

    if lang:
        # Only the sitelink of the requested language edition.
        wiki_url = (sitelinks.get(f'{lang}wiki') or {}).get('url')
        return requests.utils.unquote(wiki_url) if wiki_url else None

    # No language filter: return every sitelink that carries a URL.
    return {
        key: requests.utils.unquote(sitelink.get('url'))
        for key, sitelink in sitelinks.items()
        if sitelink.get('url')
    }

In [188]:
# Load the city table; the preview below shows the columns city (a Wikidata
# entity URL), cityLabel and population.
city_data = pd.read_csv(filepath_or_buffer='query.csv')
city_data.head(n=5)


Unnamed: 0,city,cityLabel,population
0,http://www.wikidata.org/entity/Q11725,Chongqing,32054159
1,http://www.wikidata.org/entity/Q1353,Delhi,26495000
2,http://www.wikidata.org/entity/Q8686,Shanghai,24870895
3,http://www.wikidata.org/entity/Q956,Beijing,21893095
4,http://www.wikidata.org/entity/Q30002,Chengdu,20937757


In [190]:
# Reduce each entity URL to its trailing id (".../entity/Q11725" -> "Q11725").
# Splitting on the last "/" does not depend on the exact URL prefix and leaves
# an already-stripped id unchanged, so re-running this cell is harmless.
city_data['city'] = city_data['city'].str.rsplit('/', n=1).str[-1]
city_data

Unnamed: 0,city,cityLabel,population
0,Q11725,Chongqing,32054159
1,Q1353,Delhi,26495000
2,Q8686,Shanghai,24870895
3,Q956,Beijing,21893095
4,Q30002,Chengdu,20937757
...,...,...,...
4217,Q889344,Boconó,100240
4218,Q622633,Nampa,100200
4219,Q1714,Lamitan,100150
4220,Q838785,Temecula,100097


In [194]:
# Resolve every Wikidata id to its English Wikipedia URL. This issues one
# Wikidata API request per row; rows without a matching sitelink get None.
city_data['wiki url'] = city_data['city'].apply(get_wikipedia_url_from_wikidata_id)

In [202]:
# Save the enriched table; index=False keeps the row index out of the file.
city_data.to_csv(path_or_buf='city_data.csv', index=False)

In [201]:
# Preview the first rows, now including the 'wiki url' column.
city_data.head(n=5)

Unnamed: 0,city,cityLabel,population,wiki url
0,Q11725,Chongqing,32054159,https://en.wikipedia.org/wiki/Chongqing
1,Q1353,Delhi,26495000,https://en.wikipedia.org/wiki/Delhi
2,Q8686,Shanghai,24870895,https://en.wikipedia.org/wiki/Shanghai
3,Q956,Beijing,21893095,https://en.wikipedia.org/wiki/Beijing
4,Q30002,Chengdu,20937757,https://en.wikipedia.org/wiki/Chengdu
