In [1]:
import requests

In [2]:
# https://www.jcchouinard.com/wikidata-api-python/
def fetch_wikidata(params):
    url = 'https://www.wikidata.org/w/api.php'
    try:
        return requests.get(url, params=params)
    except:
        return 'API error'

In [3]:
query = 'uiuc'
 
# Which parameters to use
params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'search': query,
        'language': 'en'
    }
 
# Fetch API
json_data = fetch_wikidata(params).json()
json_data

{'searchinfo': {'search': 'uiuc'},
 'search': [{'id': 'Q457281',
   'title': 'Q457281',
   'pageid': 431380,
   'display': {'label': {'value': 'University of Illinois at Urbana–Champaign',
     'language': 'en'},
    'description': {'value': 'public research university in Urbana and Champaign, Illinois, United States',
     'language': 'en'}},
   'repository': 'wikidata',
   'url': '//www.wikidata.org/wiki/Q457281',
   'concepturi': 'http://www.wikidata.org/entity/Q457281',
   'label': 'University of Illinois at Urbana–Champaign',
   'description': 'public research university in Urbana and Champaign, Illinois, United States',
   'match': {'type': 'alias', 'language': 'en', 'text': 'UIUC'},
   'aliases': ['UIUC']},
  {'id': 'Q7895523',
   'title': 'Q7895523',
   'pageid': 7833069,
   'display': {'label': {'value': 'University of Illinois College of Law',
     'language': 'en'},
    'description': {'value': 'Law school in Illinois, U.S.',
     'language': 'en'}},
   'repository': 'wikida

In [4]:
# for elem in data['search']:
#     print(elem['id'])

In [5]:
id_ = json_data['search'][0]['id']
params = {
            'action': 'wbgetentities',
            'ids': id_, 
            'format': 'json',
            'languages': 'en'
        }
 
# fetch the API
data = fetch_wikidata(params)
 
# Show response
data = data.json()
data

{'entities': {'Q457281': {'pageid': 431380,
   'ns': 0,
   'title': 'Q457281',
   'lastrevid': 1916576797,
   'modified': '2023-06-16T13:19:33Z',
   'type': 'item',
   'id': 'Q457281',
   'labels': {'en': {'language': 'en',
     'value': 'University of Illinois at Urbana–Champaign'}},
   'descriptions': {'en': {'language': 'en',
     'value': 'public research university in Urbana and Champaign, Illinois, United States'}},
   'aliases': {'en': [{'language': 'en', 'value': 'UIUC'},
     {'language': 'en', 'value': 'U of I'},
     {'language': 'en', 'value': 'University of Illinois at Urbana'},
     {'language': 'en', 'value': 'University of Illinois at Champaign'},
     {'language': 'en', 'value': 'University of Illinois at Champaign-Urbana'},
     {'language': 'en', 'value': 'University of Illinois at Urbana-Champaign'},
     {'language': 'en', 'value': 'University of Illinois'},
     {'language': 'en', 'value': 'U. of I.'},
     {'language': 'en', 'value': 'uiuc.edu'},
     {'language'

In [6]:
try:
    title = data['entities'][id_]['labels']['en']['value']
except:
    title = 'not found'
try:
    alternate_names = [v['value'] for v in data['entities'][id_]['aliases']['en']]
except:
    alternate_names = 'not found'
try:
    description = data['entities'][id_]['descriptions']['en']['value'] 
except:
    description = 'not found'
try:
    twitter = data['entities'][id_]['claims']['P2002'][0]['mainsnak']['datavalue']['value']
except:
    twitter = 'not found'
try:
    facebook = data['entities'][id_]['claims']['P2013'][0]['mainsnak']['datavalue']['value']
except:
    facebook = 'not found'
try:
    instagram = data['entities'][id_]['claims']['P2003'][0]['mainsnak']['datavalue']['value']
except:
    instagram = 'not found'
try:
    subreddit = data['entities'][id_]['claims']['P3984'][0]['mainsnak']['datavalue']['value']
except:
    subreddit = 'not found'
try:
    official_websites = [v['mainsnak']['datavalue']['value']for v in data['entities'][id_]['claims']['P856']]
except:
    official_websites = 'not found'
try:
    logo_file = data['entities'][id_]['claims']['P154'][0]['mainsnak']['datavalue']['value'].replace(' ', '_')
    logo_image_url = f'www.wikidata.org/wiki/{id_}#/media/File:{logo_file}'
except:
    logo_image_url = 'not found'
try:
    campus_file = data['entities'][id_]['claims']['P18'][0]['mainsnak']['datavalue']['value'].replace(' ', '_')
    campus_image_url = f'www.wikidata.org/wiki/{id_}#/media/File:{campus_file}'
except:
    campus_image_url = 'not found'
    pass
 


result = {
    'wikidata_id': id_,
    'title': title,
    'description': description,
    'alternate_names': alternate_names,
    'twitter': twitter,
    'facebook': facebook,
    'instagram': instagram,
    'subreddit': subreddit,
    'official_websites': official_websites,
    'logo_image_url': logo_image_url,
    'campus_image_url': campus_image_url
}
 
result

{'wikidata_id': 'Q457281',
 'title': 'University of Illinois at Urbana–Champaign',
 'description': 'public research university in Urbana and Champaign, Illinois, United States',
 'alternate_names': ['UIUC',
  'U of I',
  'University of Illinois at Urbana',
  'University of Illinois at Champaign',
  'University of Illinois at Champaign-Urbana',
  'University of Illinois at Urbana-Champaign',
  'University of Illinois',
  'U. of I.',
  'uiuc.edu',
  'University of Illinois Urbana-Champaign',
  'University of Illinois, Urbana-Champaign'],
 'twitter': 'Illinois_Alma',
 'facebook': 'illinois.edu',
 'instagram': 'illinois1867',
 'subreddit': 'UIUC',
 'official_websites': ['https://illinois.edu/'],
 'logo_image_url': 'www.wikidata.org/wiki/Q457281#/media/File:University_of_Illinois_at_Urbana–Champaign_logo.svg',
 'campus_image_url': 'www.wikidata.org/wiki/Q457281#/media/File:UIUC_Illini_Union_and_Main_Quad.jpg'}

In [3]:
def call_wikidata_api(query):
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'search': query,
        'language': 'en'
    }
    json_data = fetch_wikidata(params).json()
    id_ = json_data['search'][0]['id']
    params = {
        'action': 'wbgetentities',
        'ids': id_, 
        'format': 'json',
        'languages': 'en'
    }

    data = fetch_wikidata(params).json()
    try:
        title = data['entities'][id_]['labels']['en']['value']
    except:
        title = 'not found'
    try:
        alternate_names = [v['value'] for v in data['entities'][id_]['aliases']['en']]
    except:
        alternate_names = 'not found'
    try:
        description = data['entities'][id_]['descriptions']['en']['value'] 
    except:
        description = 'not found'
    try:
        twitter = data['entities'][id_]['claims']['P2002'][0]['mainsnak']['datavalue']['value']
    except:
        twitter = 'not found'
    try:
        facebook = data['entities'][id_]['claims']['P2013'][0]['mainsnak']['datavalue']['value']
    except:
        facebook = 'not found'
    try:
        instagram = data['entities'][id_]['claims']['P2003'][0]['mainsnak']['datavalue']['value']
    except:
        instagram = 'not found'
    try:
        subreddit = data['entities'][id_]['claims']['P3984'][0]['mainsnak']['datavalue']['value']
    except:
        subreddit = 'not found'
    try:
        official_websites = [v['mainsnak']['datavalue']['value']for v in data['entities'][id_]['claims']['P856']]
    except:
        official_websites = 'not found'
    try:
        logo_file = data['entities'][id_]['claims']['P154'][0]['mainsnak']['datavalue']['value'].replace(' ', '_')
        logo_image_url = f'www.wikidata.org/wiki/{id_}#/media/File:{logo_file}'
    except:
        logo_image_url = 'not found'
    try:
        campus_file = data['entities'][id_]['claims']['P18'][0]['mainsnak']['datavalue']['value'].replace(' ', '_')
        campus_image_url = f'www.wikidata.org/wiki/{id_}#/media/File:{campus_file}'
    except:
        campus_image_url = 'not found'
 
    result = {
        'wikidata_id': id_,
        'title': title,
        'description': description,
        'alternate_names': alternate_names,
        'twitter': twitter,
        'facebook': facebook,
        'instagram': instagram,
        'subreddit': subreddit,
        'official_websites': official_websites,
        'logo_image_url': logo_image_url,
        'campus_image_url': campus_image_url
    }
    return result

In [4]:
res = call_wikidata_api('uc berkeley')

In [5]:
url = res['logo_image_url']
url

'www.wikidata.org/wiki/Q168756#/media/File:Seal_of_University_of_California,_Berkeley.svg'

In [18]:
res2 = call_wikidata_api('uiuc')
url2 = res2['logo_image_url']
url2

'www.wikidata.org/wiki/Q457281#/media/File:University_of_Illinois_at_Urbana–Champaign_logo.svg'

In [19]:
from bs4 import BeautifulSoup
import urllib

In [21]:
page = urllib.request.urlopen('https://' + url3)
soup = BeautifulSoup(page)
images = soup.findAll('img')
for image in images:
    print('https:' + image['src'])
    print()

https://upload.wikimedia.org/wikipedia/commons/thumb/9/9c/University_of_Illinois_at_Urbana%E2%80%93Champaign_logo.svg/220px-University_of_Illinois_at_Urbana%E2%80%93Champaign_logo.svg.png

https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/UIUC_Illini_Union_and_Main_Quad.jpg/220px-UIUC_Illini_Union_and_Main_Quad.jpg

https:https://maps.wikimedia.org/img/osm-intl,13,40.110538888889,-88.228411111111,310x180.png?lang=en&domain=www.wikidata.org&title=Q457281&groups=_d105d0fc75d31cc26a2b5f8565a10c37691b6b10

https:https://maps.wikimedia.org/img/osm-intl,13,40.09609,-88.21953,310x180.png?lang=en&domain=www.wikidata.org&title=Q457281&groups=_3958062c04ff59f5638dd0a0d0a3d0002bae9f7a

https:https://maps.wikimedia.org/img/osm-intl,13,40.1163,-88.22632,310x180.png?lang=en&domain=www.wikidata.org&title=Q457281&groups=_bdd1061f5847500eb2d5616abdc07d28afa93451

https:https://maps.wikimedia.org/img/osm-intl,13,40.091080555556,-88.226319444444,310x180.png?lang=en&domain=www.wikidata.org&title=Q