In [10]:
import requests
from datetime import datetime
import urllib.parse
import pandas as pd

In [11]:
def fetch_wikidata(params, timeout=30):
    """Send a GET request to the Wikidata API and return the decoded JSON.

    Args:
        params: Query-string parameters for the API call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the caller
            indefinitely.

    Returns:
        The decoded JSON payload on success. On any request failure, HTTP
        error status, or undecodable body, a string of the form
        "There was an error: <details>" is returned instead of raising.
    """
    url = "https://www.wikidata.org/w/api.php"
    try:
        response = requests.get(url, params=params, timeout=timeout)
        # Surface 4xx/5xx responses as errors instead of trying to parse them.
        response.raise_for_status()
        return response.json()  # Return JSON content here
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"There was an error: {e}"

In [12]:
def get_wiki_id_from_page(page_title):
    """Look up the Wikidata entity ID for an English Wikipedia page title.

    Args:
        page_title: Title of the page on enwiki. Anything that is not a
            non-blank string (None, NaN from an empty CSV cell, "") is
            treated as "no page" and no request is made.

    Returns:
        The first entity key reported by the API as a string, or '-1' when
        the title is unusable, the request failed, or the response carries
        no entities.
    """
    # pandas represents an empty CSV cell as float NaN; passed through, it
    # would presumably be stringified and looked up as the title "nan".
    if not isinstance(page_title, str) or not page_title.strip():
        return '-1'

    params = {
        "action": "wbgetentities",
        "format": "json",
        "sites": "enwiki",
        "titles": page_title,
        "languages": "en",
        "redirects": "yes",
    }

    # Fetch API
    data = fetch_wikidata(params)

    # fetch_wikidata returns an error string on failure; only a dict can
    # carry an 'entities' mapping.
    if not isinstance(data, dict):
        return '-1'

    # Extract the Wikidata entity ID (first key of the entities mapping)
    entities = data.get('entities')
    if not entities:
        return '-1'
    return next(iter(entities))

In [13]:
# Load the CSV file.
# WIKI_ID is read as text so the '-1' sentinel is never type-inferred as an
# integer, which would make the later string comparison miss those rows.
df = pd.read_csv('nndb.csv', dtype={'WIKI_ID': str})

In [14]:
# Filter rows where WIKI_ID is -1.
# Take an explicit copy: a later cell writes to this subset, and assigning
# into a filtered slice triggers pandas' SettingWithCopyWarning.
df_needs_update = df.loc[df['WIKI_ID'] == '-1'].copy()

In [15]:
# Number of rows still missing a Wikidata ID
df_needs_update.shape[0]

4912

In [None]:
# Apply the function only to rows that need update.
# .assign builds a new frame instead of writing into a filtered slice of df,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
df_needs_update = df_needs_update.assign(
    WIKI_ID=df_needs_update['WIKI_PAGE'].apply(get_wiki_id_from_page)
)

In [None]:
# Merge the updated data back into the original dataframe.
# Only WIKI_ID was recomputed, so restrict the update to that column; rows are
# aligned on the index, and the other columns are left untouched.
df.update(df_needs_update[['WIKI_ID']])

In [None]:
# Row count of the full frame after the merge
df.shape[0]

In [None]:
# Write the merged frame to a new CSV, leaving out the index column
df.to_csv(path_or_buf='updated_nndb_wiki_id.csv', index=False)