In [None]:
import requests
import pandas as pd

In [None]:
def fetch_wikidata(params, timeout=10):
    """Send a GET request to the Wikidata API and return the decoded JSON.

    Args:
        params: Mapping of query-string parameters forwarded to the API.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload on success. If the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON, a
        string of the form "There was an error: ..." is returned instead of
        raising, so callers must check the type of the result.
    """
    wikidata_url = "https://www.wikidata.org/w/api.php"
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        response = requests.get(wikidata_url, params=params, timeout=timeout)
        # Surface 4xx/5xx answers as errors instead of trying to parse them.
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        return f"There was an error: {e}"

In [None]:
# Function to resolve multiple redirects on Wikipedia
def resolve_redirect(title, max_hops=10, timeout=10):
    """Follow Wikipedia redirects for ``title`` and return the final page title.

    Args:
        title: Page title to look up on the English Wikipedia API.
        max_hops: Upper bound on follow-up queries issued while the API keeps
            reporting redirects, so a misbehaving response cannot loop forever.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The resolved title. This is best effort: a non-string or blank
        ``title`` is returned unchanged without any request, and if a request
        fails or the response has no usable ``query`` section the best title
        known so far (the original one, or the last redirect target) is
        returned instead of raising.
    """
    wikipedia_api_url = "https://en.wikipedia.org/w/api.php"

    # Nothing meaningful can be looked up for NaN/None/empty titles.
    if not isinstance(title, str) or not title.strip():
        return title

    def query_wikipedia(t):
        """Return the ``query`` section of the API answer for ``t``, or None."""
        params = {
            "action": "query",
            "titles": t,
            "redirects": 1,
            "format": "json"
        }
        try:
            response = requests.get(wikipedia_api_url, params=params, timeout=timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError):
            # Network failure, HTTP error status or a body that is not JSON.
            return None
        query = payload.get('query') if isinstance(payload, dict) else None
        return query if isinstance(query, dict) else None

    query = query_wikipedia(title)
    if query is None:
        return title

    # Loop to follow through all redirects, bounded by max_hops
    hops = 0
    while query.get('redirects') and hops < max_hops:
        # Get the last redirect target
        final_redirect = query['redirects'][-1]['to']
        next_query = query_wikipedia(final_redirect)
        if next_query is None:
            # The follow-up lookup failed; the redirect target is the best answer we have.
            return final_redirect
        query = next_query
        hops += 1

    if query.get('normalized'):
        return query['normalized'][0]['to']
    if query.get('pages'):
        page_id = next(iter(query['pages']))
        return query['pages'][page_id].get('title', title)
    return title  # No normalization or redirects, use the original title

In [None]:
# Function to get the Wikidata ID from a Wikipedia page title
def get_wiki_id_from_page(page_title):
    """Look up the Wikidata entity key for an English Wikipedia page title.

    Args:
        page_title: Wikipedia page title; redirects are resolved first via
            ``resolve_redirect``.

    Returns:
        The first key of the ``entities`` mapping returned by the
        ``wbgetentities`` call, or ``None`` when ``page_title`` is not a
        non-blank string, the request failed, or no entities came back.
    """
    # Non-string input (for example a NaN from an empty table cell) or a blank
    # title would otherwise be sent to the API as if it were a real title.
    if not isinstance(page_title, str) or not page_title.strip():
        return None

    final_title = resolve_redirect(page_title)  # Resolve redirects first
    params = {
        "action": "wbgetentities",
        "format": "json",
        "sites": "enwiki",
        "titles": final_title,
        "languages": "en",
        "redirects": "yes",
    }
    data = fetch_wikidata(params)
    # fetch_wikidata reports failures as a plain string rather than a dict.
    if not isinstance(data, dict):
        return None

    entities = data.get('entities')
    if not entities:
        return None

    # NOTE(review): the API is understood to key pages without an item as
    # "-1"; that key is returned as-is, same as before — confirm this is wanted.
    return next(iter(entities))

In [None]:
# Read the input table from disk; later cells use its WIKI_ID and WIKI_PAGE columns
df = pd.read_csv(filepath_or_buffer='nndb.csv')

In [None]:
# Keep only the rows whose WIKI_ID still holds the '-1' placeholder string
df_needs_update = df.loc[df['WIKI_ID'].eq('-1')]

In [None]:
# Number of rows that still need a Wikidata ID lookup
df_needs_update.shape[0]

In [None]:
# Apply the lookup only to rows that need an update.
# .assign builds a new frame instead of writing a column into a frame that was
# obtained by filtering df, which avoids pandas' chained-assignment warning.
df_needs_update = df_needs_update.assign(
    WIKI_ID=df_needs_update['WIKI_PAGE'].apply(get_wiki_id_from_page)
)

In [None]:
# Write the looked-up values back into the original dataframe, in place.
# Rows are matched on index; NA values in df_needs_update are not copied over,
# so rows whose lookup returned None keep their existing WIKI_ID.
df.update(other=df_needs_update, join='left', overwrite=True)

In [None]:
# Persist the updated table to a new CSV file, leaving out the index column
df.to_csv(path_or_buf='updated_nndb_wiki_id.csv', index=False)