# OpenAlex Work Search

This notebook allows you to search for a work in OpenAlex by providing work information (title, year, authors, DOI).

The search follows the same pipeline as `works_match.py`:
1. If DOI is provided, search by DOI first
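2. If no DOI is given or the DOI search finds nothing, search by title, institution (Politecnico di Torino) and year; if that fails, retry with a general text search using the same institution/year filters, and finally with a title-only search without filters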
3. Calculate similarity scores for matches
4. Display the best match


In [1]:
import os
import sys
import requests
from pathlib import Path

# Make the project's `utilities` package importable by putting the project root
# on sys.path. The kernel is expected to run either from the project root itself
# or from a direct sub-folder of it (e.g. works_matching).
notebook_dir = Path.cwd()

# Probe the working directory first, then its parent: the first one that holds a
# `utilities` folder becomes the project root. If neither does, fall back to the
# parent directory (i.e. assume the kernel was started inside works_matching).
project_root = next(
    (
        str(candidate)
        for candidate in (notebook_dir, notebook_dir.parent)
        if (candidate / 'utilities').exists()
    ),
    str(notebook_dir.parent),
)

# Prepend only when missing so re-running the cell does not add duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from utilities.file_utils import parse_author_pairs
from utilities.sim_lib import similarity_authors, similarity_titles, similarity_years

# ROR identifier for Politecnico di Torino.
# Used as the `institutions.ror` filter value in the title/institution/year
# searches against OpenAlex.
ROR_POLITO = 'https://ror.org/00bgk9508'


## Input Work Information

Fill in the information about the work you want to search for. All fields except DOI are required.


In [None]:
# ===== USER INPUT =====
# Describe the work to look up. `title`, `year` and `authors_string` are
# mandatory (the search cell raises ValueError if one is left empty);
# `doi` may stay None.

# Title of the work (required)
title = ""

# Publication year (required), e.g. 2020
year = None

# Authors (required), separated by ";". Each author is written either as
# "Surname, Name" or as "Name Surname".
# Examples:
#   "Smith, John; Doe, Jane"
#   "John Smith; Jane Doe"
authors_string = ""

# DOI (optional), e.g. "10.1234/example.doi".
# When provided, it is used for the first search attempt, before any
# title-based search.
doi = None

# ===== END USER INPUT =====


## Search and Display Results

Run the cell below to search for the work in OpenAlex and display the results.


In [5]:
# Validate input: title, year and authors are mandatory, the DOI is optional.
if not title:
    raise ValueError("Title is required. Please provide a title.")
if year is None:
    raise ValueError("Year is required. Please provide a publication year.")
if not authors_string:
    raise ValueError("Authors are required. Please provide author information.")

# Parse authors
authors = parse_author_pairs(authors_string)

# OpenAlex endpoint and request settings
OPENALEX_WORKS_URL = "https://api.openalex.org/works"
REQUEST_TIMEOUT_SECONDS = 30  # give up instead of blocking the kernel forever

# Weights of the partial similarities in the overall score
TITLE_WEIGHT = 0.5
AUTHORS_WEIGHT = 0.25
YEAR_WEIGHT = 0.25


def _fetch_works(params):
    """Query the OpenAlex works endpoint once.

    Args:
        params: dict of query-string parameters (e.g. ``filter``, ``search``).
            They are handed to ``requests`` so that every value is URL-encoded;
            characters such as ``&`` or ``#`` in a title therefore cannot
            corrupt the query string.

    Returns:
        ``(status_code, results)``. ``results`` is ``None`` when the request did
        not return HTTP 200 or when the reported ``meta.count`` is below 1;
        otherwise it is the ``results`` list of the response.

    Raises:
        requests.RequestException: on network errors or when the timeout expires.
    """
    response = requests.get(OPENALEX_WORKS_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        return response.status_code, None
    payload = response.json()  # parse the body only once
    if payload.get("meta", {}).get("count", 0) < 1:
        return response.status_code, None
    return response.status_code, payload.get("results", [])


def _filter_safe(text):
    """Return ``text`` in a form that can be embedded in an OpenAlex ``filter`` value.

    In the filter syntax a comma separates filters and a pipe separates
    alternative values, so both are replaced by spaces.
    NOTE(review): assumes title.search ignores punctuation when matching -
    confirm against the OpenAlex documentation / works_match.py.
    """
    return str(text).replace(",", " ").replace("|", " ")


def _search_openalex(work_title, work_year, work_doi=None):
    """Run the DOI -> title/institution/year -> title-only search cascade.

    Args:
        work_title: title to search for.
        work_year: publication year used as a filter.
        work_doi: optional DOI; when truthy it is tried first.

    Returns:
        ``(work_results, search_successful, search_method)`` where
        ``search_successful`` is True only for a DOI hit and ``search_method``
        is one of ``"DOI"``, ``"title_institution_year_filter"``,
        ``"title_institution_year_general"`` or ``"title_only"``.

    Raises:
        RuntimeError: when the last (title-only) attempt does not return HTTP 200.
    """
    # If DOI is present, search OpenAlex by DOI first
    if work_doi:
        print("🔍 Searching by DOI...")
        _, results = _fetch_works({"filter": f"doi:{work_doi}"})
        if results is not None:
            print(f"✅ Found {len(results)} work(s) by DOI")
            return results, True, "DOI"
        print("⚠️  No match found by DOI, falling back to title/institution/year search")

    print("🔍 Searching by title/institution/year...")
    # Restrict to Politecnico di Torino and to the given publication year
    scope_filter = f"institutions.ror:{ROR_POLITO},publication_year:{work_year}"

    # First attempt: title.search filter combined with institution and year
    _, results = _fetch_works({"filter": f"title.search:{_filter_safe(work_title)},{scope_filter}"})
    if results is not None:
        print(f"✅ Found {len(results)} work(s) by title/institution/year")
        return results, False, "title_institution_year_filter"

    # Second attempt: general search, still filtered by institution and year
    _, results = _fetch_works({"search": work_title, "filter": scope_filter})
    if results is not None:
        print(f"✅ Found {len(results)} work(s) by title/institution/year")
        return results, False, "title_institution_year_general"

    # Third attempt: general search without any filters
    print("🔍 Searching by title only (no filters)...")
    status_code, results = _fetch_works({"search": work_title})
    if status_code != 200:
        print(f"❌ API Error: Status code {status_code}")
        raise RuntimeError(f"API request failed with status code {status_code}")
    if results is None:
        print("⚠️  No match found")
        return [], False, "title_only"
    print(f"✅ Found {len(results)} work(s) by title only")
    return results, False, "title_only"


def _author_names(work):
    """Return the author display names of an OpenAlex work.

    Missing or null ``authorships`` / ``author`` / ``display_name`` entries are
    rendered as "N/A" so that joining and scoring never receive ``None``.
    """
    return [
        (authorship.get("author") or {}).get("display_name") or "N/A"
        for authorship in (work.get("authorships") or [])
    ]


def _score_work(work, ref_title, ref_authors, ref_year):
    """Score an OpenAlex work against the reference title/authors/year.

    Returns:
        ``(overall, title_similarity, authors_similarity, year_similarity)``;
        ``overall`` is the weighted sum 0.5 * title + 0.25 * authors + 0.25 * year.
        The partial scores come from ``utilities.sim_lib`` (presumably values
        in [0, 1] - verify there).
    """
    title_similarity = similarity_titles(ref_title, work.get('display_name') or '')
    authors_similarity = similarity_authors(ref_authors, _author_names(work))
    year_similarity = similarity_years(ref_year, work.get('publication_year'))
    overall = (
        title_similarity * TITLE_WEIGHT
        + authors_similarity * AUTHORS_WEIGHT
        + year_similarity * YEAR_WEIGHT
    )
    return overall, title_similarity, authors_similarity, year_similarity


def _print_match_report(work, scores, method):
    """Print the metadata, the similarity breakdown and the search method of a work."""
    overall, title_similarity, authors_similarity, year_similarity = scores
    authors_oa = _author_names(work)
    print("\n📄 OpenAlex Work:")
    print(f"   Title: {work.get('display_name', 'N/A')}")
    print(f"   ID: {work.get('id', 'N/A')}")
    print(f"   Type: {work.get('type', 'N/A')}")
    print(f"   Publication Year: {work.get('publication_year', 'N/A')}")
    print(f"   Authors: {', '.join(authors_oa) if authors_oa else 'N/A'}")
    print(f"   DOI: {work.get('doi', 'N/A')}")
    print(f"   OpenAlex URL: {work.get('id', 'N/A')}")
    print("\n📊 Similarity Scores:")
    print(f"   Title similarity: {title_similarity:.3f}")
    print(f"   Authors similarity: {authors_similarity:.3f}")
    print(f"   Year similarity: {year_similarity:.3f}")
    print(f"   Overall similarity: {overall:.3f}")
    print(f"\n🔍 Search method: {method}")
    print("=" * 80)


# Display input information
print("=" * 80)
print("SEARCHING FOR WORK:")
print("=" * 80)
print(f"Title: {title}")
print(f"Year: {year}")
print(f"Authors: {authors}")
print(f"DOI: {doi if doi else 'Not provided'}")
print("=" * 80)
print()

# Initialize the variables this cell exposes, so they exist on every path
work_results = []
search_successful = False
search_method = None
best_work = None
best_work_idx = -1
best_score = -1

try:
    work_results, search_successful, search_method = _search_openalex(title, year, doi)

    print()
    print("=" * 80)

    # Process results
    if len(work_results) == 0:
        print("❌ NO MATCHES FOUND")
        print("=" * 80)
        print("\nNo works were found in OpenAlex matching your search criteria.")
        print("You may want to try:")
        print("  - Checking the spelling of the title")
        print("  - Verifying the publication year")
        print("  - Providing a DOI if available")
    elif len(work_results) == 1:
        print("✨ SINGLE MATCH FOUND")
        print("=" * 80)
        best_work_idx = 0
        best_work = work_results[0]
        best_scores = _score_work(best_work, title, authors, year)
        best_score = best_scores[0]
        _print_match_report(best_work, best_scores, search_method)
    else:
        print(f"📚 MULTIPLE MATCHES FOUND ({len(work_results)})")
        print("=" * 80)
        print("Evaluating similarity for all matches...\n")

        # Score every candidate once; the scores are reused for the final report
        all_scores = []
        for idx, work in enumerate(work_results):
            scores = _score_work(work, title, authors, year)
            all_scores.append(scores)
            overall, title_similarity, authors_similarity, year_similarity = scores
            authors_oa = _author_names(work)

            print(f"📄 Match #{idx+1}/{len(work_results)}:")
            print(f"   Title: {work.get('display_name', 'N/A')}")
            print(f"   ID: {work.get('id', 'N/A')}")
            print(f"   Publication Year: {work.get('publication_year', 'N/A')}")
            print(f"   Authors: {', '.join(authors_oa) if authors_oa else 'N/A'}")
            print(f"   📊 Similarity score: {overall:.3f} (Title: {title_similarity:.3f}, Authors: {authors_similarity:.3f}, Year: {year_similarity:.3f})")
            print()

        # max() keeps the first candidate among equal scores, i.e. the
        # earliest-listed result wins a tie
        best_work_idx = max(range(len(all_scores)), key=lambda i: all_scores[i][0])
        best_work = work_results[best_work_idx]
        best_score = all_scores[best_work_idx][0]

        # Display best match
        print("=" * 80)
        print("🏆 BEST MATCH SELECTED:")
        print("=" * 80)
        _print_match_report(best_work, all_scores[best_work_idx], search_method)

except Exception as e:
    # Deliberate catch-all: report the failure (network error, API error,
    # unexpected payload) with its traceback instead of a bare stack trace.
    print(f"\n💥 ERROR: {e}")
    import traceback
    traceback.print_exc()


SEARCHING FOR WORK:
Title: Analisi dell'Influenza delle Incertezze dei Parametri Geotecnici sul Dimensionamento dei Rivestimenti di Gallerie
Year: 1997
Authors: ['D. Peila', 'Pierpaolo Oreste', 'A. Camerotto']
DOI: Not provided

🔍 Searching by title/institution/year...
🔍 Searching by title only (no filters)...
✅ Found 2 work(s) by title only

📚 MULTIPLE MATCHES FOUND (2)
Evaluating similarity for all matches...

📄 Match #1/2:
   Title: Analisi dell'Influenza delle Incertezze dei Parametri Geotecnici sul Dimensionamento dei Rivestimenti di Gallerie
   ID: https://openalex.org/W121731564
   Publication Year: 1997
   Authors: Daniele Peila, Pierpaolo Oreste, Alessandro Camerotto
   📊 Similarity score: 0.967 (Title: 1.000, Authors: 0.867, Year: 1.000)

📄 Match #2/2:
   Title: Indagini preliminari nella costruzione di gallerie: analisi della letteratura tecnica
   ID: https://openalex.org/W1268432473
   Publication Year: 2009
   Authors: Daniele Peila
   üìä Similarity 