# User prompt
The user defines the research objective in a prompt. I use OpenAI to identify relevant keywords for the research.

In [85]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load the API key from the local .env file into the process environment
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Research objective (free text supplied by the user)
research_objective = """
At present, one of our clients is looking to speak with professionals who have insights about the emerging technologies 
in soft contact lens manufacturing, particularly non-injection moulded methods. They would broadly like to understand how 
these technologies are reshaping the industry—from on-demand manufacturing to smart, drug-delivery-enabled lenses.
"""

# Instruction prompt: asks the model for a JSON object holding three lists of
# search terms (OpenAlex terms, patent keywords, CPC codes).
prompt = f"""
You are assisting a researcher in generating targeted search terms.

Return a JSON object with the following structure:
- "openalex": exactly 5 academic search terms, each consisting of a single word
- "patentview": exactly 5 patent-related keywords, each consisting of a single word
- "cpc_codes": exactly 5 relevant CPC classification codes

Instructions:
- All search terms must be single words (no spaces, no hyphens)
- Use only valid JSON
- Do not include markdown, comments, or explanations
- Ensure terms are concise, domain-relevant, and technically meaningful

Research Objective:
\"\"\"
{research_objective}
\"\"\"
"""

# OpenAI API call
response = client.chat.completions.create(
    model="gpt-3.5-turbo", # Cost efficient model
    # model="gpt-4-turbo",
    messages=[{"role": "user", 
               "content": prompt}],
    # The prompt only *asks* for valid JSON; JSON mode makes the API enforce it,
    # so the reply cannot come back wrapped in markdown fences or extra prose.
    response_format={"type": "json_object"},
)

# Print the results
print(response.choices[0].message.content)

{
    "openalex": [
        "contactlens",
        "manufacturing",
        "noninjection",
        "ondemand",
        "drugdelivery"
    ],
    "patentview": [
        "innovation",
        "technology",
        "manufacture",
        "smart",
        "delivery"
    ],
    "cpc_codes": [
        "G02C 7/04",
        "G02C 7/02",
        "G02B 1/04",
        "A61F 2/16",
        "A61F 2/02"
    ]
}


# OpenAlex API
https://docs.openalex.org/

Valid parameters are: 
* apc_sum, 
* cited_by_count_sum, 
* cursor, 
* filter, 
* format, 
* group_by, 
* group-by, 
* group_bys, 
* group-bys, 
* mailto, 
* page, 
* per_page, 
* per-page, 
* q, 
* sample, 
* seed, 
* search, 
* select, 
* sort, 
* warm.

In [None]:
import requests
import pandas as pd
from typing import Any


def reconstruct_abstract(abstract_inverted_index: dict[str, list[int]]) -> str:
    '''
    Reconstruct the abstract text from abstract_inverted_index.

    The inverted index maps each word to the list of positions at which that
    word occurs in the abstract.

    Args:
        abstract_inverted_index: Mapping of word -> list of positions. Any
            falsy value (None, {}, []) is treated as "no abstract".

    Returns:
        The words joined by single spaces in ascending position order, or an
        empty string when there is nothing to reconstruct.
    '''

    # Some works don't have an abstract
    if not abstract_inverted_index:
        return ""

    # Map every occupied position to its word. If two words claim the same
    # position, the one iterated last wins.
    word_at_position: dict[int, str] = {}
    for word, positions in abstract_inverted_index.items():
        for position in positions:
            word_at_position[position] = word

    # Join only the occupied positions. Pre-filling a list with None and
    # joining it raises TypeError as soon as the index has a gap (or a word
    # with an empty position list), so gaps are simply skipped here.
    abstract_text: str = " ".join(
        word_at_position[position] for position in sorted(word_at_position)
    )

    return abstract_text

# Endpoint of the OpenAlex works search
url: str = "https://api.openalex.org/works"

key_word: str = "smart"
search_terms: str = f"contact lens AND {key_word}"
mailto: str = "adyl.elguamra@gmail.com" #For best performance, add your email to all API requests

per_page: int = 15 # By default there are 25 results per page
page: int = 1 # Get the result from page number
request_timeout: int = 30 # Seconds to wait before giving up on the HTTP request

params: dict = {
    "search": search_terms, # searches across titles, abstracts, and fulltext.
    "per_page": per_page, 
    "page": page, # if needed I can loop over pages. for example from page 1 to 5 with a for loop
    "sort": "relevance_score:desc",
    "mailto": mailto   
}
# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell never finishes; with one, a Timeout exception is raised instead.
response: requests.Response = requests.get(url, params=params, timeout=request_timeout)

if response.status_code == 200:

    # Parse the body once and reuse it for both the records and the debug print
    data: dict[str, Any] = response.json()

    works: list[dict[str, Any]] = data.get("results", []) # access a key in dictionary

    # Extract info into list of dicts (one dict per work -> one DataFrame row)
    records: list[dict[str, Any]] = []
    for work in works:

        # The key may be absent or explicitly null; normalise both to an empty dict
        abstract_inverted_index: dict[str, list[int]] = work.get("abstract_inverted_index") or {}
        abstract = reconstruct_abstract(abstract_inverted_index)

        # Tolerate authorship entries with a missing/null "author" object
        # instead of raising; such entries yield None as the name.
        authors: list = [
            (authorship.get("author") or {}).get("display_name")
            for authorship in (work.get("authorships") or [])
        ]

        record: dict[str, Any] = {
            "Title": work.get("title"),
            "Abstract": abstract,
            "Year": work.get("publication_year"),
            "Citations": work.get("cited_by_count"),
            "Authors": authors,
            "OpenAlex ID": work.get("id"),
        }
        records.append(record)

    print("Final URL:", response.url)
    print("JSON Data:", data)

    # Convert to DataFrame
    df: pd.DataFrame = pd.DataFrame(records)
    display(df)

else:
    print(f"Failed to fetch data. Status code: {response.status_code}")
    print(response.text)

Final URL: https://api.openalex.org/works?search=contact+lens+AND+smart&per_page=15&page=1&sort=relevance_score%3Adesc&mailto=adyl.elguamra%40gmail.com
JSON Data: {'meta': {'count': 38708, 'db_response_time_ms': 113, 'page': 1, 'per_page': 15, 'groups_count': None}, 'results': [{'id': 'https://openalex.org/W2785523439', 'doi': 'https://doi.org/10.1126/sciadv.aap9841', 'title': 'Soft, smart contact lenses with integrations of wireless circuits, glucose sensors, and displays', 'display_name': 'Soft, smart contact lenses with integrations of wireless circuits, glucose sensors, and displays', 'relevance_score': 811.2458, 'publication_year': 2018, 'publication_date': '2018-01-05', 'ids': {'openalex': 'https://openalex.org/W2785523439', 'doi': 'https://doi.org/10.1126/sciadv.aap9841', 'mag': '2785523439', 'pmid': 'https://pubmed.ncbi.nlm.nih.gov/29387797', 'pmcid': 'https://www.ncbi.nlm.nih.gov/pmc/articles/5787380'}, 'language': 'en', 'primary_location': {'is_oa': True, 'landing_page_url': 

Unnamed: 0,Title,Abstract,Year,Citations,Authors,OpenAlex ID
0,"Soft, smart contact lenses with integrations o...","This study presents a soft, smart contact lens...",2018,581,"[Jihun Park, Joohee Kim, So-Yun Kim, Woon Hyun...",https://openalex.org/W2785523439
1,Wireless smart contact lens for diabetic diagn...,We demonstrate smart contact lenses for noninv...,2020,380,"[Do Hee Keum, Su‐Kyoung Kim, Jahyun Koo, Geon‐...",https://openalex.org/W3006183395
2,Wearable smart sensor systems integrated on so...,Abstract Wearable contact lenses which can mon...,2017,834,"[Joohee Kim, Minji Kim, Mi‐Sun Lee, Kukjoo Kim...",https://openalex.org/W2609205766
3,Smart Contact Lenses with Graphene Coating for...,"Recently, smart contact lenses with electronic...",2017,235,"[Sangkyu Lee, Insu Jo, Sangmin Kang, Bongchul ...",https://openalex.org/W2588631014
4,"Smart, soft contact lens for wireless immunose...",Despite various approaches to immunoassay and ...,2020,220,"[Minjae Ku, Joohee Kim, Jong‐Eun Won, Wonkyu K...",https://openalex.org/W3041770184
5,Recent Advances in Smart Contact Lenses,Abstract The eyes can provide rich physiologic...,2019,96,"[Joohee Kim, Eunkyung Cha, Jang‐Ung Park]",https://openalex.org/W2989939642
6,Printing of wirelessly rechargeable solid-stat...,Wirelessly rechargeable solid-state supercapac...,2019,145,"[Jihun Park, David B. Ahn, Joohee Kim, Eunkyun...",https://openalex.org/W2994565315
7,Smart Contact Lenses for Biosensing Applications,Smart contact lenses have emerged as novel wea...,2021,68,"[Xin Ma, Samad Ahadian, Song Liu, Jingwen Zhan...",https://openalex.org/W3159136874
8,Bimetallic Nanocatalysts Immobilized in Nanopo...,Smart contact lenses for continuous glucose mo...,2022,125,"[Su‐Kyoung Kim, Geon‐Hui Lee, Cheonhoo Jeon, H...",https://openalex.org/W4213420944
9,Wireless theranostic smart contact lens for mo...,Abstract Glaucoma is one of the irreversible o...,2022,86,"[Tae Yeon Kim, Jee Won Mok, Sang Hoon Hong, Sa...",https://openalex.org/W4308709560
