# User prompt
The user define the research objective a prompt. I use openAI to identify relevant key words for the research.
Documentation:
* https://platform.openai.com/docs/guides/text?api-mode=responses&lang=python

In [13]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from openai.types.responses import Response

# Load the API key
load_dotenv()

client: OpenAI = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Research objective
research_objective: str = """
At present, one of our clients is looking to speak with professionals who have insights about the emerging technologies 
in soft contact lens manufacturing, particularly non-injection moulded methods. They would broadly like to understand how 
these technologies are reshaping the industry—from on-demand manufacturing to smart, drug-delivery-enabled lenses.
"""
# TODO: improve the prompt to extract words from the research objective
prompt: str = f"""
You are assisting a researcher in generating targeted search terms for academic and patent literature related to the research topic described below.

Return a JSON object with the following structure:
- "main_topic": a concise list of words (2–3 words) extracted from the Research Objective that reflects the core technological focus
- "openalex": exactly 5 academic search terms, extracted from the Research Objective that reflects the client objective
- "patentview": exactly 5 patent-related keywords, extracted from the Research Objective that reflects the client objective
- "cpc_codes": exactly 5 valid CPC classification codes relevant to the topic

Instructions:
- Output must be valid JSON only — no markdown, comments, or extra text
- All terms in "openalex" and "patentview" must be single or double words
- Do NOT include any words or close variants from "main_topic" in "openalex" or "patentview"
- All terms across the fields must be unique — no repetition or synonyms
- Each term in "openalex" and "patentview" must be conceptually compatible with the "main_topic" so that combining them (e.g. "main_topic" AND "keyword") produces a realistic and meaningful research query
- Use language and terminology commonly found in scientific publications and patent documents

Research Objective:
\"\"\" 
{research_objective} 
\"\"\"
"""

# Selected model
# model="gpt-3.5-turbo",
# model="gpt-4-turbo",
# model="gpt-4o", 
GPT_MODEL = "gpt-4o-mini"

response: Response = client.responses.create(

    model=GPT_MODEL,
    input = prompt
)

print(f"The lenght of research objective is: {len(research_objective)}")

# print(type(response))
print(response.output_text)

The lenght of research objective is: 360
{
  "main_topic": ["contact lenses"],
  "openalex": [
    "manufacturing methods",
    "emerging technologies",
    "smart materials",
    "drug delivery",
    "on-demand production"
  ],
  "patentview": [
    "non-injection moulding",
    "flexible polymers",
    "lens customization",
    "optical coatings",
    "bioactive components"
  ],
  "cpc_codes": [
    "A61F 2/00",
    "B29C 51/00",
    "C08J 5/00",
    "C08L 83/00",
    "G02C 7/10"
  ]
}


In [7]:
import json

def load_json(response_output: Response) -> dict[str, str | list[str]]:
    # Load the json file into dictionary
    json_dict:dict[str, str | list[str]] = json.loads(response_output)
    # print("Parsed dict:", research_key_words)
    return json_dict

# Print the main topic
research_key_words = load_json(response.output_text)
print(research_key_words["main_topic"])
# print(research_key_words["openalex"][0])

['contact lens']


# Openalex API
https://docs.openalex.org/

Valid parameters are: 
* apc_sum, 
* cited_by_count_sum, 
* cursor, 
* filter, 
* format, 
* group_by, 
* group-by, 
* group_bys, 
* group-bys, 
* mailto, 
* page, 
* per_page, 
* per-page, 
* q, 
* sample, 
* seed, 
* search, 
* select, 
* sort, 
* warm.'

## API calls to load papers

In [8]:
import requests
import pandas as pd
from typing import Any

def reconstruct_abstract(abstract_inverted_index: dict[str, list[int]]) -> str:
    '''
    Reconstruct the abstract from abstract_inverted_index
    '''
    
    # Some works don't have an abstract
    if not abstract_inverted_index:
        return ""
        
    # Variable to store the highest index
    max_value: int = 0
     # Loop through all the list of position in the abstract_inverted_index dictionary.
    for values in abstract_inverted_index.values():
        # Loop through all the index value
        for value in values:
            # identify the highest value index
            if value >= max_value:
                max_value = value
                
    # Create an empty list with abstract size        
    abstract: list[str] = [None] * (max_value +1)

    # Loop through each word in the abstract_inverted_index:
    for word, positions in abstract_inverted_index.items():
        # For each word, get the list of positions it appears in.
        for position in positions:
            # Insert each word into its correct position in the list.
            abstract[position]= word
                        
    # Join all the words in the list into a single string, separated by spaces.
    abstract_text: str = " ".join(abstract)
    # print("\n", abstract_text)
    
    return abstract_text

# Openalex url
url: str = "https://api.openalex.org/works"

# TODO: make a search by years: for example in the last 10 years
main_topic: str = research_key_words["main_topic"]
research_key_word: str = research_key_words["openalex"][0]

search_terms: str = f"({main_topic} AND {research_key_word})"
mailto: str = "adyl.elguamra@gmail.com" #For best performance, add your email to all API requests

per_page: int = 15 # By default there are 25 results per page
page: int = 1 # Get the result from page number

params: dict = {
    "search": search_terms, # searches across titles, abstracts, and fulltext.
    "per_page": per_page, 
    "page": page, # if needed I can loop over pages. for example from page 1 to 5 with a for loop
    "sort": "relevance_score:desc",
    "mailto": mailto   
}

response: requests.Response = requests.get(url, params=params)

if response.status_code == 200:
    
    data: dict[str, Any] = response.json()

    works: list[dict[str, Any]] = data.get("results", []) # access a key in dictionary

    # Extract info into list of dicts
    records: list = []
    # loop in the list
    for work in works:
        
        abstract_inverted_index: dict[str, list[int]] = work.get("abstract_inverted_index", [])
        # print(type(abstract_inverted_index))
        abstract = reconstruct_abstract(abstract_inverted_index)
         
        record: dict[str, Any] = {
            "title": work.get("title"),
            "abstract": abstract,
            "publication_date": work.get("publication_date"),
            "year": work.get("publication_year"),
            "citations": work.get("cited_by_count"),
            "authors": [auth["author"]["display_name"] for auth in work.get("authorships", [])],
            "openAlex id": work.get("id"),
        }
        records.append(record)

    print("Final URL:", response.url)
    print("JSON Data:", response.json())

    # Convert to DataFrame
    df: pd.DataFrame = pd.DataFrame(records)
    

else:
    print(f"Failed to fetch data. Status code: {response.status_code}")
    print(response.text)

Final URL: https://api.openalex.org/works?search=%28%5B%27contact+lens%27%5D+AND+emerging+technologies%29&per_page=15&page=1&sort=relevance_score%3Adesc&mailto=adyl.elguamra%40gmail.com
JSON Data: {'meta': {'count': 113664, 'db_response_time_ms': 242, 'page': 1, 'per_page': 15, 'groups_count': None}, 'results': [{'id': 'https://openalex.org/W2909056211', 'doi': 'https://doi.org/10.3390/ma12020261', 'title': 'Contact Lens Materials: A Materials Science Perspective', 'display_name': 'Contact Lens Materials: A Materials Science Perspective', 'relevance_score': 234.32004, 'publication_year': 2019, 'publication_date': '2019-01-14', 'ids': {'openalex': 'https://openalex.org/W2909056211', 'doi': 'https://doi.org/10.3390/ma12020261', 'mag': '2909056211', 'pmid': 'https://pubmed.ncbi.nlm.nih.gov/30646633', 'pmcid': 'https://www.ncbi.nlm.nih.gov/pmc/articles/6356913'}, 'language': 'en', 'primary_location': {'is_oa': True, 'landing_page_url': 'https://doi.org/10.3390/ma12020261', 'pdf_url': 'http

In [9]:
# Print the dataframe
df.head()

Unnamed: 0,title,abstract,publication_date,year,citations,authors,openAlex id
0,Contact Lens Materials: A Materials Science Pe...,More is demanded from ophthalmic treatments us...,2019-01-14,2019,301,"[Christopher S. A. Musgrave, Fengzhou Fang]",https://openalex.org/W2909056211
1,Management of keratoconus: current scenario,Keratoconus is an ectatic corneal dystrophy an...,2010-08-07,2010,223,"[Vishal Jhanji, Nikhil Sharma, R B Vajpayee]",https://openalex.org/W2142581967
2,Versatile Application of Nanocellulose: From I...,Nanocellulose is cellulose in the form of nano...,2019-01-29,2019,334,"[Lucie Bačáková, Júlia Pajorová, Markéta Bačák...",https://openalex.org/W2911675334
3,<i>In Vivo</i>Confocal Microscopy of the Ocula...,In vivo confocal microscopy (IVCM) is an emerg...,2013-11-11,2013,188,"[Edoardo Villani, Christophe Baudouin, Nathan ...",https://openalex.org/W2122769365
4,"Principles and practice of disinfection, prese...",Part 1: Disinfection and Antisepsis: 1. Histor...,1992-01-01,1992,460,"[A.D. Russell, W. B. Hugo, G.A.J. Ayliffe]",https://openalex.org/W1579150349


## Filter relevant papers
Documentation:
* https://platform.openai.com/docs/guides/embeddings
* https://python.langchain.com/docs/tutorials/retrievers/
* https://cookbook.openai.com/examples/get_embeddings_from_dataset

In [None]:
# 1. **Create a new column** in the DataFrame that combines the title and abstract into a single `text` field for each paper.
df["text"] = df["title"] + ". " + df["abstract"]
df.head()

Unnamed: 0,title,abstract,publication_date,year,citations,authors,openAlex id,text
0,Contact Lens Materials: A Materials Science Pe...,More is demanded from ophthalmic treatments us...,2019-01-14,2019,301,"[Christopher S. A. Musgrave, Fengzhou Fang]",https://openalex.org/W2909056211,Contact Lens Materials: A Materials Science Pe...
1,Management of keratoconus: current scenario,Keratoconus is an ectatic corneal dystrophy an...,2010-08-07,2010,223,"[Vishal Jhanji, Nikhil Sharma, R B Vajpayee]",https://openalex.org/W2142581967,Management of keratoconus: current scenario. K...
2,Versatile Application of Nanocellulose: From I...,Nanocellulose is cellulose in the form of nano...,2019-01-29,2019,334,"[Lucie Bačáková, Júlia Pajorová, Markéta Bačák...",https://openalex.org/W2911675334,Versatile Application of Nanocellulose: From I...
3,<i>In Vivo</i>Confocal Microscopy of the Ocula...,In vivo confocal microscopy (IVCM) is an emerg...,2013-11-11,2013,188,"[Edoardo Villani, Christophe Baudouin, Nathan ...",https://openalex.org/W2122769365,<i>In Vivo</i>Confocal Microscopy of the Ocula...
4,"Principles and practice of disinfection, prese...",Part 1: Disinfection and Antisepsis: 1. Histor...,1992-01-01,1992,460,"[A.D. Russell, W. B. Hugo, G.A.J. Ayliffe]",https://openalex.org/W1579150349,"Principles and practice of disinfection, prese..."


In [62]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Wrap the string into a Document object
abstract_doc: list[Document] = [Document(page_content=df.loc[3, "abstract"])]

# Type and length of docs
print(type(abstract_doc))
print(len(abstract_doc))


# Splitting
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
abstract_docs = text_splitter.split_documents(abstract_doc)

<class 'list'>
1


In [63]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

vectorstore= InMemoryVectorStore.from_documents(
    documents=abstract_docs, embedding=OpenAIEmbeddings()
)

retriever = vectorstore.as_retriever()

query = "is the document relevant for the client?"

retrieved_docs = retriever.invoke(query)

print(f"Number of retrieved text chuncks: {len(retrieved_docs)}")

Number of retrieved text chuncks: 4


In [64]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model=GPT_MODEL)

In [65]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Keep the "
    "answer precise and concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [66]:
# Create a chain for passing a list of Documents to a model.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Create retrieval chain that retrieves documents and then passes them on.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

results = rag_chain.invoke({"input": query})

print(f"Query: {query}\n")
print(f"Answer: {results['answer']}\n")
print("Sources:")
for document in results["context"]:
    print(f"\n {document}")
    print()

Query: is the document relevant for the client?

Answer: Yes, the document is relevant for the client as it discusses advancements in In Vivo Confocal Microscopy (IVCM) and its applications in diagnosing and managing eye diseases, understanding corneal alterations, and evaluating ocular responses, which can be valuable for clients in ocular health fields.

Sources:

 page_content='bench to bedside". IVCM allows prompt diagnosis, disease course follow-up, and management of potentially blinding atypical forms of infectious processes, such as acanthamoeba and fungal keratitis. This technology has improved our knowledge of corneal alterations and some of the processes that affect the visual outcome after lamellar keratoplasty and excimer keratorefractive surgery. In dry eye disease, IVCM has provided new information on the whole-ocular surface morphofunctional unit. It has'


 page_content='the conjunctival wound healing process, and to assess corneal changes induced by topical antiglaucom

In [None]:
from langchain_openai import ChatOpenAI

# Set your OpenAI API key
# os.environ["OPENAI_API_KEY"] = config.OPENAI_API_KEY

llm = ChatOpenAI(model=GPT_MODEL)

In [None]:
# 2. **Generate an embedding** for the research objective using OpenAI’s embedding model.
from openai import OpenAI
# client = OpenAI()

# Model selection based on price: https://platform.openai.com/docs/pricing
# Function source: https://platform.openai.com/docs/guides/embeddings
def get_embedding(text, model="text-embedding-3-small"):
    text = text.replace("\n", " ")
    return client.embeddings.create(input = [text], model=model).data[0].embedding

research_objective_embeddings = get_embedding(research_objective)

In [17]:
# This block follows the tutorial: https://python.langchain.com/docs/tutorials/retrievers/
from langchain_text_splitters import RecursiveCharacterTextSplitter

def text_splitting(text:str):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=100, chunk_overlap=20, add_start_index=True
    )
    all_splits = text_splitter.split_documents(text)
    print(len(all_splits))
    return all_splits

text_splitting(research_objective)



AttributeError: 'str' object has no attribute 'page_content'

In [None]:


# 5. For each paper in the DataFrame:

#    * Generate an embedding for the combined `text`.
#    * Calculate the cosine similarity between the paper's embedding and the objective embedding.

# 6. **Filter the dataset** to include only papers with a similarity score above a chosen threshold (e.g., 0.75). These are considered potentially relevant.

# 7. For each filtered paper:

#    * **Check the length** of the abstract.
#    * If the abstract is longer than 2,000 characters:

#      * Split it into smaller, overlapping chunks (e.g., 1,000 characters per chunk with 200-character overlap).
#      * For each chunk:

#        * Create a GPT prompt using the chunk, the paper’s title, and the research objective.
#        * Send the prompt to GPT and collect its response.
#      * Combine the responses from all chunks into one overall justification.
#    * If the abstract is short enough:

#      * Use the full abstract in a single prompt.
#      * Send the prompt to GPT and receive a justification.

# 8. **Validate the GPT response** to ensure it returns a properly formatted JSON object.

# 9. **Extract the justification and relevance flag** from the JSON and store them in new columns in the DataFrame.

# 10. If a response is invalid or fails:

#     * Retry the request or log it as an error for manual review.

# 11. Finally, **export the updated DataFrame** to a CSV or JSON file for further analysis or reporting.


In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import time

# # The function comes from: https://platform.openai.com/docs/guides/embeddings
# def get_embedding(text, model="text-embedding-ada-002"):
#     text = text.replace("\n", " ")
#     response = client.embeddings.create(input=[text], model=model)
#     return response.data[0].embedding


# print("Embedding query...")
# query_embedding = get_embedding(research_objective)

df["text"] = df["title"] + ". " + df["abstract"]

# print("Embedding papers...")
# df["embedding"] = df["text"].apply(get_embedding)

# df["similarity"] = df["embedding"].apply(
#     lambda x: cosine_similarity([query_embedding], [x])[0][0]
# )

# Get the text length
df["text_length"]= df["text"].apply(lambda x: len(x))

df[["title", "text", "text_length"]].head()

In [None]:
import pandas as pd
from openai import OpenAI

def get_justification(title, abstract, objective):
    prompt = f"""
    You are the CEO, as well as a scientific and regulatory analyst, evaluating academic research for a company exploring new technologies in soft contact lenses.

    Below is a paper's title and abstract, followed by the company's research objective. Assess the paper across eight dimensions critical to industry adoption. Your evaluation should reflect both business and technical perspectives.

    Title:  
    {title}

    Abstract:  
    {abstract}

    Research Objective:  
    {objective}

    For each dimension below, start with **Yes** or **No**, followed by a **1–2 sentence explanation** based only on the title and abstract. Be **concise**, **specific**, and **fact-based**. Avoid speculation or vague generalizations.

    1. **Technical Relevance** – Does the core technology directly relate to soft contact lenses or relevant materials/devices?  
    2. **Innovation** – Is there a clear, specific novelty or improvement over existing technologies?  
    3. **Feasibility** – Is the approach practical for real-world use or scalable manufacturing?  
    4. **Regulatory Fit** – Does it show potential to meet medical device or material safety regulations?  
    5. **Commercial Potential** – Is there a clear path to productization, monetization, or licensing?  
    6. **Research Credibility** – Are the science, methods, or authors/institutions reputable?  
    7. **IP / Competition** – Is the idea likely protectable or positioned ahead of competitors?  
    8. **Overall Relevance** – Is the paper relevant overall? Start with Yes or No, then briefly explain why.

    Format your response exactly like this:

    Technical Relevance: Yes/No – [reason]  
    Innovation: Yes/No – [reason]  
    Feasibility: Yes/No – [reason]  
    Regulatory Fit: Yes/No – [reason]  
    Commercial Potential: Yes/No – [reason]  
    Research Credibility: Yes/No – [reason]  
    IP / Competition: Yes/No – [reason]  
    Overall Relevance: Yes/No – [reason]

    Only use what is stated or implied in the title and abstract.
    
    Return a valid JSON object with exactly the following keys:

    - "justification": a string with the full 8-dimension evaluation in the exact format described above
    - "is_relevant": a string, either "yes" or "no", based on your answer to the "Overall Relevance" question

    Ensure the JSON object is valid and returned as plain text — no markdown or extra explanation.
    """


    try:
        response: Response = client.responses.create(
        model=GPT_MODEL,
        input = prompt
        )
        return response.output_text
    except Exception as e:
        return f"ERROR: {e}"
    
print("Generating GPT justifications...")


# TODO: optimize the code with .apply()
# Function to apply to each row in the DataFrame
def extract_justification_fields(row: pd.Series) -> pd.Series:
    
    # Get the json
    response: str = get_justification(row["title"], row["abstract"], research_objective)
    # Conver the json to a dictionary
    result: dict[str, str | list[str]] = json.loads(response)
    
    return pd.Series({
        "justification": result.get("justification", "Missing"),
        "is_relevant": result.get("is_relevant", "Unknown")
    })

# Apply the function across all rows in the DataFrame
df[["justification", "is_relevant"]] = df.apply(extract_justification_fields, axis=1)

In [None]:
# Show full column contents and more columns
pd.set_option("display.max_colwidth", None)    # Show full text in cells
# pd.set_option("display.max_columns", None)     # Show all columns
# pd.set_option("display.width", 0)              # Auto-detect width (or set a high number)

df[["title","publication_date", "is_relevant", "justification"]].head()

## Openalex: trends in relevant papers
* Could we identify that confirms the trends?



In [None]:
# Imports necessary libraries: pandas, cosine_similarity, OpenAI, and time.
import requests
import pandas as pd
from typing import Any

# Defines a function get_embedding() to clean text and get its vector embedding using OpenAI's API.

# Embeds the research objective and stores the result in query_embedding.

# Creates a new text column in the DataFrame by combining title and abstract.

# Applies get_embedding() to the text column to embed each paper.

# Calculates cosine similarity between each paper’s embedding and the query embedding, storing the result in a new similarity column.

# Defines a get_justification() function that builds a prompt asking GPT to evaluate the paper across 8 dimensions based on title, abstract, and research objective.

# Sends the prompt to the OpenAI API and returns the JSON response containing evaluation and relevance.

# Prints a message to indicate the start of GPT-based justification generation.

# Defines extract_justification_fields() to run get_justification() on a row and extract the justification and relevance flag from the response.

# Applies extract_justification_fields() across all rows in the DataFrame using .apply(), storing the output in justification and is_relevant columns.

# Ends with a likely typo or error: axis=1) break down this code..., which should be cleaned up.

In [None]:
openalex_df = df.copy()
# Filter the dataframe to keep only the relevant rows
openalex_df =  openalex_df[openalex_df ["is_relevant"]=="yes"]

# combine all the text into a single string

openalex_text = " ".join(openalex_df["text"])

print(openalex_text)

# openalex_df[["title","is_relevant", "text"]].head()




In [None]:
import os
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.chat_models import ChatOpenAI

# --- CONFIGURATION ---
CHUNK_WORDS = 500
EMBED_MODEL = "text-embedding-ada-002"
GPT_MODEL = "gpt-4o"
# os.environ["OPENAI_API_KEY"] = "your-api-key"  # Replace or use dotenv if needed

# --- STEP 1: CHUNK THE TEXT ---
def chunk_text(text: str, chunk_size: int = CHUNK_WORDS):
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]

# --- STEP 2: CREATE DOCUMENT OBJECTS ---
def make_documents(chunks):
    return [Document(page_content=chunk) for chunk in chunks]

# --- STEP 3: EMBED AND STORE IN FAISS VECTORSTORE ---
def build_vectorstore(documents):
    embedding = OpenAIEmbeddings(model=EMBED_MODEL)
    vectorstore = FAISS.from_documents(documents, embedding)
    return vectorstore

# --- STEP 4: SUMMARIZE TECHNOLOGICAL TRENDS ---
def summarize_trends(chunks: list) -> str:
    research_objective = """
    At present, one of our clients is looking to speak with professionals who have insights about the emerging technologies 
    in soft contact lens manufacturing, particularly non-injection moulded methods. They would broadly like to understand how 
    these technologies are reshaping the industry—from on-demand manufacturing to smart, drug-delivery-enabled lenses.
    """

    prompt = f"""
    # Research Objective
    \"\"\" 
    {research_objective} 
    \"\"\"

    You are a technology analyst mandated by the client. The following text is from academic literature on emerging technologies.

    Based on the information provided in the chunks, and in alignment with the research objective above, identify 3–7 key technological trends that emerge. 

    Avoid generalities and focus on concrete, specific advancements or trends.

    Chunks:
    {" ".join(chunks[:10])}

    Return a clear, concise bullet-point summary of the trends.
    """

    llm = ChatOpenAI(model_name=GPT_MODEL, temperature=0)
    response = llm.predict(prompt)
    return response

# --- MAIN EXECUTION ---

# Step 0: Combine text
openalex_text = " ".join(openalex_df["text"].astype(str))

# Step 1–2: Chunk and create LangChain Documents
chunks = chunk_text(openalex_text)
documents = make_documents(chunks)

# Step 3: Embed and store in FAISS
vectorstore = build_vectorstore(documents)

# Step 4: Summarize trends
tech_trends_summary = summarize_trends(chunks)

# Output
print("\nIdentified Technological Trends:\n")
print(tech_trends_summary)


# Patentview API

# FDA api

Documentation:
* https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpma/pma.cfm
* https://open.fda.gov/apis/