# API access 
API keys are acquired from Perplexity Pro.  

In [73]:
import csv
import requests
import random
from datetime import datetime
from api_keys import api_key


# Function to make API call and get LLM response
def get_llm_response(prompt, model):
    """
    Send a prompt to a chat-completions endpoint and return the reply.

    The model name "mistral-small" is routed to the Mistral endpoint; every
    other model name is routed to the Perplexity endpoint. Both requests are
    authorised with the module-level ``api_key`` imported from ``api_keys``.

    Failures are reported through the return value rather than raised: if the
    request fails, or the response body does not have the expected
    ``choices[0].message.content`` / ``choices[0].finish_reason`` shape, the
    answer holds the error text (or raw response body) and the finish reason
    is the string "error".

    :param prompt: str: User query
    :param model: str: Model name
    :return: tuple: (answer, finish_reason, lodAIC) where answer is the reply
        flattened to a single line, finish_reason is the value reported by
        the API (or "error"), and lodAIC is a randomly generated identifier
        of the form "pid_graph:APXXXXXXXX"
    """
    # Do not assign to ``api_key`` in this function: an assignment would make
    # the name local and reading the imported key would then raise
    # UnboundLocalError.
    if model == "mistral-small":
        api_url = "https://api.mistral.ai/v1/chat/completions"
    else:
        api_url = "https://api.perplexity.ai/chat/completions"

    headers = {
        "content-type": "application/json",
        "accept": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": str(model),
        "messages": [
            {"role": "system", "content": "You are a helpful information retrieval assistant"},
            {"role": "user", "content": str(prompt)},
        ],
    }

    try:
        # A timeout keeps one stalled connection from hanging a whole batch.
        response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    except requests.RequestException as exc:
        answer = str(exc)
        finish_reason = "error"
    else:
        try:
            # Parse the body once and read both fields from the same choice.
            choice = response.json()['choices'][0]
            content = choice['message']['content']
            finish_reason = choice['finish_reason']
            # Replace "\r\n" before "\n"; the other order leaves a stray "\r".
            answer = content.replace('\r\n', ' ').replace('\n', ' ')
        except (KeyError, IndexError, TypeError, AttributeError, ValueError):
            # Non-JSON body (ValueError) or JSON without the expected shape:
            # keep the raw body so the failure can be inspected later.
            answer = response.text
            finish_reason = "error"

    # Generate lodAIC: a random 32-bit number rendered as 8 upper-case hex digits
    random_number = random.randint(0, 0xFFFFFFFF)
    lodAIC = f"pid_graph:AP{random_number:08X}"

    return answer, finish_reason, lodAIC


In [87]:
# Function to write data to CSV
def write_to_csv(data, filename):
    """
    Append a single row to a CSV file.

    The file is opened in append mode with UTF-8 encoding, so it is created
    when missing and existing rows are left untouched.

    :param data: iterable: Field values of the row to write
    :param filename: str: Path of the CSV file
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerow(data)

# Main script
csv_filename = "llm_responses.csv"

# Create the output file with its header row only when it does not exist yet:
# exclusive-create mode ('x') raises FileExistsError for an existing file,
# which is then left as it is.
try:
    file = open(csv_filename, mode='x', newline='', encoding='utf-8')
except FileExistsError:
    pass
else:
    with file:
        writer = csv.writer(file)
        writer.writerow(["lodAIC", "Model", "Prompt", "Answer", "Date", "Finish Reason"])



In [94]:
import pandas as pd 
# Load the identifier table; the values of its "Title" column are the PIDs
# that get inserted into the prompts.
df = pd.read_csv("Identifier.csv")
pids = df.loc[:, "Title"].tolist()

In [90]:
def make_prompt(pid, prop):
    """
    Build the classification prompt for one PID and one property.

    The returned text keeps the indentation and trailing whitespace of the
    template literals below; only ``{pid}`` is substituted.

    :param pid: str: Persistent Identifier inserted after "**Input**:"
    :param prop: str: Property to ask about; one of "metadata", "structure"
        or "governance"
    :return: str: Prompt text for the requested property
    :raises ValueError: If prop is not one of the supported property names
    """
    if prop == "metadata":
        # NOTE(review): the desired-format line below says
        # "<structure category>" although this is the metadata prompt; it
        # looks like a copy-paste slip but is left unchanged so that new
        # responses stay comparable with ones already collected. Confirm.
        prompt = f""" 
        **Instruction**:  
        You are an information retrieval assistant. Given a Persistent Identifier (PID), determine its metadata schema rules using registry documentation and known patterns. Choose from:  
        1. **No Metadata Schema Prescribed** (No enforced schema)  
        2. **Common Metadata Schema for Identifier** (Standardized schema for all instances)  
        3. **Metadata Schema per Entity** (Class-specific schemas for different entities)  
        4. **Custom/Non-Standard Metadata Schema** (No restrictions on schema structure)  
        5. **Unknown** (insufficient information)  

        **Desired Format**:  
        <PID>: <structure category>, <confidence>
        **Input**: {pid}  
        **Output**:  
        """

    elif prop == "structure":
        prompt = f""" 
        **Instruction**:  
        You are an information retrieval assistant. Given a Persistent Identifier (PID), determine the structure of the prefixes and identifier, using PID registry metadata and known PID system patterns. Choose from:  
        1. **Allows User Semantics** (Identifier contains user-defined meaningful components)  
        2. **Allows Managed Prefix** (Prefixes are assigned to different organizations)  
        3. **No Prefix** (No namespace/prefix component)  
        4. **Predefined Identifier Structure** (Fixed format with no user-defined components)  
        5. **Unknown** (insufficient information)  

        **Desired Format**:  
        <PID>: <structure category>, <confidence>
        **Input**: {pid}
        **Output**:   
        """

    elif prop == "governance":

        prompt = f""" 
         **Instruction**:  
        You are an information retrieval assistant. Given a Persistent Identifier (PID), determine its governance mechanism using PID registry metadata and known PID system patterns. Choose from:  
        1. **Community Governance** (open participation)  
        2. **Membership Governance** (requires formal affiliation)  
        3. **Closed Governance** (controlled by specific organization)  
        4. **Unknown** (insufficient information)  

        **Desired Format**:  
        <PID>: <governance category>, <confidence>
        **Input**: {pid}
        **Output**: 
        
        """

    else:
        # Without this branch an unknown property would reach the return
        # statement with ``prompt`` unbound and fail with UnboundLocalError.
        raise ValueError(
            f"Unknown property type {prop!r}; expected 'metadata', "
            "'structure' or 'governance'"
        )

    return prompt

In [95]:
# Model names to query; each one is passed as the `model` argument of
# get_llm_response.
models = [
    "sonar-pro",
    "llama-3.1-sonar-small-128k-online",
]

In [96]:

# Properties each PID is classified on; every name is passed to make_prompt
# as the `prop` argument.
prop_types = ["metadata", "structure", "governance"]

# Ask every model about every property of every PID and append each response
# to the CSV as one row.
for model in models:
    for pid in pids:
        for prop_type in prop_types:
            prompt = make_prompt(pid, prop_type)

            answer, finish_reason, lodAIC = get_llm_response(prompt, model)
            date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Flatten the prompt to a single line for the CSV. "\r\n" has to
            # be replaced before "\n"; in the other order the "\r\n" pattern
            # can never match and a stray "\r" is left behind.
            prompt = prompt.replace('\r\n', ' ').replace('\n', ' ')

            # Write to CSV
            write_to_csv([lodAIC, model, prompt, answer, date, finish_reason], csv_filename)

            # NOTE: requests are sent back to back; if the API starts
            # rate-limiting, a short pause between calls belongs here.
