# LOGIN AND FETCH TOKEN

In [2]:
import globus_sdk
import requests
import json
from inference_auth_token import get_access_token

# Authenticate and collect the access token used by every request below.
# If tokens already exist, this will reuse them and refresh the access token if necessary.
access_token = get_access_token()

# LIST ENDPOINTS

In [10]:
# URL of the inference gateway route that lists the available endpoints
base_url = "https://inference-api.alcf.anl.gov/resource_server/list-endpoints"

# Pass the access token as a Bearer credential
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# requests applies no timeout by default; bound the call so a stalled
# connection cannot hang the kernel indefinitely.
response = requests.get(base_url, headers=headers, timeout=60)

# Print the decoded listing. If the body is not JSON (for example an error
# page), show the status code and raw text instead of raising.
try:
    print(response.json())
except ValueError:
    print(response.status_code, response.text)

{'clusters': {'sophia': {'base_url': '/resource_server/sophia', 'frameworks': {'vllm': {'models': ['China-NCTIEDA/ChipExpert-8B-Instruct', 'Qwen/QwQ-32B', 'Qwen/Qwen2-VL-72B-Instruct', 'Qwen/Qwen2.5-14B-Instruct', 'Qwen/Qwen2.5-7B-Instruct', 'Qwen/Qwen2.5-VL-72B-Instruct', 'Qwen/Qwen3-235B-A22B', 'Qwen/Qwen3-32B', 'Salesforce/SFR-Embedding-Mistral', 'allenai/Llama-3.1-Tulu-3-405B', 'argonne/AuroraGPT-DPO-UFB-0225', 'argonne/AuroraGPT-IT-v4-0125', 'argonne/AuroraGPT-KTO-UFB-0325', 'argonne/AuroraGPT-Tulu3-SFT-0125', 'google/gemma-3-27b-it', 'meta-llama/Llama-3.2-90B-Vision-Instruct', 'meta-llama/Llama-3.3-70B-Instruct', 'meta-llama/Llama-4-Maverick-17B-128E-Instruct', 'meta-llama/Llama-4-Scout-17B-16E-Instruct', 'meta-llama/Meta-Llama-3-70B-Instruct', 'meta-llama/Meta-Llama-3-8B-Instruct', 'meta-llama/Meta-Llama-3.1-405B-Instruct', 'meta-llama/Meta-Llama-3.1-70B-Instruct', 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'mgoin/Nemotron-4-340B-Instruct-hf', 'mistralai/Mistral-7B-Instruct-v0.3',

## INFERENCE USING VLLM

### CHAT COMPLETIONS

In [11]:
# URL to the inference gateway (chat completions route served by vLLM on sophia)
base_url = "https://inference-api.alcf.anl.gov/resource_server/sophia/vllm/v1/chat/completions"

# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request.
# NOTE(review): in a recorded run of this cell the response had
# finish_reason='length' with content=None and only reasoning_content filled,
# i.e. the 150-token budget was used up before an answer was produced.
# Raise max_tokens if a final answer is needed.
data = {
    "model": "openai/gpt-oss-20b",
    "max_tokens": 150,
    "messages": [{"role": "user", "content": "List all proteins that interact with RAD51"}],
}

# Serialize the payload to a JSON string
data_json = json.dumps(data)

# Send the POST request to the relay server. requests has no default timeout,
# so set one explicitly: 10 s to connect, 600 s to wait for the response.
# The read timeout is a generous guess for slow generations; tune as needed.
response = requests.post(base_url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body. Fall back to the raw text when
# the body is not valid JSON so an error response is still visible.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    print(response.text)

200
{'id': 'chatcmpl-330c6fdc0fbf46f3a0fbc1cecaf691ed', 'object': 'chat.completion', 'created': 1758663468, 'model': 'openai/gpt-oss-20b', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': None, 'refusal': None, 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [], 'reasoning_content': 'We need to list all proteins that interact with RAD51. This could be many proteins. The user likely wants a comprehensive list or at least a list of known protein partners. We need to consider RAD51 interactions across various organisms. In humans, RAD51 interacts with many proteins: BRCA1, BRCA2, ATM, ATR, PALB2, RPA, RAD52, DSS1, BLM, RECQL, FANCI, FANCD2, RAD51 paralogs: XRCC2, XRCC3, SWSAP1, ETAA1, DMC1. Also proteins involved in HR: RAD51B, RAD51C, RAD51D, XRCC3, PALB2, BR'}, 'logprobs': None, 'finish_reason': 'length', 'stop_reason': None}], 'service_tier': None, 'system_fingerprint': None, 'usage': {'prompt_tokens': 79, 'total_tokens': 229, 'completion_to

### CHAT COMPLETIONS USING PYTHON OPENAI API

In [12]:
from openai import OpenAI

# Point the OpenAI client at the gateway's vLLM route and use the access
# token fetched earlier as the API key.
# localurl = "http://localhost:8000/resource_server/polaris/vllm/v1"
openai_api_key = access_token
openai_api_base = "https://inference-api.alcf.anl.gov/resource_server/sophia/vllm/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Build the prompt from adjacent string literals. A backslash continuation
# inside a single literal would also embed the next line's leading indentation
# in the prompt text, leaving a long run of spaces mid-sentence.
prompt = (
    "A detailed description of the biochemical "
    "function 5-(hydroxymethyl)furfural/furfural transporter is"
)

# logprobs/top_logprobs request log-probability information with the completion.
chat_response = client.chat.completions.create(
    model="openai/gpt-oss-20b",
    logprobs=True,
    top_logprobs=1,
    messages=[
        {"role": "user", "content": prompt},
    ],
    max_tokens=2056
)
print("Chat response:", chat_response)

Chat response: ChatCompletion(id='chatcmpl-84ebf86135204feb88cffbeb00e5bc61', choices=[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='<|channel|>', bytes=[60, 124, 99, 104, 97, 110, 110, 101, 108, 124, 62], logprob=0.0, top_logprobs=[TopLogprob(token='<|channel|>', bytes=[60, 124, 99, 104, 97, 110, 110, 101, 108, 124, 62], logprob=0.0)]), ChatCompletionTokenLogprob(token='analysis', bytes=[97, 110, 97, 108, 121, 115, 105, 115], logprob=-1.0728830375228426e-06, top_logprobs=[TopLogprob(token='analysis', bytes=[97, 110, 97, 108, 121, 115, 105, 115], logprob=-1.0728830375228426e-06)]), ChatCompletionTokenLogprob(token='<|message|>', bytes=[60, 124, 109, 101, 115, 115, 97, 103, 101, 124, 62], logprob=-2.50339189733495e-06, top_logprobs=[TopLogprob(token='<|message|>', bytes=[60, 124, 109, 101, 115, 115, 97, 103, 101, 124, 62], logprob=-2.50339189733495e-06)]), ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-

### LEGACY COMPLETIONS

In [13]:
# URL to the inference gateway (legacy text-completions route served by vLLM on sophia)
base_url = "https://inference-api.alcf.anl.gov/resource_server/sophia/vllm/v1/completions"

# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request: a raw prompt rather than chat messages
data = {
    "model": "openai/gpt-oss-20b",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
    "logprobs": 1
}

# Serialize the payload to a JSON string
data_json = json.dumps(data)

# Send the POST request to the relay server. requests has no default timeout,
# so set one explicitly: 10 s to connect, 600 s to wait for the response.
# The read timeout is a generous guess for slow generations; tune as needed.
response = requests.post(base_url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body. Fall back to the raw text when
# the body is not valid JSON so an error response is still visible.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    print(response.text)

200
{'id': 'cmpl-2e5a08b8c4ed45fcb16fdc99e20c89e2', 'object': 'text_completion', 'created': 1758663632, 'model': 'openai/gpt-oss-20b', 'choices': [{'index': 0, 'text': "AP1. For each protein, provide the following information:\n\n- Gene symbol\n- Entrez Gene ID\n- Gene name\n- Gene type\n- Chromosome location (cytoband)\n\nWe need to list all proteins that interact with RAD51AP1. So we need to find known protein-protein interactions for RAD51AP1. Then for each interacting protein, provide gene symbol, Entrez Gene ID, gene name, gene type, chromosome location (cytoband). So we need to compile a list.\n\nFirst, what is RAD51AP1? It's a protein that interacts with RAD51. It's called RAD51-associated protein 1. It's involved in homologous recombination. It may have interactions with RAD51,", 'logprobs': {'text_offset': [0, 2, 3, 4, 8, 13, 21, 22, 30, 34, 44, 56, 59, 60, 65, 72, 73, 74, 78, 81, 86, 89, 90, 91, 96, 101, 102, 103, 108, 113, 114, 115, 121, 126, 135, 137, 139, 140, 142, 145, 14

In [15]:
import time

# Use a monotonic, high-resolution clock for the elapsed-time measurement;
# time.time() follows the wall clock and can jump if the system clock is adjusted.
start_time = time.perf_counter()

# URL to the inference gateway
# NOTE(review): this cell targets the data-portal-dev host, whereas the other
# cells in this notebook use inference-api.alcf.anl.gov. Confirm that this is
# intentional and that the same access token is accepted here.
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"

# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
    "model": "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "temperature": 0.2,
    "messages": [{"role": "user", "content": "List all proteins that interact with RAD51"}],
}

# Serialize the payload to a JSON string
data_json = json.dumps(data)

# Send the POST request to the relay server. requests has no default timeout,
# so set one explicitly: 10 s to connect, 600 s to wait for the response.
# The read timeout is a generous guess for slow generations; tune as needed.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body. Fall back to the raw text when
# the body is not valid JSON so an error response is still visible.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    print(response.text)
print("Time for globus to submit and respond", time.perf_counter() - start_time)

200
{'server_response': '{\n    "id": "chat-4fc6c8569f5a43e78b967714e65c5ab1",\n    "object": "chat.completion",\n    "created": 1722571630,\n    "model": "mistralai/Mixtral-8x22B-Instruct-v0.1",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": " RAD51 is a key protein involved in the homologous recombination (HR) repair of DNA double-strand breaks (DSBs). It interacts with several other proteins to carry out its function. Here are some proteins that interact with RAD51:\\n\\n1. BRCA1 (Breast cancer type 1 susceptibility protein): BRCA1 interacts with RAD51 and promotes its nuclear localization and accumulation at sites of DNA damage.\\n\\n2. BRCA2 (Breast cancer type 2 susceptibility protein): BRCA2 plays a crucial role in the HR repair pathway by mediating the recruitment of RAD51 to DSBs.\\n\\n3. PALB2 (Partner and localizer of BRCA2): PALB2 interacts with BRCA2 and RAD51 to facilitate t