# LOGIN AND FETCH TOKEN

In [5]:
import globus_sdk
import requests
import json

# Globus client ID and scope
# ==========================
# Client ID passed to the Globus native-app auth client created below.
auth_client_id = "58fdd3bc-e1c3-4ce5-80ea-8d6b87cfb944"

# Define the inference-gateway resource service scope
# This will be publicly available to users
# The gateway's client ID is also the key used later to look up its token
# in the token response.
gateway_client_id = "681c10cc-f684-4540-bcd7-0b4df3bc26ef"
gateway_scope = f"https://auth.globus.org/scopes/{gateway_client_id}/action_all"

# Authentication and Access Token
# ===============================

# Start an OAuth2 flow on a native-app client, requesting only the gateway scope
auth_client = globus_sdk.NativeAppAuthClient(auth_client_id)
auth_client.oauth2_start_flow(requested_scopes=gateway_scope)

# Authenticate with your Globus account: open the printed URL, log in, and
# copy the authorization code for the token-exchange cell that follows.
authorize_url = auth_client.oauth2_get_authorize_url()
print(f"Please go to this URL and login:\n\n{authorize_url}\n")


Please go to this URL and login:

https://auth.globus.org/v2/oauth2/authorize?client_id=58fdd3bc-e1c3-4ce5-80ea-8d6b87cfb944&redirect_uri=https%3A%2F%2Fauth.globus.org%2Fv2%2Fweb%2Fauth-code&scope=https%3A%2F%2Fauth.globus.org%2Fscopes%2F681c10cc-f684-4540-bcd7-0b4df3bc26ef%2Faction_all&state=_default&response_type=code&code_challenge=OoAuaudTe3A18OPNdXhoX7TJM23BIfxjVF3oqIBVcgg&code_challenge_method=S256&access_type=online



In [6]:
# Exchange the authorization code from the Globus login page for an access
# token to the inference gateway.
# The code is prompted for instead of being pasted into the cell source, so it
# is never saved with the notebook and a forgotten placeholder is never sent.
auth_code = input("Paste the authorization code from the Globus login page: ").strip()
if not auth_code:
    raise ValueError("No authorization code entered; run this cell again and paste the code.")
token_response = auth_client.oauth2_exchange_code_for_tokens(auth_code)
# Tokens are grouped by resource server; the gateway's entry is keyed by its client ID.
access_token = token_response.by_resource_server[gateway_client_id]["access_token"]

# LIST ENDPOINTS

In [22]:
# List the endpoints (and the model each one serves) advertised by the gateway
import requests
import json
url = "https://data-portal-dev.cels.anl.gov/resource_server/list-endpoints"
#url = "http://localhost:8000/resource_server/list-endpoints"
# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}
# TLS certificate verification stays enabled (the requests default) because the
# bearer token travels in the Authorization header. If the server presents a
# certificate from a private CA, pass verify="/path/to/ca-bundle.pem" instead
# of disabling verification.
response = requests.get(url, headers=headers)
# Stop on an HTTP error instead of iterating over an error payload
response.raise_for_status()
for endpoint in response.json():
    print(endpoint)
          

{'completion_endpoint_url': '/resource_server/polaris/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct'}
{'completion_endpoint_url': '/resource_server/polaris/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Llama-3-70B-Instruct'}
{'completion_endpoint_url': '/resource_server/polaris/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/vllm/v1/chat/completions/', 'model_name': 'mistralai/Mistral-7B-Instruct-v0.3'}
{'completion_endpoint_url': '/resource_server/polaris/llama-cpp/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/llama-cpp/v1/chat/completions/', 'model_name': 'meta-llama-3-8b-instruct'}
{'completion_endpoint_url': '/resource_server/polaris/llama-cpp/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/llama-cpp/v1/chat/completions/', 'model_name': 'mist

# INFERENCE USING VLLM

In [14]:
# Chat completion through the gateway's sophia/vllm route
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/chat/completions"
# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "temperature": 0.2,
        "max_tokens": 150,
        "messages":[{"role": "user", "content": "List all proteins that interact with RAD51"}],
        "logprobs": True
}

# Serialise the request body to a JSON string
data_json = json.dumps(data)

# Send the POST request to the gateway.
# TLS certificate verification stays enabled (the requests default) because the
# bearer token travels in the Authorization header. If the server presents a
# certificate from a private CA, pass verify="/path/to/ca-bundle.pem" instead
# of disabling verification.
response = requests.post(url, data=data_json, headers=headers)

# Print inference response
print(response.status_code)
print(response.json())

200
{'server_response': '{\n    "id": "cmpl-a4ea600455354f0398a4a8a6b664991e",\n    "object": "chat.completion",\n    "created": 1719893560,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "RAD51 is a key protein involved in homologous recombination (HR) and DNA repair. It interacts with numerous proteins to facilitate its functions. Here\'s a list of some proteins that interact with RAD51:\\n\\n1. **BRCA1**: A tumor suppressor protein that interacts with RAD51 to regulate HR and DNA repair.\\n2. **BRCA2**: Another tumor suppressor protein that interacts with RAD51 to facilitate HR and DNA repair.\\n3. **RAD52**: A protein that interacts with RAD51 to facilitate HR and DNA repair.\\n4. **RAD54**: A protein that interacts with RAD51 to regulate HR and DNA repair.\\n5. **RAD55**: A protein that interacts with RAD51 to regulate HR and DNA r

In [18]:
from openai import OpenAI
 
# Point the OpenAI client at the gateway's vLLM route; the Globus access token
# is supplied as the API key.
# localurl = "http://localhost:8000/resource_server/polaris/vllm/v1"
openai_api_key = access_token
openai_api_base = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1"


client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

chat_response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    logprobs=True,
    top_logprobs=1,
    messages=[
        {
            "role": "user",
            # Adjacent string literals are joined by the parser. A backslash
            # continuation inside a single literal would splice the next
            # line's indentation into the prompt text.
            "content": (
                "A detailed description of the biochemical "
                "function 5-(hydroxymethyl)furfural/furfural transporter is"
            ),
        },
    ],
    temperature=0.0,
    max_tokens=2056,
)
# NOTE(review): in the recorded output the typed fields (id, choices, usage, ...)
# are None and the payload sits in an extra `server_response` field - confirm
# whether the gateway is meant to wrap the completion like this.
print("Chat response:", chat_response)

Chat response: ChatCompletion(id=None, choices=None, created=None, model=None, object=None, service_tier=None, system_fingerprint=None, usage=None, server_response='{\n    "id": "cmpl-7596caf46d9b47fab3fc6db4a6e2af16",\n    "object": "chat.completion",\n    "created": 1719893789,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "I apologize, but there is no such thing as a \\"5-(hydroxymethyl)furfural/furfural transporter\\" that is a well-characterized biochemical entity with a known function.\\n\\nHowever, I can provide some information on 5-(hydroxymethyl)furfural (HMF) and furfural, which are both bioactive compounds with potential applications in various fields.\\n\\nHMF is a naturally occurring compound that is formed through the degradation of sugars, such as fructose and glucose, under heat, acid, or enzymatic conditions. It has b

In [52]:
# Text completion through the gateway's sophia/vllm route.
# Routes follow <cluster>/<framework>/v1/<api>, matching the other vLLM cells
# and the paths advertised by list-endpoints.
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"
# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "temperature": 0.2,
        "max_tokens": 150,
        "prompt": "List all proteins that interact with RAD51",
        # The completions API takes an integer here (number of top
        # log-probabilities per token); only the chat API takes a boolean.
        "logprobs": 1
}

# Serialise the request body to a JSON string
data_json = json.dumps(data)

# Send the POST request to the gateway.
# TLS certificate verification stays enabled (the requests default) because the
# bearer token travels in the Authorization header. If the server presents a
# certificate from a private CA, pass verify="/path/to/ca-bundle.pem" instead
# of disabling verification.
response = requests.post(url, data=data_json, headers=headers)

# Print inference response
print(response.status_code)
print(response.json())

200
{'server_response': '{\n    "id": "cmpl-26f6b2a1d21944e49b28a0ca30e250c4",\n    "object": "text_completion",\n    "created": 1719437284,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "text": "\\nProtein-protein interactions are crucial for many cellular processes, including DNA repair. RAD51 is a key protein involved in homologous recombination, a major pathway for repairing DNA double-strand breaks. Here, we provide a comprehensive list of proteins that interact with RAD51, which is essential for understanding the mechanisms of DNA repair and the regulation of RAD51 activity.\\nRAD51 interacts with a wide range of proteins, including:\\n1. BRCA1: A tumor suppressor protein that interacts with RAD51 to facilitate homologous recombination.\\n2. BRCA2: Another tumor suppressor protein that interacts with RAD51 to promote homologous recombination.\\n3. PALB2: A protein that interacts with BRCA2 and",\n          

In [53]:
# Chat completion with Llama-3-70B through the gateway's sophia/vllm route
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/chat/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"
# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
        "model": "meta-llama/Meta-Llama-3-70B-Instruct",
        "temperature": 0.2,
        "max_tokens": 150,
        "messages":[{"role": "user", "content": "List all proteins that interact with RAD51"}],
}

# Serialise the request body to a JSON string
data_json = json.dumps(data)

# Send the POST request to the gateway (TLS verification uses the requests default, i.e. enabled)
response = requests.post(url, data=data_json, headers=headers)

# Print inference response
print(response.status_code)
print(response.json())

200
{'server_response': '{\n    "id": "cmpl-482e4087aa8c4118bf1a0b98c10d1257",\n    "object": "chat.completion",\n    "created": 1719437979,\n    "model": "meta-llama/Meta-Llama-3-70B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "A great question!\\n\\nRAD51 is a key protein involved in homologous recombination, a crucial process for maintaining genome stability. It interacts with many other proteins to facilitate this process. Here\'s a list of some of the proteins that interact with RAD51:\\n\\n**1. BRCA1 and BRCA2**: Tumor suppressor proteins that interact with RAD51 to regulate homologous recombination and maintain genome stability.\\n\\n**2. RAD52**: A protein that interacts with RAD51 to facilitate the assembly of RAD51 nucleoprotein filaments.\\n\\n**3. RAD54**: A protein that interacts with RAD51 to promote the exchange of DNA strands during homologous recombination.\

In [54]:
# Text completion with Llama-3-70B through the gateway's sophia/vllm route.

# Request payload, serialised to a JSON string up front
data = {
    "model": "meta-llama/Meta-Llama-3-70B-Instruct",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
}
data_json = json.dumps(data)

# Bearer-token authorisation plus a JSON content type
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Hosted gateway endpoint; the commented line targets a gateway on localhost
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/completions"

# POST the payload (TLS verification left at the requests default), then show
# the HTTP status followed by the decoded JSON body
response = requests.post(url, headers=headers, data=data_json)
print(response.status_code)
print(response.json())

200
{'server_response': '{\n    "id": "cmpl-ff42694387654162a6954d1cb90ea13d",\n    "object": "text_completion",\n    "created": 1719437991,\n    "model": "meta-llama/Meta-Llama-3-70B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "text": " (Homo sapiens)\\nProtein Name Gene Name Organism Interaction Type\\nATR ATR Homo sapiens (human) protein-protein interaction\\nATRIP ATRIP Homo sapiens (human) protein-protein interaction\\nBARD1 BARD1 Homo sapiens (human) protein-protein interaction\\nBRCA1 BRCA1 Homo sapiens (human) protein-protein interaction\\nBRCA2 BRCA2 Homo sapiens (human) protein-protein interaction\\nCASP3 CASP3 Homo sapiens (human) protein-protein interaction\\nCHK1 CHK1 Homo sapiens (human) protein-protein interaction\\nCHK2 CHK2 Homo sapiens (human) protein-protein interaction\\nDDB1 D",\n            "logprobs": null,\n            "finish_reason": "length",\n            "stop_reason": null\n        }\n    ],\n    "usage": {\n        "prompt

In [55]:
# Text completion with Mistral-7B through the gateway's sophia/vllm route,
# requesting token log-probabilities
import time
start_time = time.time()
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"
# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
        "model": "mistralai/Mistral-7B-Instruct-v0.3",
        "temperature": 0.2,
        "max_tokens": 150,
        #"messages":[{"role": "user", "content": "List all proteins that interact with RAD51"}],
        "prompt" : "List all proteins that interact with RAD51",
        # The completions API takes an integer here (number of top
        # log-probabilities per token); only the chat API takes a boolean.
        "logprobs": 1
}

# Serialise the request body to a JSON string
data_json = json.dumps(data)

# Send the POST request to the gateway (TLS verification uses the requests default, i.e. enabled)
response = requests.post(url, data=data_json, headers=headers)

# Print inference response
print(response.status_code)
print(response.json())
# Uncomment to report the wall-clock time of the round trip
#print("Time for globus to submit and respond",time.time()-start_time)

200
{'server_response': '{\n    "id": "cmpl-dcfb0acb34604f09a9f27064a7135ffa",\n    "object": "text_completion",\n    "created": 1719438693,\n    "model": "mistralai/Mistral-7B-Instruct-v0.3",\n    "choices": [\n        {\n            "index": 0,\n            "text": "\\n\\nRAD51 is a key protein in the homologous recombination (HR) pathway of DNA repair. It forms a nucleoprotein filament on single-stranded DNA (ssDNA) and promotes strand exchange with a homologous double-stranded DNA (dsDNA) template. Several proteins interact with RAD51 to regulate its activity during HR. Here are some of the proteins that interact with RAD51:\\n\\n1. BRCA2: BRCA2 is a tumor suppressor protein that interacts with RAD51 and plays a crucial role in the recruitment of RAD51 to DNA double-strand breaks (",\n            "logprobs": {\n                "text_offset": [\n                    0,\n                    1,\n                    2,\n                    3,\n                    5,\n                   

In [56]:
# Chat completion with Mistral-7B through the gateway's sophia/vllm route,
# requesting token log-probabilities.
import time
start_time = time.time()  # reference point for the optional timing print at the end

# Request payload, serialised to a JSON string up front
data = {
    "model": "mistralai/Mistral-7B-Instruct-v0.3",
    "temperature": 0.2,
    "max_tokens": 150,
    "messages": [
        {"role": "user", "content": "List all proteins that interact with RAD51"},
    ],
    "logprobs": True,
    #"prompt" : "List all proteins that interact with RAD51",
}
data_json = json.dumps(data)

# Bearer-token authorisation plus a JSON content type
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Hosted gateway endpoint; the commented line targets a gateway on localhost
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/chat/completions"

# POST the payload (TLS verification left at the requests default), then show
# the HTTP status followed by the decoded JSON body
response = requests.post(url, headers=headers, data=data_json)
print(response.status_code)
print(response.json())
# Uncomment to report the wall-clock time of the round trip
#print("Time for globus to submit and respond",time.time()-start_time)

200
{'server_response': '{\n    "id": "cmpl-5e25d27f4d0c4e678a3649bcd6a533cf",\n    "object": "chat.completion",\n    "created": 1719438705,\n    "model": "mistralai/Mistral-7B-Instruct-v0.3",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": " RAD51 is a key protein in the homologous recombination (HR) pathway of DNA repair and replication. Several proteins interact with RAD51 to facilitate its function. Here is a list of some proteins that interact with RAD51:\\n\\n1. BRCA1 (Breast Cancer 1, Early Onset): BRCA1 interacts with RAD51 and plays a role in the recruitment of RAD51 to DNA double-strand breaks (DSBs).\\n\\n2. BRCA2 (Breast Cancer 2, Early Onset): BRCA2 interacts with RAD51 and is essential for the loading of RAD",\n                "tool_calls": []\n            },\n            "logprobs": {\n                "content": [\n                    {\n                        "token": " R"

## FINDINGS
* Globus adds an average of 6 seconds to the latency


# INFERENCE USING LLAMACPP

In [None]:
# Text completion with Mistral-7B through the gateway's polaris/llama-cpp route
# on a gateway running on localhost.

# Request payload, serialised to a JSON string up front
data = {
    "model": "mistral-7B-instruct-v03",
    "temperature": 0.2,
    "prompt": "List all proteins that interact with RAD51",
    #"n_probs":1
}
data_json = json.dumps(data)

# Bearer-token authorisation plus a JSON content type
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# NOTE(review): list-endpoints advertises the llama-cpp routes under
# .../llama-cpp/v1/completions/ - this URL has no v1 segment; confirm which
# path the local gateway serves.
url = "http://localhost:8000/resource_server/polaris/llama-cpp/completions"
#url = "https://data-portal-dev-vmw-01.cels.anl.gov/resource_server/polaris"

# POST the payload with certificate verification switched off (relevant only
# for HTTPS targets), then show the HTTP status and the decoded JSON body
response = requests.post(url, headers=headers, data=data_json, verify=False)
print(response.status_code)
print(response.json())

In [None]:
# Text completion with Llama-3-70B through the gateway's polaris/llama-cpp route
# on a gateway running on localhost.

# Request payload, serialised to a JSON string up front
data = {
    "model": "meta-llama-3-70b-instruct",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
}
data_json = json.dumps(data)

# Bearer-token authorisation plus a JSON content type
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# NOTE(review): list-endpoints advertises the llama-cpp routes under
# .../llama-cpp/v1/completions/ - this URL has no v1 segment; confirm which
# path the local gateway serves.
url = "http://localhost:8000/resource_server/polaris/llama-cpp/completions"
#url = "https://data-portal-dev-vmw-01.cels.anl.gov/resource_server/polaris"

# POST the payload with certificate verification switched off (relevant only
# for HTTPS targets), then show the HTTP status and the decoded JSON body
response = requests.post(url, headers=headers, data=data_json, verify=False)
print(response.status_code)
print(response.json())