# LOGIN AND FETCH TOKEN

In [1]:
import globus_sdk
import requests
import json

# ---- Globus identifiers and scope ----
# Client ID handed to the native-app auth client below.
auth_client_id = "58fdd3bc-e1c3-4ce5-80ea-8d6b87cfb944"

# Client ID of the inference-gateway resource service (publicly available to
# users) and the scope string derived from it.
gateway_client_id = "681c10cc-f684-4540-bcd7-0b4df3bc26ef"
gateway_scope = "https://auth.globus.org/scopes/" + gateway_client_id + "/action_all"

# ---- Authentication ----
# Create the auth client and begin an OAuth2 flow that requests the gateway scope.
auth_client = globus_sdk.NativeAppAuthClient(auth_client_id)
auth_client.oauth2_start_flow(requested_scopes=gateway_scope)

# Show the URL where the user logs in with their Globus account.
authorize_url = auth_client.oauth2_get_authorize_url()
print("Please go to this URL and login:\n\n" + authorize_url + "\n")


Please go to this URL and login:

https://auth.globus.org/v2/oauth2/authorize?client_id=58fdd3bc-e1c3-4ce5-80ea-8d6b87cfb944&redirect_uri=https%3A%2F%2Fauth.globus.org%2Fv2%2Fweb%2Fauth-code&scope=https%3A%2F%2Fauth.globus.org%2Fscopes%2F681c10cc-f684-4540-bcd7-0b4df3bc26ef%2Faction_all&state=_default&response_type=code&code_challenge=Ky86GIVIRXzNLzxjKl-pj7JrPhwkr8aeVcRT-RyNUWg&code_challenge_method=S256&access_type=online



In [2]:
# Exchange the authorization code from the login page for an access token.
# The code is read with getpass so it is not echoed into the cell output, and
# so a forgotten placeholder string can never be sent to Globus by mistake.
from getpass import getpass

auth_code = getpass("Paste the authorization code from the Globus login page: ").strip()
if not auth_code:
    raise ValueError(
        "No authorization code entered; open the login URL, sign in, and paste the code shown."
    )

token_response = auth_client.oauth2_exchange_code_for_tokens(auth_code)

# Tokens are looked up per resource server; the gateway's client ID is the key.
access_token = token_response.by_resource_server[gateway_client_id]["access_token"]

# LIST ENDPOINTS

In [4]:
# List the endpoints (cluster routes and model names) exposed by the gateway.
import requests
import json

url = "https://data-portal-dev.cels.anl.gov/resource_server/list-endpoints"
#url = "http://localhost:8000/resource_server/list-endpoints"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Bound the wait so an unreachable gateway fails the cell instead of hanging the kernel.
response = requests.get(url, headers=headers, timeout=30)

# Stop on HTTP errors (for example an expired token) rather than iterating
# over whatever error body came back as if it were the endpoint list.
response.raise_for_status()

for endpoint in response.json():
    print(endpoint)
          

{'completion_endpoint_url': '/resource_server/sophia/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/sophia/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Llama-3-70B-Instruct'}
{'completion_endpoint_url': '/resource_server/sophia/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/sophia/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct'}
{'completion_endpoint_url': '/resource_server/sophia/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/sophia/vllm/v1/chat/completions/', 'model_name': 'mistralai/Mistral-7B-Instruct-v0.3'}
{'completion_endpoint_url': '/resource_server/polaris/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct'}
{'completion_endpoint_url': '/resource_server/polaris/vllm/v1/completions/', 'chat_endpoint_url': '/resource_server/polaris/vllm/v1/chat/completions/', 'model_name': 'meta-llama/Meta-Lla

## INFERENCE USING VLLM

### CHAT COMPLETIONS

In [5]:
# Chat-completions route of the inference gateway.
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/chat/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: a single user message, capped at 150 generated tokens.
data = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "max_tokens": 150,
    "messages": [{"role": "user", "content": "List all proteins that interact with RAD51"}],
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)

200
{'server_response': '{\n    "id": "cmpl-364f2af1491a4a3ebf4d670ac55867e9",\n    "object": "chat.completion",\n    "created": 1721315660,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "RAD51 is a key protein involved in homologous recombination (HR) and DNA repair. It has been shown to interact with numerous proteins to regulate its activity and function. Here is a list of some of the proteins that interact with RAD51:\\n\\n1. BRCA1: RAD51 interacts with BRCA1, a tumor suppressor protein, to promote HR and DNA repair.\\n2. BRCA2: RAD51 also interacts with BRCA2, another tumor suppressor protein, to facilitate HR and DNA repair.\\n3. PALB2: PALB2, a protein involved in HR and DNA repair, interacts with RAD51 and BRCA2 to regulate their activity.\\n4. FANCD2: FANCD2,",\n                "tool_calls": []\n            },\n            "lo

### CHAT COMPLETIONS USING PYTHON OPENAI API

In [6]:
from openai import OpenAI

# Point the OpenAI client at the gateway's vLLM route; the Globus access token
# is passed as the API key.
# localurl = "http://localhost:8000/resource_server/polaris/vllm/v1"
openai_api_key = access_token
openai_api_base = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

chat_response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    logprobs=True,
    top_logprobs=1,
    messages=[
        {
            "role": "user",
            # Built from adjacent literals so that source indentation does not
            # leak into the prompt as a long run of spaces.
            "content": (
                "A detailed description of the biochemical "
                "function 5-(hydroxymethyl)furfural/furfural transporter is"
            ),
        },
    ],
    max_tokens=2056
)
print("Chat response:", chat_response)

Chat response: ChatCompletion(id=None, choices=None, created=None, model=None, object=None, service_tier=None, system_fingerprint=None, usage=None, server_response='{\n    "id": "cmpl-ca39505882604f8485ab51c2447444f0",\n    "object": "chat.completion",\n    "created": 1721315678,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "I apologize, but there is no biochemical function specifically described as the \\"5-(hydroxymethyl)furfural/furfural transporter.\\" \\n\\nHowever, I can provide some information on Furfural and 5-Hydroxymethylfurfural (HMF), which are both bioactive compounds with potential applications in various fields.\\n\\nFurfural is a naturally occurring hydroxymethylfuran derivative that is formed during the degradation of biomass, such as agricultural waste, and can be produced through the acid-catalyzed dehydration of p

### LEGACY COMPLETIONS

In [11]:
# Legacy (prompt-based) completions route of the inference gateway.
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: plain prompt, low temperature, log-probabilities requested.
data = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
    "logprobs": 1,
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)



200
{'server_response': '{\n    "id": "cmpl-80f94a8925954ea78c2938ff7f737f94",\n    "object": "text_completion",\n    "created": 1721315916,\n    "model": "meta-llama/Meta-Llama-3-8B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "text": "\\nRAD51 is a key protein involved in homologous recombination (HR) and DNA repair. It is a member of the RecA/RAD51 family of proteins, which are involved in the repair of DNA double-strand breaks. RAD51 is a key component of the HR pathway, and it plays a central role in the repair of DNA double-strand breaks by promoting the formation of a DNA repair intermediate called a D-loop.\\nRAD51 interacts with a number of other proteins to facilitate its function in HR. Some of the proteins that interact with RAD51 include:\\n1. BRCA1: BRCA1 is a tumor suppressor protein that is involved in the regulation of HR. It interacts with RAD51 and helps to recruit it",\n            "logprobs": {\n                "text_offset": [\n   

### meta-llama/Meta-Llama-3-70B-Instruct

In [15]:
# Chat-completions route, this time targeting the 70B Llama 3 model.
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/chat/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: a single user message, capped at 150 generated tokens.
data = {
    "model": "meta-llama/Meta-Llama-3-70B-Instruct",
    "max_tokens": 150,
    "messages": [{"role": "user", "content": "List all proteins that interact with RAD51"}],
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)

200
{'server_response': '{\n    "id": "cmpl-bfffc899941340afabf4507ef8689f76",\n    "object": "chat.completion",\n    "created": 1721316070,\n    "model": "meta-llama/Meta-Llama-3-70B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": "A great question!\\n\\nRAD51 is a key protein involved in homologous recombination, a crucial process for maintaining genome stability. It interacts with many other proteins to execute its functions. Here\'s a non-exhaustive list of proteins that interact with RAD51:\\n\\n1. **BRCA1**: Breast cancer type 1 susceptibility protein, a tumor suppressor that regulates RAD51 activity.\\n2. **BRCA2**: Breast cancer type 2 susceptibility protein, a tumor suppressor that interacts with RAD51 to facilitate homologous recombination.\\n3. **DSS1**: Deleted in split-hand/split-foot syndrome 1, a protein that binds to RAD51 and helps to stabilize it.\\n4. **RAD52*

In [14]:
# Legacy (prompt-based) completions route, targeting the 70B Llama 3 model.
#url = "http://localhost:8000/resource_server/polaris/vllm/v1/completions"
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: plain prompt, low temperature, capped at 150 tokens.
data = {
    "model": "meta-llama/Meta-Llama-3-70B-Instruct",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)

200
{'server_response': '{\n    "id": "cmpl-e831fecb05d445cdbac07dceaa0a4749",\n    "object": "text_completion",\n    "created": 1721316012,\n    "model": "meta-llama/Meta-Llama-3-70B-Instruct",\n    "choices": [\n        {\n            "index": 0,\n            "text": " (Homo sapiens)\\nRAD51 (Homo sapiens) is a protein that interacts with the following proteins:\\n1. ATR (Homo sapiens) - [ 1 ]\\n2. ATM (Homo sapiens) - [ 1 ]\\n3. BARD1 (Homo sapiens) - [ 1 ]\\n4. BRCA1 (Homo sapiens) - [ 1 ]\\n5. BRCA2 (Homo sapiens) - [ 1 ]\\n6. CDC5L (Homo sapiens) - [ 1 ]\\n7. CHEK1 (Homo sapiens) - [ 1 ]\\n8. CHEK2 (Homo sapiens) - [ 1 ]\\n9",\n            "logprobs": null,\n            "finish_reason": "length",\n            "stop_reason": null\n        }\n    ],\n    "usage": {\n        "prompt_tokens": 9,\n        "total_tokens": 159,\n        "completion_tokens": 150\n    },\n    "response_time": 6.212867259979248,\n    "throughput_tokens_per_second": 25.59204846113694\n}'}


### Mistral-7B-Instruct-v0.3

In [16]:
import time

# Start of the (currently disabled) latency measurement; see the last line.
start_time = time.time()

# Legacy (prompt-based) completions route, targeting Mistral-7B-Instruct.
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: plain prompt, low temperature, log-probabilities requested.
data = {
    "model": "mistralai/Mistral-7B-Instruct-v0.3",
    "temperature": 0.2,
    "max_tokens": 150,
    "prompt": "List all proteins that interact with RAD51",
    "logprobs": 1,
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Print the status code, then the decoded body.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)

# Uncomment to report the round-trip time measured from start_time.
#print("Time for globus to submit and respond",time.time()-start_time)

200
{'server_response': '{\n    "id": "cmpl-c3c5cc8c66734e64a255668d10e10bab",\n    "object": "text_completion",\n    "created": 1721316132,\n    "model": "mistralai/Mistral-7B-Instruct-v0.3",\n    "choices": [\n        {\n            "index": 0,\n            "text": " in human cells.\\n\\nIn human cells, RAD51 interacts with several proteins involved in homologous recombination, DNA repair, and other cellular processes. Here are some of the key proteins that interact with RAD51:\\n\\n1. BRCA1 (Breast Cancer 1, Susceptibility Gene 1): BRCA1 is a tumor suppressor protein that plays a crucial role in DNA repair through homologous recombination. It interacts with RAD51 to form a complex that promotes DNA repair and maintains genomic stability.\\n\\n2. BRCA2 (Breast Cancer 1, Susceptibility Gene 2): BRCA",\n            "logprobs": {\n                "text_offset": [\n                    0,\n                    3,\n                    9,\n                    15,\n                    16,\n  

In [18]:
import time

# perf_counter is a monotonic clock meant for measuring intervals, so the
# reported latency is unaffected by system clock adjustments.
start_time = time.perf_counter()

# Chat-completions route, targeting Mistral-7B-Instruct.
url = "https://data-portal-dev.cels.anl.gov/resource_server/sophia/vllm/v1/chat/completions"

# Authenticate with the bearer token obtained in the login cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Inference request: a single user message with log-probabilities enabled.
data = {
    "model": "mistralai/Mistral-7B-Instruct-v0.3",
    "temperature": 0.2,
    "max_tokens": 150,
    "messages": [{"role": "user", "content": "List all proteins that interact with RAD51"}],
    "logprobs": True,
}

# Serialize the request body to JSON.
data_json = json.dumps(data)

# (connect, read) timeouts in seconds: fail fast if the gateway is unreachable,
# but leave generous room for generation. Without a timeout the cell can hang
# indefinitely.
response = requests.post(url, data=data_json, headers=headers, timeout=(10, 600))

# Capture the elapsed time as soon as the response arrives so that printing
# the (large) body below is not counted as request latency.
elapsed_seconds = time.perf_counter() - start_time

# Print the status code, then the decoded body, then the round-trip time.
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body was not JSON (e.g. an HTML error page); show it verbatim
    # instead of masking the real error with a decoding traceback.
    print(response.text)
print("Time for globus to submit and respond", elapsed_seconds)

200
{'server_response': '{\n    "id": "cmpl-6cd021661611444790439aa7b0c443eb",\n    "object": "chat.completion",\n    "created": 1721316175,\n    "model": "mistralai/Mistral-7B-Instruct-v0.3",\n    "choices": [\n        {\n            "index": 0,\n            "message": {\n                "role": "assistant",\n                "content": " RAD51 is a key protein in the homologous recombination (HR) pathway of DNA repair and replication. Several proteins interact with RAD51 to facilitate its function. Here are some of the proteins that interact with RAD51:\\n\\n1. BRCA1 (Breast Cancer 1, Early Onset): BRCA1 interacts with RAD51 and plays a role in the recruitment of RAD51 to DNA double-strand breaks (DSBs).\\n\\n2. BRCA2 (Breast Cancer 2, Early Onset): BRCA2 interacts with RAD51 and helps to load RAD51 onto DNA",\n                "tool_calls": []\n            },\n            "logprobs": {\n                "content": [\n                    {\n                        "token": " R",\n      

## FINDINGS
* Globus adds an average of 5 seconds to the latency
