In [18]:
import globus_sdk
import requests
import json

# Globus ID and Scope
# ===================

# Client ID of your own Globus thick client, registered at
# https://app.globus.org/settings/developers
# Each user creates this themselves (re-using one of their existing clients also works)
my_client_id = "99bc7f15-ebed-4d6b-b783-8f7ae8735135"

# Client ID of the inference-gateway resource service, and the scope string built from it
# This will be publicly available to users
vllm_client_id = "681c10cc-f684-4540-bcd7-0b4df3bc26ef"
vllm_scope = "https://auth.globus.org/scopes/{}/action_all".format(vllm_client_id)

# Authentication and Access Token
# ===============================

# Create a native-app auth client and begin a login flow that requests the vLLM scope
auth_client = globus_sdk.NativeAppAuthClient(my_client_id)
auth_client.oauth2_start_flow(requested_scopes=vllm_scope)

# Build the login URL and show it; the user opens it in a browser and
# authenticates with their Globus account
authorize_url = auth_client.oauth2_get_authorize_url()
print("Please go to this URL and login:", "", authorize_url, "", sep="\n")


Please go to this URL and login:

https://auth.globus.org/v2/oauth2/authorize?client_id=99bc7f15-ebed-4d6b-b783-8f7ae8735135&redirect_uri=https%3A%2F%2Fauth.globus.org%2Fv2%2Fweb%2Fauth-code&scope=https%3A%2F%2Fauth.globus.org%2Fscopes%2F681c10cc-f684-4540-bcd7-0b4df3bc26ef%2Faction_all&state=_default&response_type=code&code_challenge=XP3D2gwS0yUwxg5IZ1cGg_SVCocwljSWKWuOy8CrR00&code_challenge_method=S256&access_type=online



In [19]:
# Collect access token to vLLM service
# The authorization code is single-use and is tied to the login flow started by
# oauth2_start_flow() in the previous cell, so a hard-coded value cannot be
# replayed on a fresh run (and should not be committed). Prompt for it instead.
auth_code = input("Enter the authorization code from the Globus login page: ").strip()
if not auth_code:
    # Fail early with a clear message rather than sending an empty code to Globus Auth
    raise ValueError("No authorization code was provided.")

# Exchange the code for tokens, then pick the access token issued for the
# inference-gateway resource server (tokens are keyed by resource server)
token_response = auth_client.oauth2_exchange_code_for_tokens(auth_code)
access_token = token_response.by_resource_server[vllm_client_id]["access_token"]

In [23]:


# Make API Requests
# =================

# URL to the inference gateway (needs to end with forward slash /)
url = "https://data-portal-dev-vmw-01.cels.anl.gov/resource_server/polaris/"

# Add access token to the headers
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

# Prepare the inference request
data = {
    "framework": "llama_cpp", # Parameter ignored at the moment
    "model_params": {
        "model_name": "meta-Llama3-8b-instruct", # Parameter ignored at the moment
        "temperature": 0.7, # Parameter ignored at the moment
        "max_tokens": 150, # Parameter ignored at the moment
        "prompt": "Where are we?",
        "logprobs": True # Parameter ignored at the moment
    }
}

# Convert data into Json
data_json = json.dumps(data)

# (connect, read) timeouts in seconds. Without an explicit timeout, requests
# waits indefinitely and an unresponsive gateway would hang the cell. The read
# timeout is generous because inference can be slow; tune as needed.
request_timeout = (10, 600)

# Send the post request to the relay server
# verify=False should not be used in production, this is because we currently use self-signed SSL certificate
# WARNING: with verification disabled the bearer token is sent to an unauthenticated
# endpoint; switch to verify=True (or a CA bundle path) once a trusted certificate is in place.
response = requests.post(
    url,
    data=data_json,
    headers=headers,
    verify=False,
    timeout=request_timeout,
)

# Print inference response
print(response.status_code)
if response.status_code != 200:
    print("failed")
try:
    print(response.json())
except ValueError:
    # Error pages from a proxy or gateway are often not JSON; show the raw body
    # instead of raising a decode error that hides the actual failure.
    print(response.text)



200
{'server_response': '{\n    "id": "chatcmpl-6f822eac-b64b-4896-8724-0de45c90daf9",\n    "choices": [\n        {\n            "finish_reason": "stop",\n            "index": 0,\n            "logprobs": null,\n            "message": {\n                "content": "We\'re in a digital space, and I\'m here to assist you with any questions or tasks you may have!",\n                "role": "assistant"\n            }\n        }\n    ],\n    "created": 1717094392,\n    "model": "Meta-Llama-3-8B-Instruct-Q8_0.gguf",\n    "object": "chat.completion",\n    "usage": {\n        "completion_tokens": 23,\n        "prompt_tokens": 26,\n        "total_tokens": 49\n    }\n}'}


In [15]:
# Print the version of the imported globus_sdk package (helps when reproducing this notebook)
print(globus_sdk.__version__)

3.41.0
