109 changes: 109 additions & 0 deletions .github/workflows/test-cortexso-model-hub.yml
@@ -0,0 +1,109 @@
name: Test cortexso Model Hub

on:
  schedule:
    - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7
  workflow_dispatch:

jobs:
  build-and-test:
    runs-on: ${{ matrix.runs-on }}
    timeout-minutes: 1440
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "amd64"
            runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub"
            cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
            build-deps-cmake-flags: ""
            ccache-dir: ""
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: use python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install tools on Linux
        run: |
          sudo chown -R runner:runner /home/runner/cortexcpp
          python3 -m pip install awscli

      - name: Download vcpkg cache from s3
        continue-on-error: true
        run: |
          aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

      - name: Configure vcpkg
        run: |
          cd engine
          make configure-vcpkg

      - name: Build
        run: |
          cd engine
          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"

      - name: Run unit tests
        run: |
          cd engine
          make run-unit-tests

      - name: Run setup config for linux
        shell: bash
        run: |
          cd engine
          ./build/cortex --version
          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc

      - name: Run e2e tests
        run: |
          cd engine
          cp build/cortex build/cortex-nightly
          cp build/cortex build/cortex-beta
          python -m pip install --upgrade pip
          python -m pip install -r e2e-test/requirements.txt
          pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py
          rm build/cortex-nightly
          rm build/cortex-beta
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }}

      - name: Pre-package
        run: |
          cd engine
          make pre-package DESTINATION_BINARY_NAME="cortex"

      - name: Package
        run: |
          cd engine
          make package

      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          name: cortex-${{ matrix.os }}-${{ matrix.name }}
          path: ./engine/cortex

      - name: Upload linux vcpkg cache to s3
        continue-on-error: true
        if: always()
        run: |
          aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }}
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
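
For local reproduction, the same Make targets the job drives should presumably work from a checkout with submodules (a sketch; the flags are abbreviated from the matrix entry above, and CORTEX_CPP_VERSION is omitted):

    cd engine
    make configure-vcpkg
    make build CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
    make run-unit-tests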
2 changes: 2 additions & 0 deletions engine/e2e-test/pytest.ini
@@ -0,0 +1,2 @@
[pytest]
asyncio_default_fixture_loop_scope = function
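# presumably set to pin pytest-asyncio's fixture event-loop scope and avoid
# the warning it emits when this option is left unset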
1 change: 1 addition & 0 deletions engine/e2e-test/requirements.txt
@@ -2,3 +2,4 @@ websockets
pytest
pytest-asyncio
requests
pyyaml
166 changes: 166 additions & 0 deletions engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
@@ -0,0 +1,166 @@
import pytest
import requests
import os
import yaml

from pathlib import Path
from test_runner import (
    run,
    start_server,
    stop_server,
    wait_for_websocket_download_success_event,
)

collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db"
token = os.getenv("HF_TOKEN")
if not token:
    raise ValueError("HF_TOKEN environment variable not set")

def get_repos_in_collection(collection_id, token):
    # API endpoint to get list of repos in the collection
    url = f"https://huggingface.co/api/collections/{collection_id}"
    headers = {"Authorization": f"Bearer {token}"}
    response = requests.get(url, headers=headers)

    # Check response and retrieve repo IDs if successful
    if response.status_code == 200:
        return [repo['id'] for repo in response.json()["items"]]
    else:
        print("Error fetching repos:", response.status_code, response.json())
        return []
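
# The collections API response is assumed to look like
#   {"items": [{"id": "cortexso/<model>", ...}, ...]}
# so only the bare repo ids are collected above.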

def get_repo_default_branch(repo_id, token):
    # Direct link to metadata.yml on the main branch
    url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml"
    headers = {"Authorization": f"Bearer {token}"}
    response = requests.get(url, headers=headers)

    # Check response and retrieve the 'default' field value
    if response.status_code == 200:
        # Read YAML content from response text
        metadata = yaml.safe_load(response.text)
        return metadata.get("default")
    else:
        print(f"Error fetching metadata for {repo_id}:", response.status_code, response.json())
        return None
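
# For illustration only: metadata.yml is assumed to carry a "default" key
# naming the default branch, e.g.
#
#   default: gguf
#
# in which case this helper returns "gguf".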

def get_all_repos_and_default_branches_from_metadata(collection_id, token):
    # Get the list of repos from the collection
    repos = get_repos_in_collection(collection_id, token)
    combined_list = []

    # Iterate over each repo and fetch the default branch from metadata
    for repo_id in repos:
        default_branch = get_repo_default_branch(repo_id, token)
        if default_branch and "gguf" in default_branch:
            combined_list.append(f"{repo_id.split('/')[1]}:{default_branch}")

    return combined_list

# Resolve the model list once at import time so it can parametrize the tests below
repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token)
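# Each entry has the form "<model>:<branch>", e.g. "tinyllama:gguf"
# (illustrative value; the actual list depends on the collection at run time).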

class TestCortexsoModels:

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, request):
        # Setup
        success = start_server()
        if not success:
            raise Exception("Failed to start server")
        # Delete model if exists
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        yield

        # Teardown
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        stop_server()

    @pytest.mark.parametrize("model_url", repo_branches)
    @pytest.mark.asyncio
    async def test_models_on_cortexso_hub(self, model_url):

        # Pull model from cortexso hub
        json_body = {
            "model": model_url
        }
        response = requests.post("http://localhost:3928/models/pull", json=json_body)
        assert response.status_code == 200, f"Failed to pull model: {model_url}"

        await wait_for_websocket_download_success_event(timeout=None)

        # Check if the model was pulled successfully
        get_model_response = requests.get(
            f"http://127.0.0.1:3928/models/{model_url}"
        )
        assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
        assert (
            get_model_response.json()["model"] == model_url
        ), f"Unexpected model name for: {model_url}"

        # Check if the model is available in the list of models
        response = requests.get("http://localhost:3928/models")
        assert response.status_code == 200
        models = [i["id"] for i in response.json()["data"]]
        assert model_url in models, f"Model not found in list: {model_url}"

        # Install Engine
        exit_code, output, error = run(
            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture=False
        )
        root = Path.home()
        assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt")
        assert exit_code == 0, f"Install engine failed with error: {error}"

        # Start the model
        response = requests.post("http://localhost:3928/models/start", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Send an inference request
        inference_json_body = {
            "frequency_penalty": 0.2,
            "max_tokens": 4096,
            "messages": [
                {
                    "content": "",
                    "role": "user"
                }
            ],
            "model": model_url,
            "presence_penalty": 0.6,
            "stop": [
                "End"
            ],
            "stream": False,
            "temperature": 0.8,
            "top_p": 0.95
        }
        response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"})
        assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"

        # Stop the model
        response = requests.post("http://localhost:3928/models/stop", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Uninstall Engine
        exit_code, output, error = run(
            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
        )
        assert "Engine llama-cpp uninstalled successfully!" in output
        assert exit_code == 0, f"Uninstall engine failed with error: {error}"
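
The suite can also be run by hand, mirroring the workflow's "Run e2e tests" step (a sketch; assumes a built ./build/cortex, the cortex-nightly and cortex-beta copies that step creates, and a read-scoped Hugging Face token):

    cd engine
    cp build/cortex build/cortex-nightly
    cp build/cortex build/cortex-beta
    python -m pip install -r e2e-test/requirements.txt
    HF_TOKEN=<your-token> pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py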