Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions src/arbiterAI/hardwareDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,14 @@ void HardwareDetector::detectVulkanGpus()
VkPhysicalDeviceProperties props{};
getProperties(devices[i], &props);

// Skip CPU-based software renderers (e.g. llvmpipe) — they report
// system RAM as VRAM and are not useful for model inference.
if(props.deviceType==VK_PHYSICAL_DEVICE_TYPE_CPU)
{
spdlog::debug("Skipping Vulkan software renderer: {}", props.deviceName);
continue;
}

// Skip devices already detected via NVML (match by name)
bool alreadyDetected=false;
for(const GpuInfo &existing:m_systemInfo.gpus)
Expand Down Expand Up @@ -665,8 +673,16 @@ void HardwareDetector::detectUnifiedMemory()

if(matchedCardPath.empty())
{
spdlog::debug("No amdgpu sysfs match for integrated GPU {}: {}",
gpu.index, gpu.name);
// No sysfs match — fall back to system RAM as GPU-accessible pool.
// Unified memory GPUs share system RAM, so the GPU can access most of it.
// Use total system RAM as the accessible pool estimate.
gpu.gpuAccessibleRamMb=m_systemInfo.totalRamMb;
gpu.gpuAccessibleRamFreeMb=m_systemInfo.freeRamMb;

spdlog::info("Unified memory GPU {}: {} — no sysfs match, "
"falling back to system RAM ({}MB total, {}MB free) as GPU-accessible pool",
gpu.index, gpu.name,
gpu.gpuAccessibleRamMb, gpu.gpuAccessibleRamFreeMb);
continue;
}

Expand Down Expand Up @@ -730,7 +746,14 @@ void HardwareDetector::detectUnifiedMemory()
}
else
{
spdlog::debug("No GTT info for integrated GPU {}: {}", gpu.index, gpu.name);
// sysfs card matched but no GTT info — fall back to system RAM
gpu.gpuAccessibleRamMb=m_systemInfo.totalRamMb;
gpu.gpuAccessibleRamFreeMb=m_systemInfo.freeRamMb;

spdlog::info("Unified memory GPU {}: {} — no GTT info, "
"falling back to system RAM ({}MB total, {}MB free) as GPU-accessible pool",
gpu.index, gpu.name,
gpu.gpuAccessibleRamMb, gpu.gpuAccessibleRamFreeMb);
}
}
#endif
Expand Down
47 changes: 28 additions & 19 deletions src/server/routes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,30 +808,39 @@ void handleGetLoadedModels(const httplib::Request &, httplib::Response &res)

// HTTP handler: load (or start downloading) the model named in the route.
// Optional query params: "variant" (quantization name) and "context"
// (context size as a decimal integer).
// Responses: 200 loaded, 202 download in progress, 400 client error,
// 500 unexpected internal failure.
void handleLoadModel(const httplib::Request &req, httplib::Response &res)
{
    try
    {
        std::string modelName=req.matches[1];
        std::string variant;
        int contextSize=0;

        if(req.has_param("variant"))
            variant=req.get_param_value("variant");
        if(req.has_param("context"))
        {
            try
            {
                contextSize=std::stoi(req.get_param_value("context"));
            }
            catch(const std::exception &)
            {
                // A malformed "context" value is the client's fault — report
                // 400 here rather than falling through to the generic 500
                // handler below.
                res.status=400;
                res.set_content(errorJson("Invalid 'context' parameter: expected an integer", "invalid_request_error").dump(), "application/json");
                return;
            }
        }

        ErrorCode err=ArbiterAI::instance().loadModel(modelName, variant, contextSize);

        if(err==ErrorCode::Success)
        {
            res.set_content(nlohmann::json{{"status", "loaded"}, {"model", modelName}}.dump(), "application/json");
        }
        else if(err==ErrorCode::ModelDownloading)
        {
            // Download started in the background; client should poll.
            res.status=202;
            res.set_content(nlohmann::json{{"status", "downloading"}, {"model", modelName}}.dump(), "application/json");
        }
        else
        {
            res.status=400;
            res.set_content(errorJson("Failed to load model: "+errorCodeToString(err), "invalid_request_error", "model", errorCodeToString(err)).dump(), "application/json");
        }
    }
    catch(const std::exception &e)
    {
        // Last-resort guard so an unexpected exception can't escape the route.
        spdlog::error("Exception in handleLoadModel: {}", e.what());
        res.status=500;
        res.set_content(errorJson(std::string("Internal error: ")+e.what(), "server_error").dump(), "application/json");
    }
}

Expand Down
179 changes: 177 additions & 2 deletions tests/llamaProviderTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
#include "arbiterAI/telemetryCollector.h"
#include "arbiterAI/modelManager.h"

#include <nlohmann/json.hpp>
#include <filesystem>
#include <string>

namespace arbiterAI
{

static const std::string MODEL_NAME="qwen2.5-7b-instruct";
static const std::string SMALL_MODEL_NAME="qwen2.5-1.5b-instruct";
static const std::string MODEL_NAME="Qwen2.5-7B-Instruct";
static const std::string SMALL_MODEL_NAME="Qwen2.5-1.5B-Instruct";
static const std::string MODEL_FILE="Qwen2.5-7B-Instruct-Q4_K_M.gguf";
static const std::string SMALL_MODEL_FILE="Qwen2.5-1.5B-Instruct-Q4_K_M.gguf";

class LlamaProviderTest : public ::testing::Test {
protected:
Expand All @@ -37,6 +40,13 @@ class LlamaProviderTest : public ::testing::Test {
{
GTEST_SKIP() << "Model '" << MODEL_NAME << "' has no variants configured";
}

// Check that the GGUF file actually exists on disk
std::string filePath="/models/"+MODEL_FILE;
if(!std::filesystem::exists(filePath))
{
GTEST_SKIP() << "Model file not found at " << filePath;
}
}

void TearDown() override
Expand Down Expand Up @@ -210,6 +220,13 @@ TEST_F(LlamaProviderTest, ModelSwitching)
GTEST_SKIP() << "Model '" << SMALL_MODEL_NAME << "' not found in config or has no variants";
}

// Verify the small model file exists
std::string smallFilePath="/models/"+SMALL_MODEL_FILE;
if(!std::filesystem::exists(smallFilePath))
{
GTEST_SKIP() << "Small model file not found at " << smallFilePath;
}

// Create a client with the 7B model
ChatConfig config;
config.model=MODEL_NAME;
Expand Down Expand Up @@ -256,4 +273,162 @@ TEST_F(LlamaProviderTest, ModelSwitching)
EXPECT_NE(response1.model, response2.model);
}

// ─── Config injection tests ──────────────────────────────────────────────

// Model name under which the dynamically-injected test definition is registered.
static const std::string INJECTED_MODEL_NAME="injected-qwen-test";

class LlamaConfigInjectionTest : public ::testing::Test {
protected:
void SetUp() override
{
ModelRuntime::reset();
TelemetryCollector::reset();

ArbiterAI &ai=ArbiterAI::instance();
ai.initialize({"tests/config"});

// Use the small model file for injection tests (faster to load)
std::string filePath="/models/"+SMALL_MODEL_FILE;
if(!std::filesystem::exists(filePath))
{
GTEST_SKIP() << "Small model file not found at " << filePath;
}
}

void TearDown() override
{
ModelRuntime::instance().unloadModel(INJECTED_MODEL_NAME);
ModelRuntime::reset();
TelemetryCollector::reset();
}

nlohmann::json buildInjectedModelJson() const
{
return nlohmann::json{
{"model", INJECTED_MODEL_NAME},
{"provider", "llama"},
{"ranking", 1},
{"version", "1.1.0"},
{"context_window", 4096},
{"max_tokens", 2048},
{"max_output_tokens", 1024},
{"hardware_requirements", {
{"min_system_ram_mb", 2048},
{"parameter_count", "1.5B"}
}},
{"context_scaling", {
{"base_context", 4096},
{"max_context", 32768},
{"vram_per_1k_context_mb", 32}
}},
{"variants", nlohmann::json::array({
{
{"quantization", "Q4_K_M"},
{"file_size_mb", 941},
{"min_vram_mb", 1536},
{"recommended_vram_mb", 2048},
{"download", {
{"url", ""},
{"sha256", ""},
{"filename", SMALL_MODEL_FILE}
}}
}
})}
};
}
};

// Injecting a definition should register it with ModelManager and allow the
// runtime to load it end-to-end.
TEST_F(LlamaConfigInjectionTest, InjectAndLoad)
{
    nlohmann::json spec=buildInjectedModelJson();

    std::string injectError;
    const bool injected=ModelManager::instance().addModelFromJson(spec, injectError);
    ASSERT_TRUE(injected) << "addModelFromJson failed: " << injectError;

    // The injected definition must be queryable from the manager.
    std::optional<ModelInfo> info=ModelManager::instance().getModelInfo(INJECTED_MODEL_NAME);
    ASSERT_TRUE(info.has_value());
    EXPECT_EQ(info->provider, "llama");
    EXPECT_EQ(info->variants.size(), 1u);
    EXPECT_EQ(info->variants[0].quantization, "Q4_K_M");
    EXPECT_EQ(info->variants[0].download.filename, SMALL_MODEL_FILE);

    // Loading via the runtime should succeed using the injected config.
    const ErrorCode loadResult=ModelRuntime::instance().loadModel(INJECTED_MODEL_NAME, "Q4_K_M", 4096);
    EXPECT_EQ(loadResult, ErrorCode::Success);

    // The runtime should now report a fully-loaded model with live handles.
    std::optional<LoadedModel> state=ModelRuntime::instance().getModelState(INJECTED_MODEL_NAME);
    ASSERT_TRUE(state.has_value());
    EXPECT_EQ(state->state, ModelState::Loaded);
    EXPECT_EQ(state->variant, "Q4_K_M");
    EXPECT_NE(state->llamaModel, nullptr);
    EXPECT_NE(state->llamaCtx, nullptr);
}

// An injected model should work through the full chat-client completion path.
TEST_F(LlamaConfigInjectionTest, InjectAndRunCompletion)
{
    nlohmann::json spec=buildInjectedModelJson();

    std::string injectError;
    const bool injected=ModelManager::instance().addModelFromJson(spec, injectError);
    ASSERT_TRUE(injected) << "addModelFromJson failed: " << injectError;

    // Bind a chat client to the injected model.
    ChatConfig chatConfig;
    chatConfig.model=INJECTED_MODEL_NAME;
    chatConfig.maxTokens=32;

    std::shared_ptr<ChatClient> client=ArbiterAI::instance().createChatClient(chatConfig);
    ASSERT_NE(client, nullptr) << "Failed to create ChatClient for injected model";
    EXPECT_EQ(client->getModel(), INJECTED_MODEL_NAME);

    // Run one tiny completion end-to-end.
    CompletionRequest request;
    request.model=INJECTED_MODEL_NAME;
    request.max_tokens=32;
    request.messages={{"user", "What is 1+1? Answer with just the number."}};

    CompletionResponse response;
    const ErrorCode status=client->completion(request, response);

    EXPECT_EQ(status, ErrorCode::Success);
    EXPECT_FALSE(response.text.empty());
    EXPECT_EQ(response.provider, "llama");
    EXPECT_EQ(response.model, INJECTED_MODEL_NAME);
    EXPECT_GT(response.usage.total_tokens, 0);
}

// Re-injecting the same model name must be rejected with an explanatory error.
TEST_F(LlamaConfigInjectionTest, InjectDuplicateFails)
{
    nlohmann::json spec=buildInjectedModelJson();
    std::string err;

    const bool firstAdd=ModelManager::instance().addModelFromJson(spec, err);
    ASSERT_TRUE(firstAdd) << "First injection failed: " << err;

    // Second injection under the same name should fail and say why.
    const bool secondAdd=ModelManager::instance().addModelFromJson(spec, err);
    EXPECT_FALSE(secondAdd);
    EXPECT_NE(err.find("already exists"), std::string::npos);
}

// A llama definition with no variants can be registered, but loading it must
// be rejected as an invalid request.
TEST_F(LlamaConfigInjectionTest, InjectWithoutVariantsFails)
{
    // Deliberately omits the "variants" array.
    nlohmann::json spec={
        {"model", "no-variants-llama"},
        {"provider", "llama"},
        {"ranking", 1},
        {"version", "1.1.0"}
    };

    std::string err;
    const bool added=ModelManager::instance().addModelFromJson(spec, err);
    ASSERT_TRUE(added) << "addModelFromJson failed: " << err;

    // The runtime refuses to load a llama model that has no variants.
    EXPECT_EQ(ModelRuntime::instance().loadModel("no-variants-llama"),
              ErrorCode::InvalidRequest);
}

} // namespace arbiterAI
Loading