Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .redocly.lint-ignore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,4 @@ docs/openapi.json:
- '#/paths/~1tokenize/post'
- '#/paths/~1v1~1chat~1completions/post'
- '#/paths/~1v1~1completions/post'
- '#/paths/~1v1~1models/get'
60 changes: 60 additions & 0 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,37 @@
}
}
}
},
"/v1/models": {
"get": {
"tags": [
"Text Generation Inference"
],
"summary": "Get model info",
"operationId": "openai_get_model_info",
"responses": {
"200": {
"description": "Served model info",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ModelInfo"
}
}
}
},
"404": {
"description": "Model not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
}
}
}
}
}
}
}
},
"components": {
Expand Down Expand Up @@ -1747,6 +1778,35 @@
}
]
},
"ModelInfo": {
"type": "object",
"required": [
"id",
"object",
"created",
"owned_by"
],
"properties": {
"created": {
"type": "integer",
"format": "int64",
"example": 1686935002,
"minimum": 0
},
"id": {
"type": "string",
"example": "gpt2"
},
"object": {
"type": "string",
"example": "model"
},
"owned_by": {
"type": "string",
"example": "openai"
}
}
},
"OutputMessage": {
"oneOf": [
{
Expand Down
28 changes: 28 additions & 0 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,34 @@ pub(crate) struct ErrorResponse {
pub error_type: String,
}

// A single model entry in the OpenAI-style model listing (see `ModelsInfo`).
// NOTE(review): plain `//` comments are used deliberately; `///` doc comments on
// a `ToSchema` type are presumably emitted as schema descriptions and would
// change the generated OpenAPI document — confirm before converting.
#[derive(Serialize, Deserialize, ToSchema)]
pub(crate) struct ModelInfo {
// Identifier of the model (documented example: "gpt2").
#[schema(example = "gpt2")]
pub id: String,
// Kind tag of this entry (documented example: "model").
#[schema(example = "model")]
pub object: String,
// Creation time of the model; presumably a Unix timestamp in seconds, judging
// by the documented example — TODO confirm.
#[schema(example = 1686935002)]
pub created: u64,
// Owner of the model (documented example: "openai").
#[schema(example = "openai")]
pub owned_by: String,
}

// Envelope holding a list of `ModelInfo` entries.
// NOTE(review): plain `//` comments are used deliberately; `///` doc comments on
// a `ToSchema` type are presumably emitted as schema descriptions — confirm
// before converting.
#[derive(Serialize, Deserialize, ToSchema)]
pub(crate) struct ModelsInfo {
// Kind tag of the envelope (documented example: "list").
#[schema(example = "list")]
pub object: String,
// The model entries carried by this envelope.
pub data: Vec<ModelInfo>,
}

impl Default for ModelsInfo {
fn default() -> Self {
ModelsInfo {
object: "list".to_string(),
data: Vec::new(),
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
29 changes: 28 additions & 1 deletion router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::{
VertexResponse,
};
use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice, ToolType};
use crate::{ModelInfo, ModelsInfo};
use async_stream::__private::AsyncStream;
use axum::extract::Extension;
use axum::http::{HeaderMap, HeaderValue, Method, StatusCode};
Expand Down Expand Up @@ -116,6 +117,29 @@ async fn get_model_info(info: Extension<Info>) -> Json<Info> {
Json(info.0)
}

#[utoipa::path(
    get,
    tag = "Text Generation Inference",
    path = "/v1/models",
    responses(
        (status = 200, description = "Served model info", body = ModelInfo),
        (status = 404, description = "Model not found", body = ErrorResponse),
    )
)]
#[instrument(skip(info))]
/// Get model info
// Returns a `ModelsInfo` listing with exactly one `ModelInfo` entry, built from
// the `Info` extension. The `///` line above is kept to a single line on
// purpose: it matches the `summary` recorded for this route in the OpenAPI
// document.
// NOTE(review): the 200 response above advertises `ModelInfo`, while this
// handler returns `ModelsInfo`; the 404 response is never produced by this
// body. Confirm which schema is intended before changing either side.
async fn openai_get_model_info(info: Extension<Info>) -> Json<ModelsInfo> {
    // The served model id is used both as the entry id and as its owner.
    let model_id = info.0.model_id.clone();

    let served_model = ModelInfo {
        id: model_id.clone(),
        object: String::from("model"),
        created: 0, // TODO: determine how to get this
        owned_by: model_id,
    };

    // Everything except `data` comes from `ModelsInfo::default()`.
    let listing = ModelsInfo {
        data: vec![served_model],
        ..Default::default()
    };

    Json(listing)
}

#[utoipa::path(
post,
tag = "Text Generation Inference",
Expand Down Expand Up @@ -1501,6 +1525,7 @@ chat_completions,
completions,
tokenize,
metrics,
openai_get_model_info,
),
components(
schemas(
Expand Down Expand Up @@ -1553,6 +1578,7 @@ ToolCall,
Function,
FunctionDefinition,
ToolChoice,
ModelInfo,
)
),
tags(
Expand Down Expand Up @@ -2246,7 +2272,8 @@ async fn start(
.route("/info", get(get_model_info))
.route("/health", get(health))
.route("/ping", get(health))
.route("/metrics", get(metrics));
.route("/metrics", get(metrics))
.route("/v1/models", get(openai_get_model_info));

// Conditional AWS Sagemaker route
let aws_sagemaker_route = if messages_api_enabled {
Expand Down