Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
618 changes: 618 additions & 0 deletions llm-access-kiro/src/anthropic/converter/convert.rs

Large diffs are not rendered by default.

245 changes: 245 additions & 0 deletions llm-access-kiro/src/anthropic/converter/document.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
//! Document attachment normalization: media-type canonicalization, name
//! sanitization/generation, and conversion into Kiro document blocks.

use base64::Engine as _;
use sha2::{Digest, Sha256};

use super::{
invalid_request, normalize::push_normalization_event, ConversionError, NormalizationEvent,
};
use crate::wire::KiroDocument;

/// Normalizes a user `document` content block and records an event when the
/// normalized form differs from the input.
///
/// The block is normalized with [`normalize_document_block_payload`]. If the
/// result is not equal to `block`, `push_normalization_event` is called for
/// `message_index` / `block_index` with the labels `"user"`, `"document"`,
/// `"rewrite_content_block"` and `"document_block_normalized"`.
///
/// # Errors
///
/// Propagates any error returned by [`normalize_document_block_payload`].
pub fn normalize_user_document_block(
    block: &serde_json::Map<String, serde_json::Value>,
    message_index: usize,
    block_index: usize,
    events: &mut Vec<NormalizationEvent>,
) -> Result<serde_json::Value, ConversionError> {
    let normalized = normalize_document_block_payload(block, message_index, block_index)?;
    // Compare by reference. Wrapping `block.clone()` in a `Value` would
    // deep-copy the entire block, document payload included, only to test
    // equality.
    let unchanged = normalized.as_object() == Some(block);
    if !unchanged {
        push_normalization_event(
            events,
            message_index,
            "user",
            Some(block_index),
            Some("document"),
            "rewrite_content_block",
            "document_block_normalized",
        );
    }
    Ok(normalized)
}

/// Validates a `document` content block and rebuilds it in normalized form.
///
/// The returned JSON object has exactly three keys:
/// * `type` — always `"document"`;
/// * `name` — produced by `normalize_document_name` from the optional `name`
///   field, the canonical media type and the normalized data;
/// * `source` — `type` (trimmed), `media_type` (canonicalized) and `data`.
///
/// Data handling depends on `source.type`:
/// * `base64` — surrounding whitespace is trimmed;
/// * `text` — only for media types accepted by
///   `document_media_type_supports_text_source`; `\r\n` and lone `\r` become
///   `\n`.
///
/// # Errors
///
/// Returns an `invalid_request` error when `source`, `source.type`,
/// `source.media_type` or `source.data` is missing or blank, when the media
/// type is unsupported, when `text` is used with a media type that does not
/// allow it, or when `source.type` is neither `base64` nor `text`.
pub fn normalize_document_block_payload(
    block: &serde_json::Map<String, serde_json::Value>,
    message_index: usize,
    block_index: usize,
) -> Result<serde_json::Value, ConversionError> {
    // Every error message below starts with the same location prefix.
    let location = format!("message {message_index} document block {block_index}");
    let missing = |field: &str| invalid_request(format!("{location} is missing {field}"));

    let source = match block.get("source").and_then(serde_json::Value::as_object) {
        Some(source) => source,
        None => return Err(missing("source")),
    };

    let source_type = match source.get("type").and_then(serde_json::Value::as_str) {
        Some(raw) if !raw.trim().is_empty() => raw.trim(),
        _ => return Err(missing("source.type")),
    };

    let media_type = match source.get("media_type").and_then(serde_json::Value::as_str) {
        Some(raw) if !raw.trim().is_empty() => raw.trim(),
        _ => return Err(missing("source.media_type")),
    };

    // The data is only checked for blankness here; how it is normalized
    // depends on the source type below.
    let source_data = match source.get("data").and_then(serde_json::Value::as_str) {
        Some(raw) if !raw.trim().is_empty() => raw,
        _ => return Err(missing("source.data")),
    };

    let normalized_media_type = match canonical_document_media_type(media_type) {
        Some(canonical) => canonical,
        None => {
            return Err(invalid_request(format!(
                "{location} has unsupported source.media_type `{media_type}`"
            )));
        },
    };

    let normalized_data = if source_type == "base64" {
        source_data.trim().to_string()
    } else if source_type == "text" {
        if !document_media_type_supports_text_source(normalized_media_type) {
            return Err(invalid_request(format!(
                "{location} only supports source.type=`text` for plain text, markdown, html, \
                 or csv documents"
            )));
        }
        // Normalize CRLF first so that it does not turn into two newlines.
        source_data.replace("\r\n", "\n").replace('\r', "\n")
    } else {
        return Err(invalid_request(format!(
            "{location} must use source.type=`base64` or source.type=`text`"
        )));
    };

    let raw_name = block.get("name").and_then(serde_json::Value::as_str);
    let normalized_name = normalize_document_name(raw_name, normalized_media_type, &normalized_data);

    Ok(serde_json::json!({
        "type": "document",
        "name": normalized_name,
        "source": {
            "type": source_type,
            "media_type": normalized_media_type,
            "data": normalized_data,
        }
    }))
}

/// Maps a document media type onto its canonical spelling.
///
/// Matching ignores surrounding whitespace and ASCII case. The markdown
/// aliases `text/md` and `text/x-markdown` collapse to `text/markdown`; every
/// other accepted type maps to itself. Returns `None` for anything else,
/// including media types that carry parameters such as `; charset=utf-8`.
fn canonical_document_media_type(media_type: &str) -> Option<&'static str> {
    const DOCX: &str = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const XLSX: &str = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    const MARKDOWN: &str = "text/markdown";
    // (accepted spelling, canonical spelling); accepted spellings are lowercase.
    const ACCEPTED: &[(&str, &str)] = &[
        ("application/pdf", "application/pdf"),
        ("text/csv", "text/csv"),
        ("application/msword", "application/msword"),
        (DOCX, DOCX),
        ("application/vnd.ms-excel", "application/vnd.ms-excel"),
        (XLSX, XLSX),
        ("text/html", "text/html"),
        ("text/plain", "text/plain"),
        (MARKDOWN, MARKDOWN),
        ("text/md", MARKDOWN),
        ("text/x-markdown", MARKDOWN),
    ];

    let candidate = media_type.trim();
    for &(spelling, canonical) in ACCEPTED {
        if candidate.eq_ignore_ascii_case(spelling) {
            return Some(canonical);
        }
    }
    None
}

/// Returns `true` when `media_type` is one of the four media types that may
/// be supplied with `source.type = "text"`.
///
/// The comparison is exact, so callers pass an already-canonical media type.
fn document_media_type_supports_text_source(media_type: &str) -> bool {
    const TEXT_CAPABLE: [&str; 4] = ["text/plain", "text/markdown", "text/html", "text/csv"];
    TEXT_CAPABLE.contains(&media_type)
}

/// Looks up the short format label (`pdf`, `docx`, `md`, ...) for a canonical
/// document media type.
///
/// The match is exact: no trimming or case folding happens here. Returns
/// `None` for any media type that is not in the table.
fn document_format_from_media_type(media_type: &str) -> Option<&'static str> {
    // (canonical media type, format label)
    const FORMATS: &[(&str, &str)] = &[
        ("application/pdf", "pdf"),
        ("text/csv", "csv"),
        ("application/msword", "doc"),
        ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
        ("application/vnd.ms-excel", "xls"),
        ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
        ("text/html", "html"),
        ("text/plain", "txt"),
        ("text/markdown", "md"),
    ];

    FORMATS
        .iter()
        .find(|(mime, _)| *mime == media_type)
        .map(|&(_, format)| format)
}

/// Chooses the document name: a sanitized version of the caller-supplied name
/// when one is present, otherwise a name generated from the media type and
/// data.
///
/// A name that is missing, empty, or whitespace-only counts as absent.
fn normalize_document_name(raw_name: Option<&str>, media_type: &str, data: &str) -> String {
    let provided = raw_name.map(str::trim).unwrap_or("");
    if provided.is_empty() {
        generate_document_name(media_type, data)
    } else {
        sanitize_document_name(provided)
    }
}

/// Turns a caller-supplied document name into a restricted-character name.
///
/// Steps, in order:
/// 1. Everything from the last `.` onwards is dropped (treated as an
///    extension).
/// 2. ASCII alphanumerics and `-`, `(`, `)`, `[`, `]` are kept as-is.
/// 3. A run of ASCII whitespace collapses into a single space.
/// 4. A run of any other characters collapses into a single `-`.
/// 5. Surrounding whitespace is trimmed; an empty result becomes
///    `"document"`.
/// 6. The name is cut to at most 200 characters, and any whitespace exposed at
///    the end by that cut is removed so the result never ends in a space.
fn sanitize_document_name(name: &str) -> String {
    const MAX_NAME_CHARS: usize = 200;
    const FALLBACK_NAME: &str = "document";

    /// What the most recently emitted character was.
    #[derive(Clone, Copy, PartialEq)]
    enum Last {
        /// A character copied verbatim (this includes a literal `-`).
        Kept,
        /// A space standing in for a whitespace run.
        Space,
        /// A `-` standing in for a run of disallowed characters.
        Filler,
    }

    let stem = name.rsplit_once('.').map_or(name, |(stem, _)| stem);
    let mut sanitized = String::with_capacity(stem.len());
    let mut last = Last::Kept;
    for ch in stem.chars() {
        if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '(' | ')' | '[' | ']') {
            sanitized.push(ch);
            last = Last::Kept;
        } else if ch.is_ascii_whitespace() {
            if last != Last::Space {
                sanitized.push(' ');
                last = Last::Space;
            }
        } else if last != Last::Filler {
            // Only a substituted dash suppresses further substitutions; a
            // literal `-` in the input does not.
            sanitized.push('-');
            last = Last::Filler;
        }
    }

    let trimmed = sanitized.trim();
    if trimmed.is_empty() {
        return FALLBACK_NAME.to_string();
    }

    let mut truncated: String = trimmed.chars().take(MAX_NAME_CHARS).collect();
    // The cut can land directly after a collapsed space; trim again so the
    // length limit cannot reintroduce trailing whitespace. `trimmed` starts
    // with a non-space character, so this never empties the name.
    truncated.truncate(truncated.trim_end().len());
    truncated
}

/// Derives a deterministic fallback name of the form `document-<12 hex chars>`.
///
/// The hex suffix is the first six bytes of the SHA-256 digest over
/// `media_type`, a single zero byte, and `data`, in that order. The zero byte
/// separates the two inputs in the hashed stream.
pub fn generate_document_name(media_type: &str, data: &str) -> String {
    // Six digest bytes render as twelve lowercase hex characters.
    const DIGEST_PREFIX_BYTES: usize = 6;

    let mut hasher = Sha256::new();
    hasher.update(media_type.as_bytes());
    hasher.update([0u8]);
    hasher.update(data.as_bytes());
    let digest = hasher.finalize();

    let suffix: String = digest
        .iter()
        .take(DIGEST_PREFIX_BYTES)
        .map(|byte| format!("{byte:02x}"))
        .collect();
    format!("document-{suffix}")
}

/// Builds a `KiroDocument` from an already-normalized source.
///
/// Returns `None` when the media type has no known format label, or when the
/// source type is neither `base64` nor a `text` source whose media type
/// allows text. Base64 data is passed through unchanged; text data is
/// base64-encoded with the standard alphabet before being handed to
/// `KiroDocument::from_base64`.
fn kiro_document_from_source(
    name: String,
    source: crate::anthropic::types::ImageSource,
) -> Option<KiroDocument> {
    let format = document_format_from_media_type(&source.media_type)?;

    let source_kind = source.source_type.as_str();
    let encoded = if source_kind == "base64" {
        source.data
    } else if source_kind == "text"
        && document_media_type_supports_text_source(&source.media_type)
    {
        base64::engine::general_purpose::STANDARD.encode(source.data.as_bytes())
    } else {
        return None;
    };

    Some(KiroDocument::from_base64(name, format, encoded))
}

/// Normalizes a document block's source and converts it into a
/// `KiroDocument`.
///
/// * The media type is canonicalized; an unsupported media type yields
///   `Ok(None)`.
/// * `base64` data has surrounding whitespace trimmed.
/// * `text` data, for media types that allow text sources, has `\r\n` and
///   lone `\r` rewritten to `\n`.
/// * Any other source type leaves the data untouched; the final conversion
///   step then decides whether a document can be built.
///
/// The name is produced by `normalize_document_name` from `name`, the
/// canonical media type and the normalized data.
///
/// # Errors
///
/// This function currently has no error path of its own; the `Result` is part
/// of the signature callers use.
pub fn kiro_document_from_block(
    name: Option<String>,
    mut source: crate::anthropic::types::ImageSource,
) -> Result<Option<KiroDocument>, ConversionError> {
    let Some(normalized_media_type) = canonical_document_media_type(&source.media_type) else {
        return Ok(None);
    };

    // Normalize the payload in place. Document payloads can be large, so avoid
    // copying them when nothing changes, and never keep a second copy around
    // just to derive the name.
    match source.source_type.as_str() {
        "base64" => {
            let trimmed = source.data.trim();
            if trimmed.len() != source.data.len() {
                source.data = trimmed.to_string();
            }
        },
        "text" if document_media_type_supports_text_source(normalized_media_type) => {
            if source.data.contains('\r') {
                // CRLF first, so it does not turn into two newlines.
                source.data = source.data.replace("\r\n", "\n").replace('\r', "\n");
            }
        },
        // Left as-is; `kiro_document_from_source` decides what to do with it.
        _ => {},
    }

    let normalized_name =
        normalize_document_name(name.as_deref(), normalized_media_type, &source.data);
    source.media_type = normalized_media_type.to_string();
    Ok(kiro_document_from_source(normalized_name, source))
}
143 changes: 143 additions & 0 deletions llm-access-kiro/src/anthropic/converter/identity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
//! Model-identity handling: identity probes, Claude Code identity override,
//! and stripping volatile Claude Code billing headers from system text.

use super::{
ResponseModelIdentity, CLAUDE_AGENT_SDK_SYSTEM_IDENTITY_LINE,
CLAUDE_CODE_BILLING_HEADER_PREFIX, CLAUDE_CODE_CLI_SYSTEM_IDENTITY_LINE,
GENERIC_ANTHROPIC_IDENTITY_OVERRIDE,
};
use crate::anthropic::types::MessagesRequest;

/// Returns the model ID with one trailing `-thinking` suffix removed, or the
/// input unchanged when it has no such suffix.
fn requested_model_identity_id(model: &str) -> &str {
    match model.strip_suffix("-thinking") {
        Some(base_id) => base_id,
        None => model,
    }
}

/// Returns the short display name (for example `"Opus 4.7"`) for a known
/// model ID, ignoring a trailing `-thinking` suffix.
///
/// Returns `None` for any model ID that is not in the table.
fn requested_model_identity_name(model: &str) -> Option<&'static str> {
    // (model ID without the `-thinking` suffix, display name)
    const KNOWN_MODELS: &[(&str, &str)] = &[
        ("claude-opus-4-8", "Opus 4.8"),
        ("claude-opus-4-7", "Opus 4.7"),
        ("claude-opus-4-6", "Opus 4.6"),
        ("claude-sonnet-4-6", "Sonnet 4.6"),
        ("claude-sonnet-4-5", "Sonnet 4.5"),
        ("claude-haiku-4-5", "Haiku 4.5"),
    ];

    let base_id = requested_model_identity_id(model);
    KNOWN_MODELS
        .iter()
        .find(|(known_id, _)| *known_id == base_id)
        .map(|&(_, display_name)| display_name)
}

/// Builds the identity for a known model: the display name prefixed with
/// `"Claude "`, plus the model ID without its `-thinking` suffix.
///
/// Returns `None` when the model is not recognized by
/// `requested_model_identity_name`.
fn response_model_identity(model: &str) -> Option<ResponseModelIdentity> {
    requested_model_identity_name(model).map(|display_name| ResponseModelIdentity {
        model_name: format!("Claude {display_name}"),
        model_id: requested_model_identity_id(model).to_string(),
    })
}

/// Returns the identity for `req.model` when the current turn's content looks
/// like a question about the assistant's identity or model.
///
/// Returns `None` when the content is not an identity probe, or when the
/// requested model is not a recognized one.
pub fn response_identity_for_current_turn(
    req: &MessagesRequest,
    current_content: &str,
) -> Option<ResponseModelIdentity> {
    if !is_model_identity_probe(current_content) {
        return None;
    }
    response_model_identity(&req.model)
}

/// Heuristically detects whether `content` asks about the assistant's
/// identity or model.
///
/// All matching is done on the lowercased text, so both the English phrases
/// and the Latin parts of the mixed Chinese phrases (`你是Claude`, `你是Kiro`,
/// `模型ID`) match regardless of case. Lowercasing leaves the Chinese
/// characters themselves unchanged.
///
/// A mention of a "model ID" counts only when the text also addresses the
/// assistant (`you` / `你`). For the English form, whitespace, `-`, `_` and
/// backticks are ignored, so `model id`, `model-id`, `model_id` and
/// `` `modelId` `` are all recognized.
fn is_model_identity_probe(content: &str) -> bool {
    // Phrases asking who or what the assistant is.
    const IDENTITY_PHRASES: &[&str] = &[
        "who are you",
        "what are you",
        "your identity",
        "are you claude",
        "are you kiro",
        "你是谁",
        "你是什么",
        "你的身份",
        "你是claude",
        "你是 claude",
        "你是kiro",
        "你是 kiro",
    ];
    // Phrases asking which model the assistant is.
    const MODEL_PHRASES: &[&str] = &[
        "what model are you",
        "which model are you",
        "your model",
        "你的模型",
        "你是什么模型",
        "你是哪种模型",
    ];

    let lower = content.to_lowercase();
    let mentions_any = |phrases: &[&str]| phrases.iter().any(|phrase| lower.contains(*phrase));

    if mentions_any(IDENTITY_PHRASES) || mentions_any(MODEL_PHRASES) {
        return true;
    }

    // English "model ID" in any spelling, addressed to the assistant. Checking
    // for "you" also covers "your".
    let compact: String = lower
        .chars()
        .filter(|ch| !ch.is_whitespace() && !matches!(ch, '-' | '_' | '`'))
        .collect();
    if compact.contains("modelid") && lower.contains("you") {
        return true;
    }

    // Chinese "模型ID", addressed to the assistant. Checking for "你" also
    // covers "你的".
    (lower.contains("模型id") || lower.contains("模型 id")) && lower.contains('你')
}

/// Produces the `<identity_override>` text for a request.
///
/// When the requested model is recognized, the text names the specific model
/// and its public model ID. Otherwise the generic override constant is
/// returned unchanged.
pub fn anthropic_identity_override(requested_model: &str) -> String {
    match response_model_identity(requested_model) {
        None => GENERIC_ANTHROPIC_IDENTITY_OVERRIDE.to_string(),
        Some(identity) => {
            let model_name = &identity.model_name;
            let model_id = &identity.model_id;
            format!(
                "<identity_override>\n\
                 You are Claude, made by Anthropic. For this request, your model name is \
                 {model_name} and your public API model ID is {model_id}. When asked about \
                 your identity, model name, or model ID, answer with this Claude identity. \
                 Never claim to be Kiro, Warp, or any other product. You are Claude, running \
                 on the Anthropic API platform.\n\
                 </identity_override>"
            )
        },
    }
}

/// Makes the "You are powered by the model named ..." sentence in `content`
/// agree with the requested model.
///
/// * Unrecognized model: `content` is returned untouched.
/// * Every line containing both "You are powered by the model named" and
///   "The exact model ID is" is replaced by the sentence for the requested
///   model, keeping that line's leading whitespace.
/// * If no such line exists, the sentence is inserted as a new line directly
///   after the first line that equals (ignoring leading whitespace) one of
///   the two known system identity lines.
///
/// The text is processed line by line and re-joined with `\n`, so a trailing
/// newline is not preserved.
pub fn normalize_claude_code_model_identity(content: String, requested_model: &str) -> String {
    let Some(model_name) = requested_model_identity_name(requested_model) else {
        return content;
    };
    let model_id = requested_model_identity_id(requested_model);
    let replacement = format!(
        "You are powered by the model named {model_name}. The exact model ID is {model_id}."
    );

    let declares_model = |text: &str| {
        text.contains("You are powered by the model named")
            && text.contains("The exact model ID is")
    };
    let is_identity_banner = |text: &str| {
        text == CLAUDE_CODE_CLI_SYSTEM_IDENTITY_LINE
            || text == CLAUDE_AGENT_SDK_SYSTEM_IDENTITY_LINE
    };

    // Insertion is only considered when there is nothing to replace anywhere
    // in the text, so this has to be known before the rewriting pass.
    let already_declared = content.lines().any(|line| declares_model(line.trim_start()));

    let mut inserted = false;
    let mut rewritten: Vec<String> = Vec::new();
    for line in content.lines() {
        let body = line.trim_start();
        if declares_model(body) {
            let indent = &line[..line.len() - body.len()];
            rewritten.push(format!("{indent}{replacement}"));
        } else if !already_declared && !inserted && is_identity_banner(body) {
            inserted = true;
            rewritten.push(line.to_string());
            rewritten.push(replacement.clone());
        } else {
            rewritten.push(line.to_string());
        }
    }
    rewritten.join("\n")
}

/// Removes every line that `is_claude_code_billing_header_text` recognizes as
/// a billing header.
///
/// The remaining lines are re-joined with `\n`, so a trailing newline is not
/// preserved.
pub fn strip_volatile_claude_code_billing_header(content: String) -> String {
    let mut kept: Vec<&str> = content.lines().collect();
    kept.retain(|line| !is_claude_code_billing_header_text(line));
    kept.join("\n")
}

/// Returns `true` when `content`, ignoring leading whitespace, starts with
/// the billing header prefix and also contains at least one of the
/// `cc_version=`, `cc_entrypoint=` or `cch=` markers.
fn is_claude_code_billing_header_text(content: &str) -> bool {
    const MARKERS: [&str; 3] = ["cc_version=", "cc_entrypoint=", "cch="];

    let candidate = content.trim_start();
    if !candidate.starts_with(CLAUDE_CODE_BILLING_HEADER_PREFIX) {
        return false;
    }
    MARKERS.iter().any(|marker| candidate.contains(*marker))
}
Loading
Loading