acking-you · acking-you · May 31, 2026 · May 31, 2026 · May 31, 2026
diff --git a/llm-access-kiro/src/anthropic/converter/convert.rs b/llm-access-kiro/src/anthropic/converter/convert.rs
diff --git a/llm-access-kiro/src/anthropic/converter/document.rs b/llm-access-kiro/src/anthropic/converter/document.rs
@@ -0,0 +1,245 @@
+//! Document attachment normalization: media-type canonicalization, name
+//! sanitization/generation, and conversion into Kiro document blocks.
+
+use base64::Engine as _;
+use sha2::{Digest, Sha256};
+
+use super::{
+    invalid_request, normalize::push_normalization_event, ConversionError, NormalizationEvent,
+};
+use crate::wire::KiroDocument;
+
+/// Normalizes a user `document` block and records an event when it changed.
+///
+/// The block is rebuilt by [`normalize_document_block_payload`]; any error it
+/// returns is propagated unchanged. A normalization event is pushed onto
+/// `events` only when the rebuilt value differs from the incoming `block`.
+///
+/// # Errors
+/// Returns whatever [`normalize_document_block_payload`] returns for an
+/// invalid block.
+pub fn normalize_user_document_block(
+    block: &serde_json::Map<String, serde_json::Value>,
+    message_index: usize,
+    block_index: usize,
+    events: &mut Vec<NormalizationEvent>,
+) -> Result<serde_json::Value, ConversionError> {
+    let normalized = normalize_document_block_payload(block, message_index, block_index)?;
+    // Compare against the borrowed map directly: wrapping `block.clone()` in a
+    // `Value` would deep-copy the entire document payload just for this check.
+    if normalized.as_object() != Some(block) {
+        push_normalization_event(
+            events,
+            message_index,
+            "user",
+            Some(block_index),
+            Some("document"),
+            "rewrite_content_block",
+            "document_block_normalized",
+        );
+    }
+    Ok(normalized)
+}
+
+/// Validates a `document` content block and rebuilds it in canonical form.
+///
+/// The returned JSON object has `type` = `"document"`, a `name` produced by
+/// `normalize_document_name`, and a `source` object holding the trimmed
+/// `source.type`, the canonical media type, and the normalized data:
+/// * `base64` sources have surrounding whitespace trimmed from the data;
+/// * `text` sources have `\r\n` and lone `\r` rewritten to `\n`.
+///
+/// # Errors
+/// Returns the error built by `invalid_request` when, checked in this order:
+/// `source` is not an object; `source.type` or `source.media_type` is not a
+/// non-blank string; `source.data` is not a non-blank string; the media type
+/// is not recognised; a `text` source is used with a media type that does not
+/// support it; or `source.type` is neither `base64` nor `text`.
+pub fn normalize_document_block_payload(
+    block: &serde_json::Map<String, serde_json::Value>,
+    message_index: usize,
+    block_index: usize,
+) -> Result<serde_json::Value, ConversionError> {
+    // All "missing field" failures share one message shape.
+    let missing = |field: &str| {
+        invalid_request(format!(
+            "message {message_index} document block {block_index} is missing {field}"
+        ))
+    };
+
+    let source = block
+        .get("source")
+        .and_then(serde_json::Value::as_object)
+        .ok_or_else(|| missing("source"))?;
+    let source_type = source
+        .get("type")
+        .and_then(serde_json::Value::as_str)
+        .map(str::trim)
+        .filter(|text| !text.is_empty())
+        .ok_or_else(|| missing("source.type"))?;
+    let media_type = source
+        .get("media_type")
+        .and_then(serde_json::Value::as_str)
+        .map(str::trim)
+        .filter(|text| !text.is_empty())
+        .ok_or_else(|| missing("source.media_type"))?;
+    // The data is only checked for blankness here; it is trimmed (or not)
+    // later depending on the source type.
+    let source_data = source
+        .get("data")
+        .and_then(serde_json::Value::as_str)
+        .filter(|text| !text.trim().is_empty())
+        .ok_or_else(|| missing("source.data"))?;
+
+    let Some(normalized_media_type) = canonical_document_media_type(media_type) else {
+        return Err(invalid_request(format!(
+            "message {message_index} document block {block_index} has unsupported \
+             source.media_type `{media_type}`"
+        )));
+    };
+
+    let normalized_data = if source_type == "base64" {
+        source_data.trim().to_string()
+    } else if source_type == "text" {
+        if !document_media_type_supports_text_source(normalized_media_type) {
+            return Err(invalid_request(format!(
+                "message {message_index} document block {block_index} only supports \
+                 source.type=`text` for plain text, markdown, html, or csv documents"
+            )));
+        }
+        // Normalize every line ending to `\n`.
+        source_data.replace("\r\n", "\n").replace('\r', "\n")
+    } else {
+        return Err(invalid_request(format!(
+            "message {message_index} document block {block_index} must use \
+             source.type=`base64` or source.type=`text`"
+        )));
+    };
+
+    let raw_name = block.get("name").and_then(serde_json::Value::as_str);
+    let normalized_name = normalize_document_name(raw_name, normalized_media_type, &normalized_data);
+
+    Ok(serde_json::json!({
+        "type": "document",
+        "name": normalized_name,
+        "source": {
+            "type": source_type,
+            "media_type": normalized_media_type,
+            "data": normalized_data,
+        }
+    }))
+}
+
+/// Maps a document media type to its canonical spelling.
+///
+/// Surrounding whitespace is ignored and the comparison is ASCII
+/// case-insensitive. `text/md` and `text/x-markdown` are folded into
+/// `text/markdown`. Returns `None` for any media type not in the table.
+fn canonical_document_media_type(media_type: &str) -> Option<&'static str> {
+    // (accepted spelling, canonical spelling)
+    const KNOWN_MEDIA_TYPES: &[(&str, &str)] = &[
+        ("application/pdf", "application/pdf"),
+        ("text/csv", "text/csv"),
+        ("application/msword", "application/msword"),
+        (
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        ),
+        ("application/vnd.ms-excel", "application/vnd.ms-excel"),
+        (
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        ),
+        ("text/html", "text/html"),
+        ("text/plain", "text/plain"),
+        ("text/markdown", "text/markdown"),
+        ("text/md", "text/markdown"),
+        ("text/x-markdown", "text/markdown"),
+    ];
+
+    let wanted = media_type.trim();
+    KNOWN_MEDIA_TYPES
+        .iter()
+        .find(|(accepted, _)| accepted.eq_ignore_ascii_case(wanted))
+        .map(|&(_, canonical)| canonical)
+}
+
+/// Reports whether `media_type` may be supplied through a `text` source.
+///
+/// The comparison is exact, so callers pass an already-canonical media type.
+fn document_media_type_supports_text_source(media_type: &str) -> bool {
+    const TEXT_SOURCE_MEDIA_TYPES: [&str; 4] =
+        ["text/plain", "text/markdown", "text/html", "text/csv"];
+    TEXT_SOURCE_MEDIA_TYPES.contains(&media_type)
+}
+
+/// Returns the short format tag (`pdf`, `csv`, `docx`, ...) for a canonical
+/// media type, or `None` when the media type is not recognised.
+///
+/// The comparison is exact: no trimming or case folding is applied.
+fn document_format_from_media_type(media_type: &str) -> Option<&'static str> {
+    let format = match media_type {
+        "application/pdf" => "pdf",
+        "text/csv" => "csv",
+        "application/msword" => "doc",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx",
+        "application/vnd.ms-excel" => "xls",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx",
+        "text/html" => "html",
+        "text/plain" => "txt",
+        "text/markdown" => "md",
+        _ => return None,
+    };
+    Some(format)
+}
+
+/// Picks the final document name.
+///
+/// A caller-supplied name that is non-blank after trimming is passed through
+/// `sanitize_document_name`; otherwise a name is derived from the media type
+/// and data by `generate_document_name`.
+fn normalize_document_name(raw_name: Option<&str>, media_type: &str, data: &str) -> String {
+    let provided = raw_name.map(str::trim).unwrap_or("");
+    if provided.is_empty() {
+        generate_document_name(media_type, data)
+    } else {
+        sanitize_document_name(provided)
+    }
+}
+
+/// Reduces a caller-supplied document name to a restricted character set.
+///
+/// Steps, in order:
+/// 1. Everything from the last `.` onward is dropped (treated as an
+///    extension).
+/// 2. ASCII alphanumerics and `-`, `(`, `)`, `[`, `]` are kept as-is.
+/// 3. A run of ASCII whitespace collapses to a single space.
+/// 4. A run of any other characters collapses to a single `-`. A literal `-`
+///    kept by step 2 does not count toward such a run.
+/// 5. Surrounding whitespace is trimmed, the result is cut to at most 200
+///    characters, and whitespace exposed by the cut is trimmed again.
+///
+/// Returns `"document"` when nothing is left after trimming.
+fn sanitize_document_name(name: &str) -> String {
+    const MAX_NAME_CHARS: usize = 200;
+    const FALLBACK_NAME: &str = "document";
+
+    let without_extension = name.rsplit_once('.').map_or(name, |(stem, _)| stem);
+    let mut sanitized = String::with_capacity(without_extension.len());
+    let mut previous_dash = false;
+    let mut previous_space = false;
+    for ch in without_extension.chars() {
+        if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '(' | ')' | '[' | ']') {
+            previous_dash = false;
+            previous_space = false;
+            sanitized.push(ch);
+        } else if ch.is_ascii_whitespace() {
+            if !previous_space {
+                previous_space = true;
+                previous_dash = false;
+                sanitized.push(' ');
+            }
+        } else if !previous_dash {
+            previous_dash = true;
+            previous_space = false;
+            sanitized.push('-');
+        }
+    }
+
+    let trimmed = sanitized.trim();
+    let cut = trimmed
+        .char_indices()
+        .nth(MAX_NAME_CHARS)
+        .map_or(trimmed.len(), |(index, _)| index);
+    // The cut can land right after a collapsed space; trim again so the
+    // truncated name never ends in whitespace.
+    let truncated = trimmed[..cut].trim_end();
+    if truncated.is_empty() {
+        FALLBACK_NAME.to_string()
+    } else {
+        truncated.to_string()
+    }
+}
+
+/// Derives a deterministic fallback name of the form `document-<12 hex chars>`.
+///
+/// The hex suffix is the first six bytes of the SHA-256 digest of
+/// `media_type`, a single zero byte, and `data`, in that order. The zero byte
+/// separates the two inputs so that different splits of the same
+/// concatenated bytes hash differently.
+pub fn generate_document_name(media_type: &str, data: &str) -> String {
+    // Six digest bytes render as twelve lowercase hex characters.
+    const DIGEST_PREFIX_BYTES: usize = 6;
+
+    let mut hasher = Sha256::new();
+    hasher.update(media_type.as_bytes());
+    hasher.update([0u8]);
+    hasher.update(data.as_bytes());
+    let digest = hasher.finalize();
+
+    let short_hash: String = digest
+        .iter()
+        .take(DIGEST_PREFIX_BYTES)
+        .map(|byte| format!("{byte:02x}"))
+        .collect();
+    format!("document-{short_hash}")
+}
+
+/// Builds a `KiroDocument` from an already-normalized source.
+///
+/// Returns `None` when the media type has no known format tag, or when the
+/// source type is neither `base64` nor a `text` source with a media type that
+/// supports text. `base64` data is forwarded as-is; `text` data is
+/// base64-encoded first.
+fn kiro_document_from_source(
+    name: String,
+    source: crate::anthropic::types::ImageSource,
+) -> Option<KiroDocument> {
+    let format = document_format_from_media_type(&source.media_type)?;
+
+    let encoded = if source.source_type == "base64" {
+        source.data
+    } else if source.source_type == "text"
+        && document_media_type_supports_text_source(&source.media_type)
+    {
+        base64::engine::general_purpose::STANDARD.encode(source.data.as_bytes())
+    } else {
+        return None;
+    };
+
+    Some(KiroDocument::from_base64(name, format, encoded))
+}
+
+/// Converts a document block's `name` and `source` into a `KiroDocument`.
+///
+/// Returns `Ok(None)` when the media type is not recognised, or when
+/// `kiro_document_from_source` rejects the normalized source. The data is
+/// normalized the same way as in `normalize_document_block_payload`: `base64`
+/// data is trimmed, and `text` data (for media types that support it) has its
+/// line endings rewritten to `\n`. Any other source type leaves the data
+/// untouched.
+///
+/// # Errors
+/// The visible code never returns `Err`; the `Result` is part of the
+/// signature callers rely on.
+pub fn kiro_document_from_block(
+    name: Option<String>,
+    mut source: crate::anthropic::types::ImageSource,
+) -> Result<Option<KiroDocument>, ConversionError> {
+    let Some(normalized_media_type) = canonical_document_media_type(&source.media_type) else {
+        return Ok(None);
+    };
+
+    // Normalize `source.data` in place and reallocate only when something
+    // actually changes; the payload can be large and is only borrowed below.
+    match source.source_type.as_str() {
+        "base64" => {
+            let trimmed = source.data.trim();
+            if trimmed.len() != source.data.len() {
+                source.data = trimmed.to_string();
+            }
+        },
+        "text" if document_media_type_supports_text_source(normalized_media_type) => {
+            if source.data.contains('\r') {
+                source.data = source.data.replace("\r\n", "\n").replace('\r', "\n");
+            }
+        },
+        _ => {},
+    }
+
+    let normalized_name =
+        normalize_document_name(name.as_deref(), normalized_media_type, &source.data);
+    source.media_type = normalized_media_type.to_string();
+    Ok(kiro_document_from_source(normalized_name, source))
+}
diff --git a/llm-access-kiro/src/anthropic/converter/identity.rs b/llm-access-kiro/src/anthropic/converter/identity.rs
@@ -0,0 +1,143 @@
+//! Model-identity handling: identity probes, Claude Code identity override,
+//! and stripping volatile Claude Code billing headers from system text.
+
+use super::{
+    ResponseModelIdentity, CLAUDE_AGENT_SDK_SYSTEM_IDENTITY_LINE,
+    CLAUDE_CODE_BILLING_HEADER_PREFIX, CLAUDE_CODE_CLI_SYSTEM_IDENTITY_LINE,
+    GENERIC_ANTHROPIC_IDENTITY_OVERRIDE,
+};
+use crate::anthropic::types::MessagesRequest;
+
+/// Returns the model ID with one trailing `-thinking` suffix removed, or the
+/// input unchanged when it has no such suffix.
+fn requested_model_identity_id(model: &str) -> &str {
+    match model.strip_suffix("-thinking") {
+        Some(base_model) => base_model,
+        None => model,
+    }
+}
+
+/// Looks up the short display name (for example `Opus 4.7`) for a requested
+/// model.
+///
+/// The `-thinking` suffix is ignored via `requested_model_identity_id`.
+/// Returns `None` for model IDs that are not in the table.
+fn requested_model_identity_name(model: &str) -> Option<&'static str> {
+    // (model ID, display name)
+    const DISPLAY_NAMES: &[(&str, &str)] = &[
+        ("claude-opus-4-8", "Opus 4.8"),
+        ("claude-opus-4-7", "Opus 4.7"),
+        ("claude-opus-4-6", "Opus 4.6"),
+        ("claude-sonnet-4-6", "Sonnet 4.6"),
+        ("claude-sonnet-4-5", "Sonnet 4.5"),
+        ("claude-haiku-4-5", "Haiku 4.5"),
+    ];
+
+    let model_id = requested_model_identity_id(model);
+    DISPLAY_NAMES
+        .iter()
+        .find(|(known_id, _)| *known_id == model_id)
+        .map(|&(_, display_name)| display_name)
+}
+
+/// Builds the identity reported for a requested model.
+///
+/// `model_name` is the display name prefixed with `Claude `, and `model_id`
+/// is the requested model with any `-thinking` suffix removed. Returns `None`
+/// when the model has no known display name.
+fn response_model_identity(model: &str) -> Option<ResponseModelIdentity> {
+    requested_model_identity_name(model).map(|display_name| ResponseModelIdentity {
+        model_name: format!("Claude {display_name}"),
+        model_id: requested_model_identity_id(model).to_string(),
+    })
+}
+
+/// Returns the identity for `req.model` when the current turn's content is
+/// an identity probe.
+///
+/// Yields `None` when `current_content` is not recognised as a probe, or when
+/// the requested model has no known identity.
+pub fn response_identity_for_current_turn(
+    req: &MessagesRequest,
+    current_content: &str,
+) -> Option<ResponseModelIdentity> {
+    if is_model_identity_probe(current_content) {
+        response_model_identity(&req.model)
+    } else {
+        None
+    }
+}
+
+/// Heuristically detects whether `content` asks about the assistant's
+/// identity or model.
+///
+/// Two views of the text are searched:
+/// * `lower`: the lowercased text, for whole-phrase matches;
+/// * `compact`: `lower` with whitespace, `-`, `_` and backticks removed, so
+///   spellings such as `model id`, `model-id`, `model_id`, `你是 Claude` and
+///   `你是claude` all match the same needle.
+///
+/// Every check is case-insensitive, including the mixed CJK/Latin phrases.
+fn is_model_identity_probe(content: &str) -> bool {
+    const IDENTITY_PHRASES: &[&str] = &[
+        "who are you",
+        "what are you",
+        "your identity",
+        "are you claude",
+        "are you kiro",
+        "你是谁",
+        "你是什么",
+        "你的身份",
+    ];
+    // Matched against `compact`, so spacing and Latin case do not matter.
+    const COMPACT_IDENTITY_PHRASES: &[&str] = &["你是claude", "你是kiro"];
+    const MODEL_PHRASES: &[&str] = &[
+        "what model are you",
+        "which model are you",
+        "your model",
+        "你的模型",
+        "你是什么模型",
+        "你是哪种模型",
+    ];
+
+    let lower = content.to_lowercase();
+    let compact = lower
+        .chars()
+        .filter(|ch| !ch.is_whitespace() && !matches!(ch, '-' | '_' | '`'))
+        .collect::<String>();
+
+    let asks_identity = IDENTITY_PHRASES.iter().any(|&phrase| lower.contains(phrase))
+        || COMPACT_IDENTITY_PHRASES
+            .iter()
+            .any(|&phrase| compact.contains(phrase));
+
+    // A bare "model id" only counts when the text also addresses the
+    // assistant ("you" already covers "your"; "你" already covers "你的").
+    let asks_model_identity = MODEL_PHRASES.iter().any(|&phrase| lower.contains(phrase))
+        || (compact.contains("modelid") && lower.contains("you"))
+        || (compact.contains("模型id") && lower.contains('你'));
+
+    asks_identity || asks_model_identity
+}
+
+/// Produces the `<identity_override>` text for a requested model.
+///
+/// When the model has a known identity, the text names that model and its
+/// model ID; otherwise `GENERIC_ANTHROPIC_IDENTITY_OVERRIDE` is returned.
+pub fn anthropic_identity_override(requested_model: &str) -> String {
+    match response_model_identity(requested_model) {
+        None => GENERIC_ANTHROPIC_IDENTITY_OVERRIDE.to_string(),
+        Some(identity) => {
+            let model_name = &identity.model_name;
+            let model_id = &identity.model_id;
+            format!(
+                "<identity_override>\nYou are Claude, made by Anthropic. For this request, your \
+                 model name is {model_name} and your public API model ID is {model_id}. When \
+                 asked about your identity, model name, or model ID, answer with this Claude \
+                 identity. Never claim to be Kiro, Warp, or any other product. You are Claude, \
+                 running on the Anthropic API platform.\n</identity_override>"
+            )
+        },
+    }
+}
+
+/// Rewrites the "powered by the model named ..." statement in `content` so
+/// that it names the requested model.
+///
+/// * When the requested model has no known display name, `content` is
+///   returned untouched.
+/// * Every line containing both `You are powered by the model named` and
+///   `The exact model ID is` is replaced by the new statement, keeping the
+///   line's leading whitespace.
+/// * When no such line exists, the statement is inserted once, directly after
+///   the first line that equals one of the two Claude Code system identity
+///   lines (ignoring leading whitespace).
+///
+/// The result is reassembled from `str::lines`, so lines are joined with
+/// `\n` and a trailing newline is not kept.
+pub fn normalize_claude_code_model_identity(content: String, requested_model: &str) -> String {
+    let Some(model_name) = requested_model_identity_name(requested_model) else {
+        return content;
+    };
+    let model_id = requested_model_identity_id(requested_model);
+    let replacement = format!(
+        "You are powered by the model named {model_name}. The exact model ID is {model_id}."
+    );
+
+    let is_model_identity_line = |text: &str| {
+        text.contains("You are powered by the model named")
+            && text.contains("The exact model ID is")
+    };
+
+    // Insertion is allowed only when there is nothing to replace, and then
+    // only once.
+    let mut may_insert = !content
+        .lines()
+        .any(|line| is_model_identity_line(line.trim_start()));
+
+    let mut rewritten: Vec<String> = Vec::new();
+    for line in content.lines() {
+        let unindented = line.trim_start();
+        if is_model_identity_line(unindented) {
+            let indent = &line[..line.len() - unindented.len()];
+            rewritten.push(format!("{indent}{replacement}"));
+        } else if may_insert
+            && (unindented == CLAUDE_CODE_CLI_SYSTEM_IDENTITY_LINE
+                || unindented == CLAUDE_AGENT_SDK_SYSTEM_IDENTITY_LINE)
+        {
+            may_insert = false;
+            rewritten.push(line.to_string());
+            rewritten.push(replacement.clone());
+        } else {
+            rewritten.push(line.to_string());
+        }
+    }
+    rewritten.join("\n")
+}
+
+/// Removes every line that `is_claude_code_billing_header_text` identifies as
+/// a Claude Code billing header.
+///
+/// The remaining lines are joined with `\n`; because the text is split with
+/// `str::lines`, a trailing newline is not kept.
+pub fn strip_volatile_claude_code_billing_header(content: String) -> String {
+    let mut kept_lines: Vec<&str> = Vec::new();
+    for line in content.lines() {
+        if is_claude_code_billing_header_text(line) {
+            continue;
+        }
+        kept_lines.push(line);
+    }
+    kept_lines.join("\n")
+}
+
+/// Reports whether `content` looks like a Claude Code billing header.
+///
+/// Ignoring leading whitespace, the text must start with
+/// `CLAUDE_CODE_BILLING_HEADER_PREFIX` and contain at least one of
+/// `cc_version=`, `cc_entrypoint=` or `cch=`.
+fn is_claude_code_billing_header_text(content: &str) -> bool {
+    const BILLING_MARKERS: [&str; 3] = ["cc_version=", "cc_entrypoint=", "cch="];
+
+    let candidate = content.trim_start();
+    if !candidate.starts_with(CLAUDE_CODE_BILLING_HEADER_PREFIX) {
+        return false;
+    }
+    BILLING_MARKERS
+        .iter()
+        .any(|&marker| candidate.contains(marker))
+}