From 0aa20d78730239fd7e8bef639606c6c5be9d5e32 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Mon, 18 May 2026 23:20:03 +0100 Subject: [PATCH 1/8] Add artifact scripts --- .../artifacts/AIChatbotNovaCachedImages.py | 172 ++++++ .../artifacts/AIChatbotNovaConversations.py | 579 ++++++++++++++++++ scripts/artifacts/AIChatbotNovaHistory.py | 276 +++++++++ .../artifacts/AIChatbotNovaHistoryDetail.py | 329 ++++++++++ .../AIChatbotNovaHistoryDetailDocument.py | 411 +++++++++++++ .../AIChatbotNovaHistoryDetailImage.py | 395 ++++++++++++ .../AIChatbotNovaHistoryDetailLink.py | 296 +++++++++ 7 files changed, 2458 insertions(+) create mode 100644 scripts/artifacts/AIChatbotNovaCachedImages.py create mode 100644 scripts/artifacts/AIChatbotNovaConversations.py create mode 100644 scripts/artifacts/AIChatbotNovaHistory.py create mode 100644 scripts/artifacts/AIChatbotNovaHistoryDetail.py create mode 100644 scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py create mode 100644 scripts/artifacts/AIChatbotNovaHistoryDetailImage.py create mode 100644 scripts/artifacts/AIChatbotNovaHistoryDetailLink.py diff --git a/scripts/artifacts/AIChatbotNovaCachedImages.py b/scripts/artifacts/AIChatbotNovaCachedImages.py new file mode 100644 index 00000000..f58839ce --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaCachedImages.py @@ -0,0 +1,172 @@ +__artifacts_v2__ = { + "nova_cache_images": { + "name": "Nova AI Chatbot - Cached Images (Glide Disk Cache)", + "description": ( + "Extracts all cached image files from the Nova AI Chatbot app's Glide disk cache " + "(cache/image_manager_disk_cache/*.0). These .0 files are raw JPEG images that were " + "downloaded from Firebase Storage and cached locally. The module embeds the original " + "cache file paths as file:// URLs in the HTML report, allowing direct preview from " + "the extracted data folder. No copying is performed, preserving forensic integrity. " + "Each image is displayed as a clickable thumbnail with file metadata." 
+ ), + "author": "Guilherme Guilherme", + "version": "1.3", + "date": "2026-05-03", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "The Glide disk cache location: cache/image_manager_disk_cache/*.0. " + "Each .0 file is a raw JPEG. The filename is a SHA-256 hash of the signed Firebase URL. " + "The module directly links to the original file using absolute paths. " + "For the preview to work, the report must be opened on the same computer that extracted " + "the data, and the browser must allow file:// links (most do when the report is also " + "opened from a file:// location)." + ), + "paths": ("*/com.scaleup.chatai/cache/image_manager_disk_cache",), + "function": "get_nova_cache_images", + } +} + +import os +import csv +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_timestamp(ts_sec): + if ts_sec is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ts_sec).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ts_sec) + + +def _format_file_size(size_bytes): + if size_bytes is None: + return "" + try: + size_bytes = int(size_bytes) + if size_bytes < 1024: + return f"{size_bytes} B" + elif size_bytes < 1024**2: + return f"{size_bytes / 1024:.1f} KB" + elif size_bytes < 1024**3: + return f"{size_bytes / (1024**2):.1f} MB" + else: + return f"{size_bytes / (1024**3):.2f} GB" + except (ValueError, TypeError): + return str(size_bytes) + + +def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): + """ + Entry point for the nova_cache_images artifact. + Scans for *.0 files, and generates an HTML gallery with direct file:// links + to the original cache files. No copying is performed. 
+ """ + # Collect all unique cache directories from the glob matches + cache_dirs = set() + for path in files_found: + path = str(path) + if os.path.isdir(path): + cache_dirs.add(path) + else: + parent = os.path.dirname(path) + cache_dirs.add(parent) + + if not cache_dirs: + scripts.ilapfuncs.logfunc("[nova_cache_images] No cache directory found.") + return + + all_images = [] # list of dict with metadata and absolute path + + for cache_dir in cache_dirs: + if not os.path.isdir(cache_dir): + continue + for fname in os.listdir(cache_dir): + if not fname.endswith(".0"): + continue + src_path = os.path.join(cache_dir, fname) + try: + stat = os.stat(src_path) + all_images.append( + { + "original_name": fname, + "abs_path": src_path, + "size": stat.st_size, + "mtime": stat.st_mtime, + } + ) + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_cache_images] Error reading {src_path}: {e}" + ) + + if not all_images: + scripts.ilapfuncs.logfunc("[nova_cache_images] No .0 cache files found.") + return + + all_images.sort(key=lambda x: x["mtime"], reverse=True) + + # Prepare HTML rows + headers = [ + "Thumbnail & Filename", + "File Size", + "Last Modified (UTC)", + "Original Cache Filename", + ] + html_rows = [] + tsv_rows = [] + + for img in all_images: + # Convert absolute path to file:// URL + abs_url = "file://" + os.path.abspath(img["abs_path"]) + # For display, use the basename as label + display_name = f"{img['original_name']}" + thumbnail_html = ( + f'
' + f' ' + f' {_e(display_name)}' + f"
" + f" {_e(display_name)}" + f"
" + ) + size_str = _format_file_size(img["size"]) + mtime_str = _convert_timestamp(img["mtime"]) + html_rows.append((thumbnail_html, size_str, mtime_str, img["original_name"])) + tsv_rows.append((display_name, size_str, mtime_str, img["original_name"])) + + # Generate HTML report directly inside report_folder (top-level _HTML) + report_name = "Nova AI Chatbot - Cached Images" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, html_rows, "cache/image_manager_disk_cache", html_escape=False + ) + report.end_artifact_report() + + # TSV export + tsv_path = os.path.join(report_folder, f"{report_name}.tsv") + with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t") + writer.writerow( + ["Filename", "File Size", "Last Modified (UTC)", "Original Cache Filename"] + ) + writer.writerows(tsv_rows) + + scripts.ilapfuncs.logfunc( + f"[nova_cache_images] Displayed {len(all_images)} cached images using file:// links." + ) diff --git a/scripts/artifacts/AIChatbotNovaConversations.py b/scripts/artifacts/AIChatbotNovaConversations.py new file mode 100644 index 00000000..f2679b8e --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaConversations.py @@ -0,0 +1,579 @@ +__artifacts_v2__ = { + "nova_chatbot_conversations": { + "name": "Nova AI Chatbot - Conversations (Full Detail)", + "description": ( + "Reconstructs full conversations from the AI Chatbot - Nova app by joining " + "History, HistoryDetail, HistoryDetailImage, HistoryDetailDocument, and " + "HistoryDetailLink tables. Produces one row per message with all attachment " + "metadata surfaced inline. Image origin (user‑submitted vs AI‑generated) is " + "determined by the parent message role. Generated images are not resolvable " + "locally due to Firebase signed URL tokens; the report shows the Firebase path " + "and a forensic note. 
Documents are displayed with full metadata and a note " + "confirming they were submitted by the user. Soft‑deleted conversations are " + "flagged on every associated message row." + ), + "author": "Guilherme Guilherme", + "version": "0.5", + "date": "2026-05-03", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Message timestamps are stored as Unix milliseconds (INTEGER) and are " + "converted to UTC for display and timeline submission. " + "HistoryDetail.type: 0 = USER, 1 = ASSISTANT. " + "Attachment columns are empty when no attachment is linked to a message. " + "A conversation flagged as DELETED means History.softDeleted = 1; the record " + "remains in the database after user deletion and is forensically recoverable. " + "chatBotModel is an integer mapped to known AI model names where possible. " + "Image origin is correctly identified by the parent message role: " + "USER messages contain user‑submitted images (e.g., vision queries); " + "ASSISTANT messages contain AI‑generated images. " + "All images are stored on Firebase Storage; local cache filenames are hashes " + "of signed URLs (tokens not on device), so automatic matching is impossible. " + "Documents are also stored on Firebase; no local copy is kept. " + "TSV export contains plain‑text equivalents for all attachment fields." + ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_conversations", + } +} + +import os +import shutil +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. 
+# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", +} + +IMAGE_STATE_MAP = { + 0: "Pending", + 1: "Success", + 2: "Failed", +} + +DOCUMENT_TYPE_MAP = { + 0: "Local File", + 1: "Remote File", +} + +MIME_ICON_MAP = { + "application/pdf": "📄", + "application/msword": "📝", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "📝", + "application/vnd.ms-excel": "📊", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "📊", + "text/plain": "📃", + "text/csv": "📊", + "image/jpeg": "🖼️", + "image/png": "🖼️", + "image/gif": "🖼️", + "image/webp": "🖼️", +} + +# --------------------------------------------------------------------------- +# Scalar helpers +# --------------------------------------------------------------------------- + + +def _convert_ms_timestamp(ms): + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) + + +def _resolve_model(model_int): + if model_int is None: + return "Unknown" + name = CHAT_BOT_MODEL_MAP.get(model_int) + return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" + + +def _resolve_image_state(state_int): + if state_int is None: + return "" + label = IMAGE_STATE_MAP.get(state_int) + return f"{label} ({state_int})" if 
label else f"Unknown State ({state_int})" + + +def _resolve_document_type(type_int): + if type_int is None: + return "" + label = DOCUMENT_TYPE_MAP.get(type_int) + return f"{label} ({type_int})" if label else f"Unknown Type ({type_int})" + + +def _format_role(type_int): + return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") + + +def _format_soft_deleted(value): + return "DELETED" if value == 1 else "No" + + +def _format_file_size(size_bytes): + if size_bytes is None: + return "" + try: + size_bytes = int(size_bytes) + if size_bytes < 1024: + return f"{size_bytes} B" + elif size_bytes < 1024**2: + return f"{size_bytes / 1024:.1f} KB" + elif size_bytes < 1024**3: + return f"{size_bytes / (1024**2):.1f} MB" + else: + return f"{size_bytes / (1024**3):.2f} GB" + except (ValueError, TypeError): + return str(size_bytes) + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +# --------------------------------------------------------------------------- +# Image resolution – always None (no local preview) +# --------------------------------------------------------------------------- +def _resolve_image_file(db_url, seeker, images_dir): + return None + + +# --------------------------------------------------------------------------- +# Rich HTML cell builders +# --------------------------------------------------------------------------- + + +def _build_image_html(msg_type, img_urls, img_prompts, img_states, resolved_filenames): + """ + Build HTML cell for images. Uses msg_type to determine origin. + """ + if not img_urls: + return "" + + urls = [u.strip() for u in img_urls.split(",") if u.strip()] + prompts = ( + [p.strip() for p in img_prompts.split(",") if p.strip()] if img_prompts else [] + ) + + parts = [] + for i, url in enumerate(urls): + prompt = prompts[i] if i < len(prompts) else "" + filename = resolved_filenames[i] if i < len(resolved_filenames) else None + + cell = '
' + + # Prompt + if prompt: + cell += f'
Prompt: {_e(prompt)}
' + + # Forensic note based on msg_type + if msg_type == 0: # USER → user‑submitted (vision query) + cell += ( + f'
' + f" 📤 User‑submitted image
" + f" This image was uploaded by the device user (e.g., as part of a vision query). " + f" The file content is stored on Firebase Storage and is not cached locally." + f"
" + ) + else: # ASSISTANT or unknown → AI‑generated + cell += ( + f'
' + f" 🤖 AI‑generated image
" + f" This image was created by the AI based on the user prompt. " + f" It is stored on Firebase Storage; a temporary local copy may exist " + f" in cache/image_manager_disk_cache/*.0 but the filename " + f" is a hash of a signed URL that includes a token not stored on the device. " + f" Manual inspection of .0 files is recommended.
" + f" Forensic action: Examine .0 files directly as JPEG." + f"
" + ) + + # Firebase path (was "Internal path") + cell += ( + f'
' + f" Firebase path:
" + f' {_e(url)}' + f"
" + ) + cell += "
" + + if i < len(urls) - 1: + cell += ( + '
' + ) + parts.append(cell) + + return "".join(parts) + + +def _build_document_html(doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types): + """ + Build HTML cell for documents – no local preview, shows Firebase path and forensic note. + """ + if not doc_names: + return "" + + names = [n.strip() for n in doc_names.split(",") if n.strip()] + mimes = ( + [m.strip() for m in doc_mime_types.split(",") if m.strip()] + if doc_mime_types + else [] + ) + sizes = [s.strip() for s in doc_sizes.split(",") if s.strip()] if doc_sizes else [] + urls = [u.strip() for u in doc_urls.split(",") if u.strip()] if doc_urls else [] + types = [t.strip() for t in doc_types.split(",") if t.strip()] if doc_types else [] + + parts = [] + for i, name in enumerate(names): + mime = mimes[i] if i < len(mimes) else "" + size_raw = sizes[i] if i < len(sizes) else None + url = urls[i] if i < len(urls) else "" + dtype_raw = types[i] if i < len(types) else None + + icon = MIME_ICON_MAP.get(mime, "📎") + size_label = ( + _format_file_size(int(size_raw)) + if size_raw and size_raw.lstrip("-").isdigit() + else "" + ) + dtype_label = ( + _resolve_document_type(int(dtype_raw)) + if dtype_raw and dtype_raw.lstrip("-").isdigit() + else "" + ) + + cell = ( + f'
' + f'
{icon}
' + f'
{_e(name)}
' + ) + if mime: + cell += f"
MIME Type: {_e(mime)}
" + if size_label: + cell += f"
Size: {_e(size_label)}
" + if url: + cell += ( + f"
Firebase path:
" + f' {_e(url)}
' + ) + if dtype_label: + cell += f"
Source Type: {_e(dtype_label)}
" + + # Forensic note – same as standalone document module + cell += ( + f'
' + f" ⚠️ Forensic note: This file was submitted by the" + f" user to the AI assistant as part of this conversation." + f"
" + f"
" + ) + + if i < len(names) - 1: + cell += ( + '
' + ) + parts.append(cell) + + return "".join(parts) + + +# --------------------------------------------------------------------------- +# Plain-text builders for TSV / timeline +# --------------------------------------------------------------------------- + + +def _build_image_tsv( + msg_type, img_urls, img_prompts, img_states, img_mime_types, img_pipelines +): + """Flat plain‑text representation for TSV.""" + if not img_urls: + return "" + origin = "user-submitted" if msg_type == 0 else "ai-generated" + parts = [f"Origin: {origin}", f"URL: {img_urls}"] + if img_prompts: + parts.append(f"Prompt: {img_prompts}") + if img_states: + states = ", ".join( + _resolve_image_state(int(s.strip())) + for s in img_states.split(",") + if s.strip().lstrip("-").isdigit() + ) + if states: + parts.append(f"State: {states}") + if img_mime_types: + parts.append(f"MIME: {img_mime_types}") + if img_pipelines: + parts.append(f"Pipeline: {img_pipelines}") + return " | ".join(parts) + + +def _build_document_tsv(doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types): + """Flat plain‑text representation for TSV.""" + if not doc_names: + return "" + size_label = ( + _format_file_size(doc_sizes) + if doc_sizes and doc_sizes.lstrip("-").isdigit() + else "" + ) + dtype_label = "" + if doc_types: + first_type = doc_types.split(",")[0].strip() + if first_type.lstrip("-").isdigit(): + dtype_label = _resolve_document_type(int(first_type)) + else: + dtype_label = first_type + parts = [f"Name: {doc_names}"] + if doc_mime_types: + parts.append(f"MIME: {doc_mime_types}") + if size_label: + parts.append(f"Size: {size_label}") + if doc_urls: + parts.append(f"Path: {doc_urls}") + if dtype_label: + parts.append(f"Type: {dtype_label}") + parts.append("Note: File submitted by user to AI assistant") + return " | ".join(parts) + + +# --------------------------------------------------------------------------- +# SQL (unchanged) +# --------------------------------------------------------------------------- 
+QUERY = """ +SELECT + h.id AS conv_id, + h.UUID AS conv_uuid, + h.title AS conv_title, + h.chatBotModel AS chat_bot_model, + h.softDeleted AS soft_deleted, + h.syncState AS conv_sync_state, + + hd.id AS msg_id, + hd.UUID AS msg_uuid, + hd.type AS msg_type, + hd.text AS msg_text, + hd.token AS msg_token, + hd.reasoningContent AS msg_reasoning, + hd.createdAt AS msg_created_at, + hd.syncState AS msg_sync_state, + + GROUP_CONCAT(DISTINCT hdi.url) AS img_urls, + GROUP_CONCAT(DISTINCT hdi.prompt) AS img_prompts, + GROUP_CONCAT(DISTINCT hdi.state) AS img_states, + GROUP_CONCAT(DISTINCT hdi.mimeType) AS img_mime_types, + GROUP_CONCAT(DISTINCT hdi.pipeline) AS img_pipelines, + + GROUP_CONCAT(DISTINCT hdd.name) AS doc_names, + GROUP_CONCAT(DISTINCT hdd.mimeType) AS doc_mime_types, + GROUP_CONCAT(DISTINCT hdd.size) AS doc_sizes, + GROUP_CONCAT(DISTINCT hdd.url) AS doc_urls, + GROUP_CONCAT(DISTINCT hdd.type) AS doc_types, + + GROUP_CONCAT(DISTINCT hdl.url) AS link_urls + +FROM History h +INNER JOIN HistoryDetail hd + ON hd.historyID = h.id +LEFT JOIN HistoryDetailImage hdi + ON hdi.historyDetailID = hd.id +LEFT JOIN HistoryDetailDocument hdd + ON hdd.historyDetailID = hd.id +LEFT JOIN HistoryDetailLink hdl + ON hdl.historyDetailID = hd.id +GROUP BY hd.id +ORDER BY h.id ASC, hd.createdAt ASC +""" + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def get_nova_chatbot_conversations(files_found, report_folder, seeker, wrap_text): + for file_found in files_found: + file_found = str(file_found) + if not file_found.endswith("chat-ai.db"): + continue + + try: + db = sqlite3.connect(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_conversations] Error reading {file_found}: {e}" + ) + continue + + if not rows_raw: + 
scripts.ilapfuncs.logfunc( + f"[nova_chatbot_conversations] No records found in {file_found}." + ) + continue + + images_dir = os.path.join(report_folder, "nova_images") + os.makedirs(images_dir, exist_ok=True) + + headers = [ + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Conv. Sync State", + "Msg. ID", + "Msg. UUID", + "Role", + "Message Text", + "Token Count", + "Reasoning Content", + "Message Timestamp (UTC)", + "Msg. Sync State", + "Image Attachment", + "Document Attachment", + "Link URL(s)", + ] + + html_rows = [] + tsv_rows = [] + + for row in rows_raw: + ( + conv_id, + conv_uuid, + conv_title, + chat_bot_model, + soft_deleted, + conv_sync_state, + msg_id, + msg_uuid, + msg_type, + msg_text, + msg_token, + msg_reasoning, + msg_created_at, + msg_sync_state, + img_urls, + img_prompts, + img_states, + img_mime_types, + img_pipelines, + doc_names, + doc_mime_types, + doc_sizes, + doc_urls, + doc_types, + link_urls, + ) = row + + # Resolve images (always None, but we need a list parallel to URLs) + resolved_filenames = [] + if img_urls: + for raw_url in img_urls.split(","): + raw_url = raw_url.strip() + resolved_filenames.append( + _resolve_image_file(raw_url, seeker, images_dir) + ) + + # Common scalar columns + common = ( + conv_id, + conv_uuid or "", + conv_title or "", + _resolve_model(chat_bot_model), + _format_soft_deleted(soft_deleted), + conv_sync_state if conv_sync_state is not None else "", + msg_id, + msg_uuid or "", + _format_role(msg_type), + msg_text or "", + msg_token if msg_token is not None else "", + msg_reasoning or "", + _convert_ms_timestamp(msg_created_at), + msg_sync_state if msg_sync_state is not None else "", + ) + + # HTML cells + img_html = _build_image_html( + msg_type, img_urls, img_prompts, img_states, resolved_filenames + ) + doc_html = _build_document_html( + doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types + ) + html_rows.append(common + (img_html, doc_html, link_urls or "")) + + # TSV 
cells + img_tsv = _build_image_tsv( + msg_type, + img_urls, + img_prompts, + img_states, + img_mime_types, + img_pipelines, + ) + doc_tsv = _build_document_tsv( + doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types + ) + tsv_rows.append(common + (img_tsv, doc_tsv, link_urls or "")) + + # HTML report + report_name = "Nova AI Chatbot - Conversations (Full Detail)" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, html_rows, file_found, html_escape=False + ) + report.end_artifact_report() + + # TSV and timeline + scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) + scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) diff --git a/scripts/artifacts/AIChatbotNovaHistory.py b/scripts/artifacts/AIChatbotNovaHistory.py new file mode 100644 index 00000000..9939b8cf --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaHistory.py @@ -0,0 +1,276 @@ +__artifacts_v2__ = { + "nova_chatbot_history": { + "name": "Nova AI Chatbot - Conversation History", + "description": ( + "Extracts the conversation index from the AI Chatbot - Nova app " + "(com.scaleup.chatai) from the History table. " + "Each row represents one conversation and includes the conversation title, " + "AI model used, starred and soft-deleted status, all relevant timestamps, " + "and sync metadata. Each row is further enriched with three summary columns " + "derived from HistoryDetail: total message count, timestamp of the last " + "message, and the text of the first user message — providing immediate " + "investigative context without requiring the full message detail report. " + "Soft-deleted conversations are flagged on every row." + ), + "author": "Guilherme Guilherme", + "version": "0.2", + "date": "2025-04-27", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. 
" + "All timestamps (createdAt, updatedAt, lastModifiedAt) are stored as Unix " + "milliseconds (INTEGER) and converted to UTC strings for display. " + "chatBotModel is an integer mapped to known AI model names where possible; " + "unknown values are shown as 'Unknown Model (N)'. " + "softDeleted = 1 indicates the conversation was deleted by the user but " + "remains physically present in the database and is forensically recoverable. " + "starred = 1 indicates the user bookmarked the conversation. " + "assistantId identifies a custom AI assistant persona assigned to the " + "conversation when not NULL. " + "captionHistoryId links to an associated caption or summary history entry " + "when present. " + "message_count, last_message_at, and first_user_message are aggregated " + "from HistoryDetail via LEFT JOIN so conversations with zero messages are " + "still returned. first_user_message reflects the earliest USER-role message " + "text (HistoryDetail.type = 0)." + ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_history", + } +} + +import sqlite3 +import datetime +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source +# (com.scaleup.chatai.ui.conversation.FirestoreHistory). +# The integer stored in the database is the ENUM ORDINAL (0-based position), +# NOT the botId from chatbotAgentMap. These are two independent systems. +# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. +# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; +# presence of HistoryDetailImage records confirms image generation regardless of label. 
+# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API +# call may have used DeepSeek V3; the field reflects the UI selector, not the API. +# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", # gpt-3.5 + 1: "GPT-5", # gpt-5 + 2: "GPT-4o", # gpt-4o + 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) + 4: "Image Generator", # image-generator + 5: "Vision", # vision + 6: "Google Vision", # googleVision + 7: "Document", # document + 8: "LLaMA 2", # llama2 + 9: "Nova", # nova + 10: "Gemini", # gemini + 11: "Superbot", # superbot + 12: "Logo Generator", # logo-generator + 13: "Tattoo Generator", # tattoo-generator + 14: "Web Search", # webSearch + 15: "Claude", # claude + 16: "DeepSeek", # deepSeek + 17: "Signature Generator", # signature-generator + 18: "Mistral", # mistral + 19: "Grok", # grok + 20: "DeepSeek R1", # deepSeekR1 + 21: "AI Filter", # aiFilter + 22: "Voice Chat", # voiceChat + 23: "Snap & Solve", # snapAndSolve + 24: "Study Planner", # studyPlanner + 25: "Quiz Maker", # quizMaker + 26: "Essay Helper", # essayHelper + 27: "Gemini 3 Pro", # gemini-3-pro + 28: "GPT-5.1", # gpt-5.1 + 29: "GPT-4o Mini", # 4o-mini +} + +# --------------------------------------------------------------------------- +# SQL +# One row per History entry, enriched with three summary columns from +# HistoryDetail via a LEFT JOIN + GROUP BY so conversations with zero +# messages are still returned. 
+# --------------------------------------------------------------------------- +QUERY = """ +SELECT + h.id AS conv_id, + h.UUID AS conv_uuid, + h.title AS title, + h.chatBotModel AS chat_bot_model, + h.assistantId AS assistant_id, + h.captionHistoryId AS caption_history_id, + h.starred AS starred, + h.softDeleted AS soft_deleted, + h.syncState AS sync_state, + h.syncRetryCount AS sync_retry_count, + h.createdAt AS created_at, + h.updatedAt AS updated_at, + h.lastModifiedAt AS last_modified_at, + COUNT(hd.id) AS message_count, + MAX(hd.createdAt) AS last_msg_ts, + MIN(CASE WHEN hd.type = 0 THEN hd.text END) AS first_user_msg +FROM History h +LEFT JOIN HistoryDetail hd + ON hd.historyID = h.id +GROUP BY h.id +ORDER BY h.createdAt ASC +""" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _convert_ms_timestamp(ms): + """Convert a Unix millisecond timestamp to a human-readable UTC string.""" + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) + + +def _resolve_model(model_int): + """Return a labelled model name, falling back to the raw integer for unknowns.""" + if model_int is None: + return "Unknown" + name = CHAT_BOT_MODEL_MAP.get(model_int) + return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" + + +def _format_soft_deleted(value): + """Return a clearly labelled string for the softDeleted field.""" + return "DELETED" if value == 1 else "No" + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def get_nova_chatbot_history(files_found, report_folder, seeker, wrap_text): + """ + Entry point for the nova_chatbot_history artifact. 
+ + Queries the History table enriched with per-conversation summary data + from HistoryDetail. Outputs HTML report, TSV, and timeline. + """ + + for file_found in files_found: + file_found = str(file_found) + + if not file_found.endswith("chat-ai.db"): + continue + + try: + db = sqlite3.connect(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_history] Error reading {file_found}: {e}" + ) + continue + + if not rows_raw: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_history] No records found in {file_found}." + ) + continue + + headers = [ + # --- Identity --- + "Conv. ID", + "Conv. UUID", + "Title", + # --- Model --- + "AI Model", + "Assistant ID", + "Caption History ID", + # --- Flags --- + "Starred", + "Soft Deleted", + "Sync State", + "Sync Retry Count", + # --- Timestamps --- + "Created At (UTC)", + "Updated At (UTC)", + "Last Modified At (UTC)", + # --- Summary from HistoryDetail --- + "Message Count", + "Last Message At (UTC)", + "First User Message", + ] + + rows = [] + for row in rows_raw: + ( + conv_id, + conv_uuid, + title, + chat_bot_model, + assistant_id, + caption_history_id, + starred, + soft_deleted, + sync_state, + sync_retry_count, + created_at, + updated_at, + last_modified_at, + message_count, + last_msg_ts, + first_user_msg, + ) = row + + rows.append( + ( + conv_id, + conv_uuid or "", + title or "", + _resolve_model(chat_bot_model), + assistant_id if assistant_id is not None else "", + caption_history_id or "", + "Yes" if starred else "No", + _format_soft_deleted(soft_deleted), + sync_state if sync_state is not None else "", + sync_retry_count if sync_retry_count is not None else "", + _convert_ms_timestamp(created_at), + _convert_ms_timestamp(updated_at), + _convert_ms_timestamp(last_modified_at), + message_count if message_count is not None else 0, + _convert_ms_timestamp(last_msg_ts), + first_user_msg or "", + ) + ) 
+ + # --- HTML report --- + report_name = "Nova AI Chatbot - History" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, + rows, + file_found, + html_escape=True, + ) + report.end_artifact_report() + + # --- TSV output --- + scripts.ilapfuncs.tsv(report_folder, headers, rows, report_name, file_found) + + # --- Timeline (uses Created At, index 10) --- + scripts.ilapfuncs.timeline(report_folder, report_name, rows, headers) diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetail.py b/scripts/artifacts/AIChatbotNovaHistoryDetail.py new file mode 100644 index 00000000..0ff38a74 --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaHistoryDetail.py @@ -0,0 +1,329 @@ +__artifacts_v2__ = { + "nova_chatbot_history_detail": { + "name": "Nova AI Chatbot - Message Detail", + "description": ( + "Extracts every individual message from the AI Chatbot - Nova app " + "(com.scaleup.chatai) from the HistoryDetail table. " + "Each row represents one message and is enriched with parent conversation " + "context joined from History: conversation title, AI model used, and " + "soft-deleted status. Three attachment presence flags are added via " + "correlated EXISTS subqueries against HistoryDetailImage, " + "HistoryDetailDocument, and HistoryDetailLink, indicating whether each " + "message has an associated image, document, or link without duplicating " + "rows or loading attachment content. " + "Enables message-level timeline reconstruction and rapid attachment triage " + "across all conversations." + ), + "author": "Guilherme Guilherme", + "version": "0.2", + "date": "2025-04-27", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. " + "All timestamps (createdAt, lastModifiedAt) are stored as Unix milliseconds " + "(INTEGER) and converted to UTC strings for display. 
" + "HistoryDetail.type: 0 = USER (message sent by the device user), " + "1 = ASSISTANT (response generated by the AI model). " + "token counts reflect the number of tokens consumed by each message; " + "ASSISTANT messages with 0 tokens typically indicate image-generation " + "responses where no text tokens were billed. " + "reasoningContent contains chain-of-thought reasoning text when the " + "underlying model produces it (e.g. DeepSeek-R1 reasoning traces). " + "has_image = Yes means at least one record exists in HistoryDetailImage " + "for this message. " + "has_document = Yes means at least one record exists in HistoryDetailDocument " + "for this message — the user submitted a file to the AI. " + "has_link = Yes means at least one record exists in HistoryDetailLink. " + "softDeleted is inherited from the parent History record; DELETED means the " + "conversation was removed by the user but all messages remain physically in " + "the database and are forensically recoverable. " + "syncState and syncRetryCount reflect cloud synchronisation status." + ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_history_detail", + } +} + +import sqlite3 +import datetime +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source +# (com.scaleup.chatai.ui.conversation.FirestoreHistory). +# The integer stored in the database is the ENUM ORDINAL (0-based position), +# NOT the botId from chatbotAgentMap. These are two independent systems. +# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. +# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; +# presence of HistoryDetailImage records confirms image generation regardless of label. 
+# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API +# call may have used DeepSeek V3; the field reflects the UI selector, not the API. +# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", # gpt-3.5 + 1: "GPT-5", # gpt-5 + 2: "GPT-4o", # gpt-4o + 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) + 4: "Image Generator", # image-generator + 5: "Vision", # vision + 6: "Google Vision", # googleVision + 7: "Document", # document + 8: "LLaMA 2", # llama2 + 9: "Nova", # nova + 10: "Gemini", # gemini + 11: "Superbot", # superbot + 12: "Logo Generator", # logo-generator + 13: "Tattoo Generator", # tattoo-generator + 14: "Web Search", # webSearch + 15: "Claude", # claude + 16: "DeepSeek", # deepSeek + 17: "Signature Generator", # signature-generator + 18: "Mistral", # mistral + 19: "Grok", # grok + 20: "DeepSeek R1", # deepSeekR1 + 21: "AI Filter", # aiFilter + 22: "Voice Chat", # voiceChat + 23: "Snap & Solve", # snapAndSolve + 24: "Study Planner", # studyPlanner + 25: "Quiz Maker", # quizMaker + 26: "Essay Helper", # essayHelper + 27: "Gemini 3 Pro", # gemini-3-pro + 28: "GPT-5.1", # gpt-5.1 + 29: "GPT-4o Mini", # 4o-mini +} + +# --------------------------------------------------------------------------- +# SQL +# One row per HistoryDetail message. +# Parent conversation context (title, model, soft-deleted) is joined from +# History. Attachment presence is detected via correlated EXISTS subqueries — +# lightweight boolean checks that avoid duplicating rows from the attachment +# tables. 
# ---------------------------------------------------------------------------
QUERY = """
SELECT
    -- Message identity
    hd.id               AS msg_id,
    hd.UUID             AS msg_uuid,
    hd.historyID        AS conv_id,

    -- Parent conversation context (from History)
    h.UUID              AS conv_uuid,
    h.title             AS conv_title,
    h.chatBotModel      AS chat_bot_model,
    h.softDeleted       AS soft_deleted,

    -- Message content
    hd.type             AS msg_type,
    hd.text             AS msg_text,
    hd.token            AS token_count,
    hd.reasoningContent AS reasoning_content,

    -- Message timestamps
    hd.createdAt        AS created_at,
    hd.lastModifiedAt   AS last_modified_at,

    -- Sync metadata
    hd.syncState        AS sync_state,
    hd.syncRetryCount   AS sync_retry_count,

    -- Attachment flags (correlated EXISTS — no row multiplication)
    CASE WHEN EXISTS (
        SELECT 1 FROM HistoryDetailImage i
        WHERE i.historyDetailID = hd.id
    ) THEN 1 ELSE 0 END AS has_image,

    CASE WHEN EXISTS (
        SELECT 1 FROM HistoryDetailDocument d
        WHERE d.historyDetailID = hd.id
    ) THEN 1 ELSE 0 END AS has_document,

    CASE WHEN EXISTS (
        SELECT 1 FROM HistoryDetailLink l
        WHERE l.historyDetailID = hd.id
    ) THEN 1 ELSE 0 END AS has_link

FROM HistoryDetail hd
INNER JOIN History h
    ON h.id = hd.historyID
ORDER BY hd.historyID ASC, hd.createdAt ASC
"""


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _convert_ms_timestamp(ms):
    """Convert a Unix millisecond timestamp to a human-readable UTC string.

    Returns "" for None. Any value that cannot be interpreted as a
    millisecond offset (non-numeric text, NaN, out-of-range magnitude) is
    returned as ``str(ms)`` so the raw evidence value is still shown.
    """
    if ms is None:
        return ""
    # Epoch + timedelta avoids the deprecated utcfromtimestamp() and the
    # platform-dependent OSError it raises for negative timestamps.
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    try:
        moment = epoch + datetime.timedelta(milliseconds=ms)
    except (OverflowError, TypeError, ValueError):
        # TypeError covers non-numeric column values (e.g. text).
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def _resolve_model(model_int):
    """Return a labelled model name, falling back to the raw integer for unknowns."""
    if model_int is None:
        return "Unknown"
    name = CHAT_BOT_MODEL_MAP.get(model_int)
    if name:
        return f"{name} ({model_int})"
    return f"Unknown Model ({model_int})"


def _format_role(type_int):
    """Map HistoryDetail.type to a forensically clear role label."""
    if type_int == 0:
        return "USER"
    if type_int == 1:
        return "ASSISTANT"
    return f"UNKNOWN ({type_int})"


def _format_soft_deleted(value):
    """Return a clearly labelled string for the softDeleted field."""
    return "DELETED" if value == 1 else "No"


def _flag(value):
    """Return Yes/No for a boolean integer flag."""
    return "Yes" if value else "No"


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def get_nova_chatbot_history_detail(files_found, report_folder, seeker, wrap_text):
    """
    Entry point for the nova_chatbot_history_detail artifact.

    For every path in ``files_found`` that ends with ``chat-ai.db``, runs
    QUERY (one row per HistoryDetail message, with parent conversation
    context and attachment flags) and writes an HTML report, a TSV file and
    timeline entries into ``report_folder``.

    ``seeker`` and ``wrap_text`` are accepted for the artifact calling
    convention and are not used here. Databases that cannot be read, or that
    yield no rows, are logged and skipped. Returns None.
    """

    for file_found in files_found:
        file_found = str(file_found)

        if not file_found.endswith("chat-ai.db"):
            continue

        db = None
        try:
            db = sqlite3.connect(file_found)
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        except Exception as e:
            # Best-effort boundary: one unreadable database must not abort
            # processing of the remaining files.
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_history_detail] Error reading {file_found}: {e}"
            )
            continue
        finally:
            # Close the handle even when execute()/fetchall() raised.
            if db is not None:
                db.close()

        if not rows_raw:
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_history_detail] No records found in {file_found}."
            )
            continue

        headers = [
            # --- Message identity ---
            "Msg. ID",
            "Msg. UUID",
            "Conv. ID",
            # --- Parent conversation context ---
            "Conv. UUID",
            "Conv. Title",
            "AI Model",
            "Conv. Deleted",
            # --- Message content ---
            "Role",
            "Message Text",
            "Token Count",
            "Reasoning Content",
            # --- Timestamps ---
            "Message Timestamp (UTC)",
            "Last Modified At (UTC)",
            # --- Sync metadata ---
            "Sync State",
            "Sync Retry Count",
            # --- Attachment flags ---
            "Has Image",
            "Has Document",
            "Has Link",
        ]

        rows = []
        for row in rows_raw:
            (
                msg_id,
                msg_uuid,
                conv_id,
                conv_uuid,
                conv_title,
                chat_bot_model,
                soft_deleted,
                msg_type,
                msg_text,
                token_count,
                reasoning_content,
                created_at,
                last_modified_at,
                sync_state,
                sync_retry_count,
                has_image,
                has_document,
                has_link,
            ) = row

            rows.append(
                (
                    msg_id,
                    msg_uuid or "",
                    conv_id,
                    conv_uuid or "",
                    conv_title or "",
                    _resolve_model(chat_bot_model),
                    _format_soft_deleted(soft_deleted),
                    _format_role(msg_type),
                    msg_text or "",
                    # Keep a legitimate 0 visible; only NULL becomes blank.
                    token_count if token_count is not None else "",
                    reasoning_content or "",
                    _convert_ms_timestamp(created_at),
                    _convert_ms_timestamp(last_modified_at),
                    sync_state if sync_state is not None else "",
                    sync_retry_count if sync_retry_count is not None else "",
                    _flag(has_image),
                    _flag(has_document),
                    _flag(has_link),
                )
            )

        # --- HTML report ---
        report_name = "Nova AI Chatbot - HistoryDetail"
        report = ArtifactHtmlReport(report_name)
        report.start_artifact_report(report_folder, report_name)
        report.add_script()
        report.write_artifact_data_table(
            headers,
            rows,
            file_found,
            html_escape=True,
        )
        report.end_artifact_report()

        # --- TSV output ---
        scripts.ilapfuncs.tsv(report_folder, headers, rows, report_name, file_found)

        # --- Timeline (message-level granularity) ---
        # NOTE(review): the message timestamp sits at index 11, but no index
        # is passed to timeline(); confirm which column the helper keys on.
        scripts.ilapfuncs.timeline(report_folder, report_name, rows, headers)
b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py @@ -0,0 +1,411 @@ +__artifacts_v2__ = { + "nova_chatbot_documents": { + "name": "Nova AI Chatbot - Submitted Documents", + "description": ( + "Extracts all document records submitted by the user to the AI from the " + "AI Chatbot - Nova app (HistoryDetailDocument table). Each row represents " + "one document and is enriched with parent message context from HistoryDetail " + "and parent conversation context from History. " + "Documents are stored on Firebase Storage; the database stores only the " + "Firebase object path. No local cache of user‑submitted documents is kept " + "on the device. The metadata and a forensic note are displayed in the HTML " + "report. The full file content is not available for preview. " + "A forensic note is shown on every row confirming the file was actively " + "submitted by the device user to the AI assistant." + ), + "author": "Guilherme Guilherme", + "version": "0.2", + "date": "2025-04-27", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. " + "HistoryDetailDocument.url stores a Firebase path such as " + "'/document///document-input-'. " + "The file content is not stored locally on the device; it lives in " + "Firebase Storage. The metadata record is still valuable for forensic " + "timeline and user activity. " + "type: 0 = Local File (uploaded from the device), 1 = Remote File. " + "size is stored in bytes and converted to a human-readable string. " + "mimeType identifies the document format (e.g. application/pdf). " + "softDeleted is inherited from the parent History record; DELETED means " + "the conversation was removed by the user but the document record remains " + "physically in the database and is forensically recoverable. " + "The user message text associated with the document reveals the query the " + "user submitted alongside the file to the AI." 
+ ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_documents", + } +} + +import os +import shutil +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source +# (com.scaleup.chatai.ui.conversation.FirestoreHistory). +# The integer stored in the database is the ENUM ORDINAL (0-based position), +# NOT the botId from chatbotAgentMap. These are two independent systems. +# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. +# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; +# presence of HistoryDetailImage records confirms image generation regardless of label. +# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API +# call may have used DeepSeek V3; the field reflects the UI selector, not the API. 
+# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", # gpt-3.5 + 1: "GPT-5", # gpt-5 + 2: "GPT-4o", # gpt-4o + 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) + 4: "Image Generator", # image-generator + 5: "Vision", # vision + 6: "Google Vision", # googleVision + 7: "Document", # document + 8: "LLaMA 2", # llama2 + 9: "Nova", # nova + 10: "Gemini", # gemini + 11: "Superbot", # superbot + 12: "Logo Generator", # logo-generator + 13: "Tattoo Generator", # tattoo-generator + 14: "Web Search", # webSearch + 15: "Claude", # claude + 16: "DeepSeek", # deepSeek + 17: "Signature Generator", # signature-generator + 18: "Mistral", # mistral + 19: "Grok", # grok + 20: "DeepSeek R1", # deepSeekR1 + 21: "AI Filter", # aiFilter + 22: "Voice Chat", # voiceChat + 23: "Snap & Solve", # snapAndSolve + 24: "Study Planner", # studyPlanner + 25: "Quiz Maker", # quizMaker + 26: "Essay Helper", # essayHelper + 27: "Gemini 3 Pro", # gemini-3-pro + 28: "GPT-5.1", # gpt-5.1 + 29: "GPT-4o Mini", # 4o-mini +} + +DOCUMENT_TYPE_MAP = { + 0: "Local File", + 1: "Remote File", +} + +MIME_ICON_MAP = { + "application/pdf": "📄", + "application/msword": "📝", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "📝", + "application/vnd.ms-excel": "📊", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "📊", + "text/plain": "📃", + "text/csv": "📊", + "image/jpeg": "🖼️", + "image/png": "🖼️", + "image/gif": "🖼️", + "image/webp": "🖼️", +} + +# --------------------------------------------------------------------------- +# SQL +# One row per HistoryDetailDocument, enriched with parent message and +# conversation context. 
# ---------------------------------------------------------------------------
QUERY = """
SELECT
    -- Document record
    d.id              AS doc_id,
    d.historyDetailID AS msg_id,
    d.url             AS doc_url,
    d.name            AS doc_name,
    d.type            AS doc_type,
    d.size            AS doc_size,
    d.mimeType        AS mime_type,

    -- Parent message context (HistoryDetail)
    hd.historyID      AS conv_id,
    hd.type           AS msg_type,
    hd.text           AS msg_text,
    hd.createdAt      AS msg_created_at,

    -- Parent conversation context (History)
    h.UUID            AS conv_uuid,
    h.title           AS conv_title,
    h.chatBotModel    AS chat_bot_model,
    h.softDeleted     AS soft_deleted

FROM HistoryDetailDocument d
INNER JOIN HistoryDetail hd ON hd.id = d.historyDetailID
INNER JOIN History h ON h.id = hd.historyID
ORDER BY d.id ASC
"""

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _e(text):
    """HTML-escape ``text`` for safe embedding in the report.

    None becomes "". Every other value is stringified first, so falsy but
    meaningful values such as 0 are rendered instead of silently dropped.
    """
    if text is None:
        return ""
    return html_module.escape(str(text))


def _convert_ms_timestamp(ms):
    """Convert a Unix millisecond timestamp to a 'YYYY-MM-DD HH:MM:SS UTC' string.

    Returns "" for None and ``str(ms)`` for any value that cannot be
    interpreted as a millisecond offset (non-numeric, NaN, out of range).
    """
    if ms is None:
        return ""
    # Epoch + timedelta avoids the deprecated utcfromtimestamp() and the
    # platform-dependent OSError it raises for negative timestamps.
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    try:
        moment = epoch + datetime.timedelta(milliseconds=ms)
    except (OverflowError, TypeError, ValueError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def _resolve_model(model_int):
    """Return '<name> (<ordinal>)' for a known chatBotModel value.

    None yields "Unknown"; an ordinal missing from CHAT_BOT_MODEL_MAP yields
    'Unknown Model (<ordinal>)' so the raw value stays visible.
    """
    if model_int is None:
        return "Unknown"
    name = CHAT_BOT_MODEL_MAP.get(model_int)
    if name:
        return f"{name} ({model_int})"
    return f"Unknown Model ({model_int})"


def _resolve_doc_type(type_int):
    """Return '<label> (<value>)' for a document type, or "" for None."""
    if type_int is None:
        return ""
    label = DOCUMENT_TYPE_MAP.get(type_int)
    if label:
        return f"{label} ({type_int})"
    return f"Unknown ({type_int})"


def _format_file_size(size_bytes):
    """Render a byte count as B / KB / MB / GB using 1024-based units.

    Returns "" for None and ``str(size_bytes)`` when the value cannot be
    converted to an integer.
    """
    if size_bytes is None:
        return ""
    try:
        size = int(size_bytes)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float("inf")); keep the raw value on display.
        return str(size_bytes)
    if size < 1024:
        return f"{size} B"
    if size < 1024**2:
        return f"{size / 1024:.1f} KB"
    if size < 1024**3:
        return f"{size / (1024**2):.1f} MB"
    return f"{size / (1024**3):.2f} GB"
+ return str(size_bytes) + +def _format_soft_deleted(value): + return "DELETED" if value == 1 else "No" + +def _format_role(type_int): + return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") + +# --------------------------------------------------------------------------- +# Document file resolution (always returns None – no local copy) +# --------------------------------------------------------------------------- +def _resolve_document_file(doc_url, doc_name, seeker, docs_dir): + """ + Documents are stored on Firebase Storage. The database stores only the + Firebase object path. No local cache of user‑submitted documents is + kept on the device. Therefore this function always returns None. + The HTML cell will show a notice explaining the file is not available + locally. + """ + return None + +# --------------------------------------------------------------------------- +# HTML cell builder +# --------------------------------------------------------------------------- + +def _build_document_cell(doc_name, mime_type, doc_size, doc_url, doc_type, filename): + """ + Build a self-contained HTML cell for one document record showing: + - File icon + name (plain text, no link) + - MIME type, size, source type, and Firebase path + - Forensic note confirming the user submitted this file to the AI + - Notice that the file content is stored on Firebase and not available + """ + icon = MIME_ICON_MAP.get(mime_type, "📎") + size_label = _format_file_size(doc_size) + type_label = _resolve_doc_type(doc_type) + + cell = f'
' + cell += f'
{icon} {_e(doc_name)}
' + + if mime_type: + cell += f"
MIME Type: {_e(mime_type)}
" + if size_label: + cell += f"
Size: {_e(size_label)}
" + if type_label: + cell += f"
Source Type: {_e(type_label)}
" + if doc_url: + cell += ( + f'
' + f" Firebase Path:
" + f' {_e(doc_url)}' + f"
" + ) + + # Notice that the file is not stored locally + cell += ( + f'
' + f" ☁️ File stored on Firebase Storage
" + f" The document content is not available on the device. " + f" The database record confirms the user submitted this file to the AI; " + f" the file itself resides in Firebase Storage and is not cached locally." + f"
" + ) + + # Forensic note (original) + cell += ( + f'
' + f" ⚠️ Forensic note: This file was actively submitted " + f" by the device user to the AI assistant as part of this conversation." + f"
" + ) + cell += "
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def get_nova_chatbot_documents(files_found, report_folder, seeker, wrap_text):
    """
    Entry point for the nova_chatbot_documents artifact.

    For every path in ``files_found`` that ends with ``chat-ai.db``, runs
    QUERY (one row per HistoryDetailDocument record with its parent message
    and conversation context) and writes an HTML report containing one
    metadata card per document, a TSV export and timeline entries.

    No document content is copied or linked: ``_resolve_document_file``
    always returns None, so the card carries metadata only. ``wrap_text`` is
    unused. Unreadable or empty databases are logged and skipped.
    Returns None.
    """
    for file_found in files_found:
        file_found = str(file_found)
        if not file_found.endswith("chat-ai.db"):
            continue

        db = None
        try:
            db = sqlite3.connect(file_found)
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        except Exception as e:
            # Best-effort boundary: keep processing the remaining files.
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_documents] Error reading {file_found}: {e}"
            )
            continue
        finally:
            # Close the handle even when execute()/fetchall() raised.
            if db is not None:
                db.close()

        if not rows_raw:
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_documents] No document records found in {file_found}."
            )
            continue

        # Only passed through to the resolver; nothing is ever written
        # there, so the directory is deliberately not created.
        docs_dir = os.path.join(report_folder, "nova_documents")

        headers = [
            # Document identity
            "Doc. ID",
            "Msg. ID",
            "Conv. ID",
            # Conversation context
            "Conv. UUID",
            "Conv. Title",
            "AI Model",
            "Conv. Deleted",
            # Message context
            "Msg. Role",
            "Msg. Text",
            "Msg. Timestamp (UTC)",
            # Document card (HTML)
            "Document & Metadata",
            # Plain fields for TSV
            "File Name",
            "MIME Type",
            "Size",
            "Source Type",
            "Firebase Path",
        ]

        html_rows = []
        tsv_rows = []

        for row in rows_raw:
            (
                doc_id,
                msg_id,
                doc_url,
                doc_name,
                doc_type,
                doc_size,
                mime_type,
                conv_id,
                msg_type,
                msg_text,
                msg_created_at,
                conv_uuid,
                conv_title,
                chat_bot_model,
                soft_deleted,
            ) = row

            # Always None today (no local copy exists); kept so the cell
            # builder signature stays stable.
            filename = _resolve_document_file(doc_url, doc_name, seeker, docs_dir)

            common = (
                doc_id,
                msg_id,
                conv_id,
                conv_uuid or "",
                conv_title or "",
                _resolve_model(chat_bot_model),
                _format_soft_deleted(soft_deleted),
                _format_role(msg_type),
                msg_text or "",
                _convert_ms_timestamp(msg_created_at),
            )
            plain = (
                doc_name or "",
                mime_type or "",
                _format_file_size(doc_size),
                _resolve_doc_type(doc_type),
                doc_url or "",
            )

            doc_cell = _build_document_cell(
                doc_name, mime_type, doc_size, doc_url, doc_type, filename
            )

            # The table is written with html_escape=False so the card markup
            # renders; presumably that disables escaping for every cell, so
            # database-sourced text (titles, message text, file names) is
            # escaped here to stop it being interpreted as HTML.
            html_rows.append(
                tuple(html_module.escape(str(value)) for value in common)
                + (doc_cell,)
                + tuple(html_module.escape(str(value)) for value in plain)
            )

            # TSV/timeline keep the raw values; the HTML card column is blank.
            tsv_rows.append(common + ("",) + plain)

        # HTML report
        report_name = "Nova AI Chatbot - HistoryDetailDocuments"
        report = ArtifactHtmlReport(report_name)
        report.start_artifact_report(report_folder, report_name)
        report.add_script()
        report.write_artifact_data_table(
            headers, html_rows, file_found, html_escape=False
        )
        report.end_artifact_report()

        # TSV
        scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found)

        # Timeline
        # NOTE(review): the message timestamp sits at index 9, but no index
        # is passed to timeline(); confirm which column the helper keys on.
        scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers)
Timestamp, index 9) + scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py new file mode 100644 index 00000000..c322efd5 --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py @@ -0,0 +1,395 @@ +__artifacts_v2__ = { + "nova_chatbot_images": { + "name": "Nova AI Chatbot - Images (Generated & Submitted)", + "description": ( + "Extracts all image records from the AI Chatbot - Nova app " + "(HistoryDetailImage table). Each row represents one image and is enriched " + "with the parent message context from HistoryDetail and the parent " + "conversation context from History. " + "Images are correctly identified by the parent message role: " + "USER messages contain images submitted by the device user (e.g., vision queries); " + "ASSISTANT messages contain images generated by the AI. " + "Both types are stored on Firebase Storage; no local copies are predictably " + "available offline. The report shows the prompt, metadata, and a forensic note " + "explaining the image origin and storage behaviour. Generation state, pipeline, " + "and style ID are included for AI‑generated images where applicable." + ), + "author": "Guilherme Guilherme", + "version": "0.5", + "date": "2026-05-03", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. " + "HistoryDetailImage.url stores a Firebase Storage object path. " + "Identification of image origin is based on the parent message's type: " + "0 = USER → user‑submitted image; 1 = ASSISTANT → AI‑generated image. " + "The source column in HistoryDetailImage is ignored because it is often " + "misleading (e.g., vision images are marked as source=0 but are user‑submitted). 
" + "AI‑generated images are temporarily cached in cache/image_manager_disk_cache/*.0 " + "but the filenames are SHA‑256 hashes of signed URLs containing a token not " + "stored on the device; automatic matching is impossible. User‑submitted images " + "are not cached locally. " + "state: 1=Success, 0=Pending, 2=Failed (only relevant for AI‑generated). " + "pipeline identifies the generation engine (e.g. flux_tpu). " + "styleId references the visual style preset selected by the user. " + "softDeleted is inherited from the parent History record." + ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_images", + } +} + +import os +import shutil +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", +} + +IMAGE_STATE_MAP = { + 0: "Pending", + 1: "Success", + 2: "Failed", +} + +# --------------------------------------------------------------------------- +# SQL (includes msg_type from HistoryDetail) +# --------------------------------------------------------------------------- +QUERY = """ +SELECT + i.id AS img_id, + 
i.historyDetailID AS msg_id, + i.url AS img_url, + i.prompt AS prompt, + i.state AS state, + i.mimeType AS mime_type, + i.styleId AS style_id, + i.source AS source, + i.sourceUrl AS source_url, + i.pipeline AS pipeline, + + hd.historyID AS conv_id, + hd.type AS msg_type, + hd.text AS msg_text, + hd.createdAt AS msg_created_at, + + h.UUID AS conv_uuid, + h.title AS conv_title, + h.chatBotModel AS chat_bot_model, + h.softDeleted AS soft_deleted + +FROM HistoryDetailImage i +INNER JOIN HistoryDetail hd ON hd.id = i.historyDetailID +INNER JOIN History h ON h.id = hd.historyID +ORDER BY i.id ASC +""" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_ms_timestamp(ms): + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) + + +def _resolve_model(model_int): + if model_int is None: + return "Unknown" + name = CHAT_BOT_MODEL_MAP.get(model_int) + return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" + + +def _resolve_state(state_int): + if state_int is None: + return "" + label = IMAGE_STATE_MAP.get(state_int) + return f"{label} ({state_int})" if label else f"Unknown ({state_int})" + + +def _format_soft_deleted(value): + return "DELETED" if value == 1 else "No" + + +def _format_role(type_int): + return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") + + +def _get_image_origin(msg_type): + """Return 'user-submitted' for msg_type=0, 'AI-generated' for msg_type=1.""" + if msg_type == 0: + return ("user-submitted", "User‑submitted image") + elif msg_type == 1: + return ("ai-generated", "AI‑generated image") + else: + return ("unknown", "Unknown origin") + + +# 
--------------------------------------------------------------------------- +# Image file resolution (always None) +# --------------------------------------------------------------------------- +def _resolve_image_file(db_url, seeker, images_dir): + return None + + +# --------------------------------------------------------------------------- +# HTML cell builder +# --------------------------------------------------------------------------- + + +def _build_image_cell( + img_url, prompt, state, mime_type, pipeline, style_id, msg_type, filename +): + """ + Build HTML cell using msg_type to determine image origin. + """ + origin_key, origin_label = _get_image_origin(msg_type) + cell = '
' + + if prompt: + cell += f'
Prompt: {_e(prompt)}
' + + if origin_key == "ai-generated": + cell += ( + f'
' + f" 🤖 AI‑generated image
" + f" This image was created by the AI based on the user prompt. " + f" It is stored on Firebase Storage; a temporary local copy may exist " + f" in cache/image_manager_disk_cache/*.0 but the filename " + f" is a hash of a signed URL that includes a token not stored on the device. " + f" Manual inspection of .0 files is recommended.
" + f" Forensic action: Examine .0 files directly as JPEG." + f"
" + ) + elif origin_key == "user-submitted": + cell += ( + f'
' + f" 📤 User‑submitted image
" + f" This image was actively uploaded by the device user (e.g., as part of a vision query). " + f" The file content is stored on Firebase Storage and is not cached locally. " + f" Only the metadata record remains on the device." + f"
" + ) + else: + cell += ( + f'
' + f" ❓ Unknown image origin
" + f" The parent message type is {_e(str(msg_type))} – cannot determine if user‑submitted or AI‑generated." + f"
" + ) + + if pipeline: + cell += f"
Pipeline: {_e(pipeline)}
" + if style_id is not None: + cell += f"
Style ID: {_e(str(style_id))}
" + if mime_type: + cell += f"
MIME Type: {_e(mime_type)}
" + + cell += ( + f'
' + f" Firebase path:
" + f' {_e(img_url)}' + f"
" + ) + cell += "
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text):
    """
    Entry point for the nova_chatbot_images artifact.

    For every path in ``files_found`` that ends with ``chat-ai.db``, runs
    QUERY (one row per HistoryDetailImage record with its parent message and
    conversation context) and writes an HTML report with one metadata card
    per image, a TSV export and timeline entries.

    Image origin is derived from the parent message type (0 = user-submitted,
    1 = AI-generated) and emitted as a trailing "Image Origin" column so it
    is also present in the TSV, where the HTML card is blank. No image
    content is copied: ``_resolve_image_file`` always returns None.
    ``wrap_text`` is unused. Unreadable or empty databases are logged and
    skipped. Returns None.
    """
    for file_found in files_found:
        file_found = str(file_found)
        if not file_found.endswith("chat-ai.db"):
            continue

        db = None
        try:
            db = sqlite3.connect(file_found)
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        except Exception as e:
            # Best-effort boundary: keep processing the remaining files.
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_images] Error reading {file_found}: {e}"
            )
            continue
        finally:
            # Close the handle even when execute()/fetchall() raised.
            if db is not None:
                db.close()

        if not rows_raw:
            scripts.ilapfuncs.logfunc(
                f"[nova_chatbot_images] No image records found in {file_found}."
            )
            continue

        # Only passed through to the resolver; nothing is ever written
        # there, so the directory is deliberately not created.
        images_dir = os.path.join(report_folder, "nova_images")

        headers = [
            "Image ID",
            "Msg. ID",
            "Conv. ID",
            "Conv. UUID",
            "Conv. Title",
            "AI Model",
            "Conv. Deleted",
            "Msg. Role",
            "Msg. Text",
            "Msg. Timestamp (UTC)",
            "Image Preview & Metadata",
            "Prompt",
            "State",
            "Pipeline",
            "Style ID",
            "MIME Type",
            "Firebase Path",
            # Appended last so existing column positions do not move.
            "Image Origin",
        ]

        html_rows = []
        tsv_rows = []

        for row in rows_raw:
            (
                img_id,
                msg_id,
                img_url,
                prompt,
                state,
                mime_type,
                style_id,
                _source,  # selected by QUERY but intentionally not reported
                _source_url,  # selected by QUERY but intentionally not reported
                pipeline,
                conv_id,
                msg_type,
                msg_text,
                msg_created_at,
                conv_uuid,
                conv_title,
                chat_bot_model,
                soft_deleted,
            ) = row

            # Always None today (no local copy exists); kept so the cell
            # builder signature stays stable.
            filename = _resolve_image_file(img_url, seeker, images_dir)

            common = (
                img_id,
                msg_id,
                conv_id,
                conv_uuid or "",
                conv_title or "",
                _resolve_model(chat_bot_model),
                _format_soft_deleted(soft_deleted),
                _format_role(msg_type),
                msg_text or "",
                _convert_ms_timestamp(msg_created_at),
            )

            img_cell = _build_image_cell(
                img_url,
                prompt,
                state,
                mime_type,
                pipeline,
                style_id,
                msg_type,
                filename,
            )

            # Origin follows the parent message role, not the image's own
            # source column.
            if msg_type == 0:
                origin_label = "User-submitted"
            elif msg_type == 1:
                origin_label = "AI-generated"
            else:
                origin_label = "Unknown"

            plain = (
                prompt or "",
                _resolve_state(state),
                pipeline or "",
                style_id if style_id is not None else "",
                mime_type or "",
                img_url or "",
                origin_label,
            )

            # The table is written with html_escape=False so the card markup
            # renders; presumably that disables escaping for every cell, so
            # database-sourced text (prompts, titles, message text) is
            # escaped here to stop it being interpreted as HTML.
            html_rows.append(
                tuple(html_module.escape(str(value)) for value in common)
                + (img_cell,)
                + tuple(html_module.escape(str(value)) for value in plain)
            )

            # TSV/timeline keep the raw values; the HTML card column is blank.
            tsv_rows.append(common + ("",) + plain)

        report_name = "Nova AI Chatbot - HistoryDetailImage"
        report = ArtifactHtmlReport(report_name)
        report.start_artifact_report(report_folder, report_name)
        report.add_script()
        report.write_artifact_data_table(
            headers, html_rows, file_found, html_escape=False
        )
        report.end_artifact_report()

        scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found)
        # NOTE(review): the message timestamp sits at index 9, but no index
        # is passed to timeline(); confirm which column the helper keys on.
        scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers)
b/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py new file mode 100644 index 00000000..30a343c9 --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py @@ -0,0 +1,296 @@ +__artifacts_v2__ = { + "nova_chatbot_links": { + "name": "Nova AI Chatbot - Shared Links", + "description": ( + "Extracts all link records from the AI Chatbot - Nova app " + "(HistoryDetailLink table). Each row represents one URL shared within " + "a conversation and is enriched with parent message context from " + "HistoryDetail and parent conversation context from History, including " + "the message text that accompanied the link, the role of the sender " + "(USER or ASSISTANT), the AI model used in the conversation, and the " + "soft-deleted status of the parent conversation. " + "Links are rendered as clickable anchors in the HTML report. " + "The table is currently empty in observed samples but the module is " + "future-proof and will extract records if the table is populated in " + "other device images or application versions." + ), + "author": "Guilherme Guilherme", + "version": "0.2", + "date": "2025-04-27", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. " + "HistoryDetailLink.url stores the full URL shared in the message. " + "Links may be shared by the USER (e.g. a webpage submitted for AI " + "analysis) or by the ASSISTANT (e.g. a reference link in a response). " + "The role of the message is determined by HistoryDetail.type: " + "0 = USER, 1 = ASSISTANT. " + "softDeleted is inherited from the parent History record; DELETED means " + "the conversation was removed by the user but the link record remains " + "physically in the database and is forensically recoverable. " + "If this report contains no rows the HistoryDetailLink table was empty " + "in the examined database — this is normal for the current app version." 
+ ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_links", + } +} + +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source +# (com.scaleup.chatai.ui.conversation.FirestoreHistory). +# The integer stored in the database is the ENUM ORDINAL (0-based position), +# NOT the botId from chatbotAgentMap. These are two independent systems. +# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. +# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; +# presence of HistoryDetailImage records confirms image generation regardless of label. +# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API +# call may have used DeepSeek V3; the field reflects the UI selector, not the API. 
+# --------------------------------------------------------------------------- +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", # gpt-3.5 + 1: "GPT-5", # gpt-5 + 2: "GPT-4o", # gpt-4o + 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) + 4: "Image Generator", # image-generator + 5: "Vision", # vision + 6: "Google Vision", # googleVision + 7: "Document", # document + 8: "LLaMA 2", # llama2 + 9: "Nova", # nova + 10: "Gemini", # gemini + 11: "Superbot", # superbot + 12: "Logo Generator", # logo-generator + 13: "Tattoo Generator", # tattoo-generator + 14: "Web Search", # webSearch + 15: "Claude", # claude + 16: "DeepSeek", # deepSeek + 17: "Signature Generator", # signature-generator + 18: "Mistral", # mistral + 19: "Grok", # grok + 20: "DeepSeek R1", # deepSeekR1 + 21: "AI Filter", # aiFilter + 22: "Voice Chat", # voiceChat + 23: "Snap & Solve", # snapAndSolve + 24: "Study Planner", # studyPlanner + 25: "Quiz Maker", # quizMaker + 26: "Essay Helper", # essayHelper + 27: "Gemini 3 Pro", # gemini-3-pro + 28: "GPT-5.1", # gpt-5.1 + 29: "GPT-4o Mini", # 4o-mini +} + +# --------------------------------------------------------------------------- +# SQL +# One row per HistoryDetailLink, enriched with parent message and +# conversation context. 
+# --------------------------------------------------------------------------- +QUERY = """ +SELECT + -- Link record + l.id AS link_id, + l.historyDetailID AS msg_id, + l.url AS link_url, + + -- Parent message context (HistoryDetail) + hd.historyID AS conv_id, + hd.type AS msg_type, + hd.text AS msg_text, + hd.createdAt AS msg_created_at, + + -- Parent conversation context (History) + h.UUID AS conv_uuid, + h.title AS conv_title, + h.chatBotModel AS chat_bot_model, + h.softDeleted AS soft_deleted + +FROM HistoryDetailLink l +INNER JOIN HistoryDetail hd ON hd.id = l.historyDetailID +INNER JOIN History h ON h.id = hd.historyID +ORDER BY l.id ASC +""" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_ms_timestamp(ms): + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) + + +def _resolve_model(model_int): + if model_int is None: + return "Unknown" + name = CHAT_BOT_MODEL_MAP.get(model_int) + return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" + + +def _format_soft_deleted(value): + return "DELETED" if value == 1 else "No" + + +def _format_role(type_int): + return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") + + +def _build_link_cell(url): + """Render a URL as a clearly labelled clickable anchor.""" + if not url: + return "" + return ( + f'
' + f' 🔗
' + f' {_e(url)}' + f"
" + ) + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def get_nova_chatbot_links(files_found, report_folder, seeker, wrap_text): + """ + Entry point for the nova_chatbot_links artifact. + + Extracts every HistoryDetailLink record enriched with parent message and + conversation context. Outputs HTML report, TSV, and timeline. + Handles an empty HistoryDetailLink table gracefully. + """ + for file_found in files_found: + file_found = str(file_found) + if not file_found.endswith("chat-ai.db"): + continue + + try: + db = sqlite3.connect(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_links] Error reading {file_found}: {e}" + ) + continue + + # Gracefully handle an empty table — log and produce an empty report + # so the examiner knows the module ran and the table had no records. + if not rows_raw: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_links] HistoryDetailLink table is empty in {file_found}." + ) + report_name = "Nova AI Chatbot - HistoryDetailLinks" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + [ + "Link ID", + "Msg. ID", + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Msg. Role", + "Msg. Text", + "Msg. Timestamp (UTC)", + "Link URL", + ], + [], + file_found, + html_escape=False, + ) + report.end_artifact_report() + continue + + headers = [ + # Link identity + "Link ID", + "Msg. ID", + "Conv. ID", + # Conversation context + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + # Message context + "Msg. Role", + "Msg. Text", + "Msg. 
Timestamp (UTC)", + # Link (HTML rendered) + "Link URL", + ] + + html_rows = [] + tsv_rows = [] + + for row in rows_raw: + ( + link_id, + msg_id, + link_url, + conv_id, + msg_type, + msg_text, + msg_created_at, + conv_uuid, + conv_title, + chat_bot_model, + soft_deleted, + ) = row + + common = ( + link_id, + msg_id, + conv_id, + conv_uuid or "", + conv_title or "", + _resolve_model(chat_bot_model), + _format_soft_deleted(soft_deleted), + _format_role(msg_type), + msg_text or "", + _convert_ms_timestamp(msg_created_at), + ) + + html_rows.append(common + (_build_link_cell(link_url),)) + tsv_rows.append(common + (link_url or "",)) + + # HTML report + report_name = "Nova AI Chatbot - HistoryDetailLinks" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, html_rows, file_found, html_escape=False + ) + report.end_artifact_report() + + # TSV + scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) + + # Timeline (Msg. Timestamp, index 9) + scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) From abac85ba5dabaf61c7de4bb7c4b99ae4e386f728 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Tue, 19 May 2026 11:49:18 +0100 Subject: [PATCH 2/8] Add icons --- scripts/report_icons.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 857fed29..762c4414 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,6 +38,7 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', + 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone', From c585946ba3b8c463a761d86aeb479a57bd066829 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Wed, 20 May 2026 19:58:10 +0100 Subject: [PATCH 3/8] Revert "Add icons" This reverts commit abac85ba5dabaf61c7de4bb7c4b99ae4e386f728. 
--- scripts/report_icons.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 762c4414..857fed29 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,7 +38,6 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', - 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone', From eb345bb8a304e91bf01ad2edf3b09978867282e3 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Wed, 20 May 2026 19:59:54 +0100 Subject: [PATCH 4/8] Forget Changes --- .../artifacts/AIChatbotNovaConversations.py | 3 +- .../AIChatbotNovaHistoryDetailDocument.py | 1 - .../AIChatbotNovaHistoryDetailImage.py | 19 +- scripts/artifacts/AIChatbotNovaMediastore.py | 216 ++++++++++++++++++ scripts/report_icons.py | 1 + 5 files changed, 227 insertions(+), 13 deletions(-) create mode 100644 scripts/artifacts/AIChatbotNovaMediastore.py diff --git a/scripts/artifacts/AIChatbotNovaConversations.py b/scripts/artifacts/AIChatbotNovaConversations.py index f2679b8e..b8d71704 100644 --- a/scripts/artifacts/AIChatbotNovaConversations.py +++ b/scripts/artifacts/AIChatbotNovaConversations.py @@ -39,7 +39,6 @@ } import os -import shutil import sqlite3 import datetime import html as html_module @@ -200,7 +199,7 @@ def _build_image_html(msg_type, img_urls, img_prompts, img_states, resolved_file parts = [] for i, url in enumerate(urls): prompt = prompts[i] if i < len(prompts) else "" - filename = resolved_filenames[i] if i < len(resolved_filenames) else None + # filename = resolved_filenames[i] if i < len(resolved_filenames) else None cell = '
' diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py index 4d1c7aef..750a8578 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py @@ -40,7 +40,6 @@ } import os -import shutil import sqlite3 import datetime import html as html_module diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py index c322efd5..bcde5bd6 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py @@ -41,7 +41,6 @@ } import os -import shutil import sqlite3 import datetime import html as html_module @@ -321,7 +320,7 @@ def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text): soft_deleted, ) = row - filename = _resolve_image_file(img_url, seeker, images_dir) + # filename = _resolve_image_file(img_url, seeker, images_dir) common = ( img_id, @@ -344,17 +343,17 @@ def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text): pipeline, style_id, msg_type, - filename, + # filename ) # For TSV we also include the origin (derived from msg_type) - origin_label = ( - "User-submitted" - if msg_type == 0 - else "AI-generated" - if msg_type == 1 - else "Unknown" - ) + # origin_label = ( + # "User-submitted" + # if msg_type == 0 + # else "AI-generated" + # if msg_type == 1 + # else "Unknown" + # ) html_rows.append( common diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py new file mode 100644 index 00000000..0d518882 --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaMediastore.py @@ -0,0 +1,216 @@ +__artifacts_v2__ = { + "nova_mediastore": { + "name": "Nova AI Chatbot - MediaStore Aggregation", + "description": ( + "Extracts and correlates metadata for files created, downloaded, or shared " + "by the Nova AI Chatbot app that are cataloged 
inside Android's MediaStore database. " + "By querying the central system media provider index, this module resolves virtualized " + "Scoped Storage paths and identifies artifacts physically saved across shared system " + "folders (such as /sdcard/Movies/ or /sdcard/Download/) where owner_package_name matches " + "the Nova AI identifier. Images and videos are displayed as clickable thumbnails with metadata." + ), + "author": "Guilherme Guilherme", + "version": "1.0", + "date": "2026-05-20", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Target database: com.android.providers.media/databases/external.db. " + "The module parses the 'files' table, pulling records explicitly linked to " + "'com.scaleup.chatai' via the owner_package_name attribute. " + "It establishes clickable previews for media rows directly to the file's raw path." + ), + "paths": ("*/com.android.providers.media/databases/external.db*",), + "function": "get_nova_mediastore", + } +} + +import os +import csv +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs +from scripts.ilapfuncs import open_sqlite_db_readonly + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_timestamp(ts_sec): + if ts_sec is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ts_sec).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ts_sec) + + +def _format_file_size(size_bytes): + if size_bytes is None: + return "" + try: + size_bytes = int(size_bytes) + if size_bytes < 1024: + return f"{size_bytes} B" + elif size_bytes < 1024**2: + return f"{size_bytes / 1024:.1f} KB" + elif size_bytes < 1024**2: + return f"{size_bytes / (1024**2):.1f} MB" + else: + return f"{size_bytes / (1024**3):.2f} GB" + except (ValueError, TypeError): + return str(size_bytes) + + +def get_nova_mediastore(files_found, report_folder, seeker, 
wrap_text): + """ + Entry point for the nova_mediastore artifact. + Queries the Android MediaProvider system database to locate all indexed entries + belonging to Nova AI, tracking down actual storage paths of media items. + """ + db_file = None + for file_found in files_found: + if file_found.endswith("external.db"): + db_file = file_found + break + + if not db_file: + scripts.ilapfuncs.logfunc( + "[nova_mediastore] MediaStore 'external.db' database not found in extraction." + ) + return + + try: + db = open_sqlite_db_readonly(db_file) + cursor = db.cursor() + # Querying MediaStore files schema filtering specifically by Nova package name + cursor.execute( + """ + SELECT + _id, + _data, + _size, + date_added, + date_modified, + mime_type, + title + FROM files + WHERE owner_package_name = 'com.scaleup.chatai' + ORDER BY date_added DESC + """ + ) + all_rows = cursor.fetchall() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_mediastore] Failed to query external.db: {e}" + ) + return + + if not all_rows: + scripts.ilapfuncs.logfunc( + "[nova_mediastore] No entries found for package 'com.scaleup.chatai' inside MediaStore." 
+ ) + db.close() + return + + headers = [ + "Media & Preview", + "Physical Storage Path", + "File Size", + "Date Added (UTC)", + "Date Modified (UTC)", + "MIME Type", + ] + html_rows = [] + tsv_rows = [] + + for row in all_rows: + media_id = row[0] + raw_path = str(row[1]) if row[1] else "" + size_bytes = row[2] + added_ts = row[3] + modified_ts = row[4] + mime_type = str(row[5]) if row[5] else "" + title = str(row[6]) if row[6] else "Unknown" + + size_str = _format_file_size(size_bytes) + added_str = _convert_timestamp(added_ts) + modified_str = _convert_timestamp(modified_ts) + + # Build absolute URL link to the mapped data file location + abs_url = "file://" + os.path.abspath(raw_path) + base_name = os.path.basename(raw_path) if raw_path else title + + # Create a preview column based on file MIME type categorization + if mime_type.startswith("image/"): + preview_html = ( + f'
' + f' ' + f' {_e(base_name)}' + f"
" + f" {_e(base_name)}" + f"
" + ) + elif mime_type.startswith("video/"): + preview_html = ( + f'
' + f'
" + f' {_e(base_name)} (🎬 Video)' + f"
" + ) + else: + # For documents or audio recordings, output a non-media generic folder block representation + preview_html = ( + f'
' + f' 📄
' + f' {_e(base_name)}' + f"
" + ) + + html_rows.append( + (preview_html, raw_path, size_str, added_str, modified_str, mime_type) + ) + tsv_rows.append( + (base_name, raw_path, size_str, added_str, modified_str, mime_type) + ) + + db.close() + + # Generate HTML report + report_name = "Nova AI Chatbot - MediaStore Aggregation" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, html_rows, db_file, html_escape=False + ) + report.end_artifact_report() + + # Generate TSV Export + tsv_path = os.path.join(report_folder, f"{report_name}.tsv") + with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t") + writer.writerow( + [ + "Filename", + "Physical Storage Path", + "File Size", + "Date Added (UTC)", + "Date Modified (UTC)", + "MIME Type", + ] + ) + writer.writerows(tsv_rows) + + scripts.ilapfuncs.logfunc( + f"[nova_mediastore] Successfully mapped {len(all_rows)} files via MediaStore indexing using file:// URLs." + ) diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 857fed29..762c4414 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,6 +38,7 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', + 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone', From fc8a36a063a8291202ff32183281859711de8ac2 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Wed, 20 May 2026 20:00:27 +0100 Subject: [PATCH 5/8] Revert "Forget Changes" This reverts commit eb345bb8a304e91bf01ad2edf3b09978867282e3. 
--- .../artifacts/AIChatbotNovaConversations.py | 3 +- .../AIChatbotNovaHistoryDetailDocument.py | 1 + .../AIChatbotNovaHistoryDetailImage.py | 19 +- scripts/artifacts/AIChatbotNovaMediastore.py | 216 ------------------ scripts/report_icons.py | 1 - 5 files changed, 13 insertions(+), 227 deletions(-) delete mode 100644 scripts/artifacts/AIChatbotNovaMediastore.py diff --git a/scripts/artifacts/AIChatbotNovaConversations.py b/scripts/artifacts/AIChatbotNovaConversations.py index b8d71704..f2679b8e 100644 --- a/scripts/artifacts/AIChatbotNovaConversations.py +++ b/scripts/artifacts/AIChatbotNovaConversations.py @@ -39,6 +39,7 @@ } import os +import shutil import sqlite3 import datetime import html as html_module @@ -199,7 +200,7 @@ def _build_image_html(msg_type, img_urls, img_prompts, img_states, resolved_file parts = [] for i, url in enumerate(urls): prompt = prompts[i] if i < len(prompts) else "" - # filename = resolved_filenames[i] if i < len(resolved_filenames) else None + filename = resolved_filenames[i] if i < len(resolved_filenames) else None cell = '
' diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py index 750a8578..4d1c7aef 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py @@ -40,6 +40,7 @@ } import os +import shutil import sqlite3 import datetime import html as html_module diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py index bcde5bd6..c322efd5 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py @@ -41,6 +41,7 @@ } import os +import shutil import sqlite3 import datetime import html as html_module @@ -320,7 +321,7 @@ def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text): soft_deleted, ) = row - # filename = _resolve_image_file(img_url, seeker, images_dir) + filename = _resolve_image_file(img_url, seeker, images_dir) common = ( img_id, @@ -343,17 +344,17 @@ def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text): pipeline, style_id, msg_type, - # filename + filename, ) # For TSV we also include the origin (derived from msg_type) - # origin_label = ( - # "User-submitted" - # if msg_type == 0 - # else "AI-generated" - # if msg_type == 1 - # else "Unknown" - # ) + origin_label = ( + "User-submitted" + if msg_type == 0 + else "AI-generated" + if msg_type == 1 + else "Unknown" + ) html_rows.append( common diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py deleted file mode 100644 index 0d518882..00000000 --- a/scripts/artifacts/AIChatbotNovaMediastore.py +++ /dev/null @@ -1,216 +0,0 @@ -__artifacts_v2__ = { - "nova_mediastore": { - "name": "Nova AI Chatbot - MediaStore Aggregation", - "description": ( - "Extracts and correlates metadata for files created, downloaded, or shared " - "by the Nova AI Chatbot app that are 
cataloged inside Android's MediaStore database. " - "By querying the central system media provider index, this module resolves virtualized " - "Scoped Storage paths and identifies artifacts physically saved across shared system " - "folders (such as /sdcard/Movies/ or /sdcard/Download/) where owner_package_name matches " - "the Nova AI identifier. Images and videos are displayed as clickable thumbnails with metadata." - ), - "author": "Guilherme Guilherme", - "version": "1.0", - "date": "2026-05-20", - "requirements": "none", - "category": "AI Chatbot - Nova", - "notes": ( - "Target database: com.android.providers.media/databases/external.db. " - "The module parses the 'files' table, pulling records explicitly linked to " - "'com.scaleup.chatai' via the owner_package_name attribute. " - "It establishes clickable previews for media rows directly to the file's raw path." - ), - "paths": ("*/com.android.providers.media/databases/external.db*",), - "function": "get_nova_mediastore", - } -} - -import os -import csv -import datetime -import html as html_module -from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs -from scripts.ilapfuncs import open_sqlite_db_readonly - - -def _e(text): - return html_module.escape(str(text)) if text else "" - - -def _convert_timestamp(ts_sec): - if ts_sec is None: - return "" - try: - return datetime.datetime.utcfromtimestamp(ts_sec).strftime( - "%Y-%m-%d %H:%M:%S UTC" - ) - except (OSError, OverflowError, ValueError): - return str(ts_sec) - - -def _format_file_size(size_bytes): - if size_bytes is None: - return "" - try: - size_bytes = int(size_bytes) - if size_bytes < 1024: - return f"{size_bytes} B" - elif size_bytes < 1024**2: - return f"{size_bytes / 1024:.1f} KB" - elif size_bytes < 1024**2: - return f"{size_bytes / (1024**2):.1f} MB" - else: - return f"{size_bytes / (1024**3):.2f} GB" - except (ValueError, TypeError): - return str(size_bytes) - - -def get_nova_mediastore(files_found, report_folder, 
seeker, wrap_text): - """ - Entry point for the nova_mediastore artifact. - Queries the Android MediaProvider system database to locate all indexed entries - belonging to Nova AI, tracking down actual storage paths of media items. - """ - db_file = None - for file_found in files_found: - if file_found.endswith("external.db"): - db_file = file_found - break - - if not db_file: - scripts.ilapfuncs.logfunc( - "[nova_mediastore] MediaStore 'external.db' database not found in extraction." - ) - return - - try: - db = open_sqlite_db_readonly(db_file) - cursor = db.cursor() - # Querying MediaStore files schema filtering specifically by Nova package name - cursor.execute( - """ - SELECT - _id, - _data, - _size, - date_added, - date_modified, - mime_type, - title - FROM files - WHERE owner_package_name = 'com.scaleup.chatai' - ORDER BY date_added DESC - """ - ) - all_rows = cursor.fetchall() - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_mediastore] Failed to query external.db: {e}" - ) - return - - if not all_rows: - scripts.ilapfuncs.logfunc( - "[nova_mediastore] No entries found for package 'com.scaleup.chatai' inside MediaStore." 
- ) - db.close() - return - - headers = [ - "Media & Preview", - "Physical Storage Path", - "File Size", - "Date Added (UTC)", - "Date Modified (UTC)", - "MIME Type", - ] - html_rows = [] - tsv_rows = [] - - for row in all_rows: - media_id = row[0] - raw_path = str(row[1]) if row[1] else "" - size_bytes = row[2] - added_ts = row[3] - modified_ts = row[4] - mime_type = str(row[5]) if row[5] else "" - title = str(row[6]) if row[6] else "Unknown" - - size_str = _format_file_size(size_bytes) - added_str = _convert_timestamp(added_ts) - modified_str = _convert_timestamp(modified_ts) - - # Build absolute URL link to the mapped data file location - abs_url = "file://" + os.path.abspath(raw_path) - base_name = os.path.basename(raw_path) if raw_path else title - - # Create a preview column based on file MIME type categorization - if mime_type.startswith("image/"): - preview_html = ( - f'
' - f' ' - f' {_e(base_name)}' - f"
" - f" {_e(base_name)}" - f"
" - ) - elif mime_type.startswith("video/"): - preview_html = ( - f'
' - f'
" - f' {_e(base_name)} (🎬 Video)' - f"
" - ) - else: - # For documents or audio recordings, output a non-media generic folder block representation - preview_html = ( - f'
' - f' 📄
' - f' {_e(base_name)}' - f"
" - ) - - html_rows.append( - (preview_html, raw_path, size_str, added_str, modified_str, mime_type) - ) - tsv_rows.append( - (base_name, raw_path, size_str, added_str, modified_str, mime_type) - ) - - db.close() - - # Generate HTML report - report_name = "Nova AI Chatbot - MediaStore Aggregation" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, html_rows, db_file, html_escape=False - ) - report.end_artifact_report() - - # Generate TSV Export - tsv_path = os.path.join(report_folder, f"{report_name}.tsv") - with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t") - writer.writerow( - [ - "Filename", - "Physical Storage Path", - "File Size", - "Date Added (UTC)", - "Date Modified (UTC)", - "MIME Type", - ] - ) - writer.writerows(tsv_rows) - - scripts.ilapfuncs.logfunc( - f"[nova_mediastore] Successfully mapped {len(all_rows)} files via MediaStore indexing using file:// URLs." 
- ) diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 762c4414..857fed29 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,7 +38,6 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', - 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone', From da5712bc49b0ef2c084b8a1397aa1d476058bc48 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Wed, 20 May 2026 23:38:47 +0100 Subject: [PATCH 6/8] Add User Media Submissions --- .../artifacts/AIChatbotNovaCachedImages.py | 71 +++- scripts/artifacts/AIChatbotNovaMediastore.py | 381 ++++++++++++++++++ scripts/report_icons.py | 1 + 3 files changed, 438 insertions(+), 15 deletions(-) create mode 100644 scripts/artifacts/AIChatbotNovaMediastore.py diff --git a/scripts/artifacts/AIChatbotNovaCachedImages.py b/scripts/artifacts/AIChatbotNovaCachedImages.py index f58839ce..fa782d1e 100644 --- a/scripts/artifacts/AIChatbotNovaCachedImages.py +++ b/scripts/artifacts/AIChatbotNovaCachedImages.py @@ -10,19 +10,22 @@ "Each image is displayed as a clickable thumbnail with file metadata." ), "author": "Guilherme Guilherme", - "version": "1.3", - "date": "2026-05-03", + "version": "1.4", + "date": "2026-05-20", "requirements": "none", "category": "AI Chatbot - Nova", "notes": ( "The Glide disk cache location: cache/image_manager_disk_cache/*.0. " "Each .0 file is a raw JPEG. The filename is a SHA-256 hash of the signed Firebase URL. " - "The module directly links to the original file using absolute paths. " + "The module searches for the cache directory using multiple fallback paths. " "For the preview to work, the report must be opened on the same computer that extracted " - "the data, and the browser must allow file:// links (most do when the report is also " - "opened from a file:// location)." + "the data, and the browser must allow file:// links." 
+ ), + "paths": ( + "*/com.scaleup.chatai/cache/image_manager_disk_cache", + "*/data/data/com.scaleup.chatai/cache/image_manager_disk_cache", + "*/*/com.scaleup.chatai/cache/image_manager_disk_cache", ), - "paths": ("*/com.scaleup.chatai/cache/image_manager_disk_cache",), "function": "get_nova_cache_images", } } @@ -75,23 +78,66 @@ def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): """ # Collect all unique cache directories from the glob matches cache_dirs = set() + for path in files_found: path = str(path) if os.path.isdir(path): cache_dirs.add(path) else: parent = os.path.dirname(path) - cache_dirs.add(parent) + if os.path.isdir(parent): + cache_dirs.add(parent) + + # If no cache directories found by glob, try manual fallback paths + if not cache_dirs: + extraction_root = getattr(seeker, "search_dir", "") + scripts.ilapfuncs.logfunc( + f"[nova_cache_images] Searching for cache in: {extraction_root}" + ) + + # Try common paths + fallback_paths = [ + os.path.join( + extraction_root, + "data", + "data", + "com.scaleup.chatai", + "cache", + "image_manager_disk_cache", + ), + os.path.join( + extraction_root, + "data", + "com.scaleup.chatai", + "cache", + "image_manager_disk_cache", + ), + os.path.join( + extraction_root, + "com.scaleup.chatai", + "cache", + "image_manager_disk_cache", + ), + ] + + for fb_path in fallback_paths: + if os.path.isdir(fb_path): + cache_dirs.add(fb_path) + scripts.ilapfuncs.logfunc( + f"[nova_cache_images] Found cache via fallback: {fb_path}" + ) + break if not cache_dirs: scripts.ilapfuncs.logfunc("[nova_cache_images] No cache directory found.") return - all_images = [] # list of dict with metadata and absolute path + all_images = [] for cache_dir in cache_dirs: if not os.path.isdir(cache_dir): continue + scripts.ilapfuncs.logfunc(f"[nova_cache_images] Scanning: {cache_dir}") for fname in os.listdir(cache_dir): if not fname.endswith(".0"): continue @@ -117,7 +163,6 @@ def get_nova_cache_images(files_found, 
report_folder, seeker, wrap_text): all_images.sort(key=lambda x: x["mtime"], reverse=True) - # Prepare HTML rows headers = [ "Thumbnail & Filename", "File Size", @@ -128,10 +173,8 @@ def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): tsv_rows = [] for img in all_images: - # Convert absolute path to file:// URL abs_url = "file://" + os.path.abspath(img["abs_path"]) - # For display, use the basename as label - display_name = f"{img['original_name']}" + display_name = img["original_name"] thumbnail_html = ( f'
' f' ' @@ -148,7 +191,6 @@ def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): html_rows.append((thumbnail_html, size_str, mtime_str, img["original_name"])) tsv_rows.append((display_name, size_str, mtime_str, img["original_name"])) - # Generate HTML report directly inside report_folder (top-level _HTML) report_name = "Nova AI Chatbot - Cached Images" report = ArtifactHtmlReport(report_name) report.start_artifact_report(report_folder, report_name) @@ -158,7 +200,6 @@ def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): ) report.end_artifact_report() - # TSV export tsv_path = os.path.join(report_folder, f"{report_name}.tsv") with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: writer = csv.writer(tsvfile, delimiter="\t") @@ -168,5 +209,5 @@ def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): writer.writerows(tsv_rows) scripts.ilapfuncs.logfunc( - f"[nova_cache_images] Displayed {len(all_images)} cached images using file:// links." + f"[nova_cache_images] Displayed {len(all_images)} cached images." ) diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py new file mode 100644 index 00000000..dc0c65b6 --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaMediastore.py @@ -0,0 +1,381 @@ +__artifacts_v2__ = { + "nova_user_submissions": { + "name": "Nova AI Chatbot - User Media Submissions", + "description": ( + "Identifies ALL media files submitted by the user to Nova AI Chatbot. " + "This includes uploaded documents and photos captured using the in-app camera. " + "The artifact lists recovered filenames, user context, timestamps, MIME types, " + "and resolved physical paths from the extracted filesystem." + ), + "author": "Guilherme Guilherme", + "version": "2.7", + "date": "2026-05-20", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Sources: chat-ai.db and Android MediaStore databases. 
" + "Resolves extracted filesystem paths and falls back to filename search when needed. " + "This module does not embed previews; it focuses on metadata and physical path reporting." + ), + "paths": ( + "*/com.scaleup.chatai/databases/chat-ai.db", + "*/com.android.providers.media/databases/external*.db", + "*/com.google.android.providers.media.module/databases/external*.db", + ), + "function": "get_nova_user_submissions", + } +} + +import os +import csv +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_ms_timestamp(ms): + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except Exception: + return str(ms) + + +def _convert_sec_timestamp(ts): + if ts is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(int(ts)).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except Exception: + return str(ts) + + +def _format_file_size(size_bytes): + if size_bytes is None: + return "" + try: + size_bytes = int(size_bytes) + if size_bytes < 1024: + return f"{size_bytes} B" + if size_bytes < 1024**2: + return f"{size_bytes / 1024:.1f} KB" + if size_bytes < 1024**3: + return f"{size_bytes / (1024**2):.1f} MB" + return f"{size_bytes / (1024**3):.2f} GB" + except Exception: + return str(size_bytes) + + +def _normalize_media_path(media_path): + if not media_path: + return None + p = media_path.replace("\\", "/") + if p.startswith("/storage/emulated/0/"): + p = p.replace("/storage/emulated/0/", "/data/media/0/", 1) + return p + + +def _resolve_extraction_path(extraction_root, media_path): + if not extraction_root or not media_path: + return None + + normalized = _normalize_media_path(media_path) + candidate = os.path.normpath(os.path.join(extraction_root, normalized.lstrip("/"))) + if os.path.exists(candidate): + 
return candidate + + fname = os.path.basename(normalized) + filename_candidate = os.path.normpath(os.path.join(extraction_root, fname)) + if os.path.exists(filename_candidate): + return filename_candidate + + data_media_candidate = os.path.normpath( + os.path.join(extraction_root, "data/media/0", fname) + ) + if os.path.exists(data_media_candidate): + return data_media_candidate + + for root, dirs, files in os.walk(extraction_root): + if fname in files: + return os.path.join(root, fname) + + return None + + +def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): + nova_db = None + media_db = None + + for file_found in files_found: + file_found = str(file_found) + if "chat-ai.db" in file_found: + nova_db = file_found + elif "external" in file_found and file_found.endswith(".db"): + media_db = file_found + + if not nova_db: + scripts.ilapfuncs.logfunc("[nova_user_submissions] Nova database not found.") + return + + extraction_root = getattr(seeker, "search_dir", "") + scripts.ilapfuncs.logfunc( + f"[nova_user_submissions] Extraction root: {extraction_root}" + ) + + media_lookup = {} + + if media_db and os.path.exists(media_db): + try: + db = sqlite3.connect(media_db) + cursor = db.cursor() + cursor.execute(""" + SELECT _display_name, _data, _size, date_added, mime_type + FROM files + WHERE _data IS NOT NULL + """) + for ( + display_name, + data_path, + size, + date_added, + mime_type, + ) in cursor.fetchall(): + normalized_path = _normalize_media_path(data_path or "") + if not normalized_path: + continue + + if not any( + x in normalized_path.lower() + for x in ["/download/", "/nova/", "/com.scaleup.chatai/"] + ): + continue + + key = (display_name or os.path.basename(normalized_path)).lower() + media_lookup[key] = { + "media_path": normalized_path, + "extraction_path": _resolve_extraction_path( + extraction_root, normalized_path + ), + "size": size, + "timestamp": date_added, + "mime": mime_type or "", + } + + db.close() + except Exception as 
e: + scripts.ilapfuncs.logfunc( + f"[nova_user_submissions] Error reading MediaStore: {e}" + ) + + all_items = [] + + query_docs = """ + SELECT + hdd.name, + hdd.url, + hdd.mimeType, + hdd.size, + hd.text, + hd.createdAt, + h.title + FROM HistoryDetailDocument hdd + INNER JOIN HistoryDetail hd ON hd.id = hdd.historyDetailID + INNER JOIN History h ON h.id = hd.historyID + WHERE hd.type = 0 + ORDER BY hd.createdAt DESC + """ + + try: + db = sqlite3.connect(nova_db) + cursor = db.cursor() + cursor.execute(query_docs) + for ( + file_name, + firebase_url, + mime_type, + size_db, + message, + created_at, + conversation, + ) in cursor.fetchall(): + media_match = media_lookup.get((file_name or "").lower()) + all_items.append( + { + "type": "submitted_document", + "name": file_name or "Unknown", + "firebase_url": firebase_url or "", + "mime": mime_type or "", + "size_db": size_db, + "message": message or "", + "timestamp": created_at, + "conversation": conversation or "Untitled", + "media_path": media_match["media_path"] if media_match else None, + "extraction_path": media_match["extraction_path"] + if media_match + else None, + } + ) + db.close() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_user_submissions] Error querying documents: {e}" + ) + + if media_db and os.path.exists(media_db): + try: + db = sqlite3.connect(media_db) + cursor = db.cursor() + cursor.execute(""" + SELECT _display_name, _data, _size, date_added, mime_type + FROM files + WHERE bucket_display_name = 'Nova' OR _data LIKE '%/Nova/%' + ORDER BY date_added DESC + """) + for ( + display_name, + data_path, + size, + date_added, + mime_type, + ) in cursor.fetchall(): + normalized_path = _normalize_media_path(data_path or "") + extraction_path = _resolve_extraction_path( + extraction_root, normalized_path + ) + all_items.append( + { + "type": "camera_photo", + "name": display_name + or os.path.basename(normalized_path or "") + or "Unknown", + "mime": mime_type or "image/jpeg", + "size_db": 
size, + "message": "", + "timestamp": date_added, + "conversation": "Camera photo (not associated with a message)", + "media_path": normalized_path, + "extraction_path": extraction_path, + } + ) + db.close() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_user_submissions] Error querying camera photos: {e}" + ) + + deduped = [] + seen = set() + for item in all_items: + key = (item["name"].lower(), item.get("media_path") or "") + if key in seen: + continue + seen.add(key) + deduped.append(item) + + deduped.sort(key=lambda x: x.get("timestamp", 0) or 0, reverse=True) + + if not deduped: + scripts.ilapfuncs.logfunc("[nova_user_submissions] No media found.") + return + + headers = ( + "File Name", + "Type", + "User Message / Context", + "Conversation", + "Date (UTC)", + "Size", + "MIME Type", + "Physical Path", + ) + + rows = [] + tsv_rows = [] + + for item in deduped: + type_label = ( + "📤 Submitted to AI" + if item["type"] == "submitted_document" + else "📷 Camera Photo" + ) + context = item.get("message") or "No message recorded" + if isinstance(context, str) and len(context) > 150: + context = _e(context[:150] + "...") + date_str = ( + _convert_ms_timestamp(item["timestamp"]) + if item["type"] == "submitted_document" + else _convert_sec_timestamp(item["timestamp"]) + ) + size_str = _format_file_size(item.get("size_db")) + + if item.get("extraction_path") and os.path.exists(item["extraction_path"]): + physical_path = item["extraction_path"] + elif item.get("media_path"): + physical_path = item["media_path"] + else: + physical_path = "Cloud-only (Firebase Storage)" + + rows.append( + ( + _e(item["name"]), + type_label, + context, + _e(item["conversation"]), + date_str, + size_str, + _e(item.get("mime") or ""), + _e(physical_path), + ) + ) + + tsv_rows.append( + ( + item["name"], + type_label, + context, + item["conversation"], + date_str, + size_str, + item.get("mime") or "", + physical_path, + ) + ) + + report_name = "Nova AI Chatbot - User Media 
Submissions" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table(headers, rows, nova_db, html_escape=False) + report.end_artifact_report() + + tsv_path = os.path.join(report_folder, f"{report_name}.tsv") + with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t") + writer.writerow( + [ + "File Name", + "Type", + "User Message", + "Conversation", + "Date (UTC)", + "Size", + "MIME Type", + "Physical Path", + ] + ) + writer.writerows(tsv_rows) + + scripts.ilapfuncs.logfunc( + f"[nova_user_submissions] Found {len(deduped)} total items." + ) diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 857fed29..762c4414 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,6 +38,7 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', + 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone', From 5e366d80942d9c91d1457b59dc6c847ded4d2f5c Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Thu, 21 May 2026 16:57:18 +0100 Subject: [PATCH 7/8] Fix Modules --- .../artifacts/AIChatbotNovaCachedImages.py | 237 ++------ .../artifacts/AIChatbotNovaConversations.py | 4 +- scripts/artifacts/AIChatbotNovaHistory.py | 368 +++++-------- .../artifacts/AIChatbotNovaHistoryDetail.py | 436 ++++++--------- .../AIChatbotNovaHistoryDetailDocument.py | 505 ++++++------------ .../AIChatbotNovaHistoryDetailImage.py | 440 +++++---------- .../AIChatbotNovaHistoryDetailLink.py | 6 +- scripts/artifacts/AIChatbotNovaMediastore.py | 474 ++++++---------- 8 files changed, 825 insertions(+), 1645 deletions(-) diff --git a/scripts/artifacts/AIChatbotNovaCachedImages.py b/scripts/artifacts/AIChatbotNovaCachedImages.py index fa782d1e..8ccf705d 100644 --- a/scripts/artifacts/AIChatbotNovaCachedImages.py +++ 
b/scripts/artifacts/AIChatbotNovaCachedImages.py @@ -1,213 +1,94 @@ __artifacts_v2__ = { "nova_cache_images": { - "name": "Nova AI Chatbot - Cached Images (Glide Disk Cache)", + "name": "Cached Images (Glide Disk Cache)", "description": ( - "Extracts all cached image files from the Nova AI Chatbot app's Glide disk cache " - "(cache/image_manager_disk_cache/*.0). These .0 files are raw JPEG images that were " - "downloaded from Firebase Storage and cached locally. The module embeds the original " - "cache file paths as file:// URLs in the HTML report, allowing direct preview from " - "the extracted data folder. No copying is performed, preserving forensic integrity. " - "Each image is displayed as a clickable thumbnail with file metadata." + "Extracts cached image files from the Nova AI Chatbot Glide disk cache " + "(cache/image_manager_disk_cache/*.0). These files are raw JPEG images " + "downloaded from Firebase Storage and cached locally." ), "author": "Guilherme Guilherme", - "version": "1.4", - "date": "2026-05-20", + "version": "2.0", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "The Glide disk cache location: cache/image_manager_disk_cache/*.0. " - "Each .0 file is a raw JPEG. The filename is a SHA-256 hash of the signed Firebase URL. " - "The module searches for the cache directory using multiple fallback paths. " - "For the preview to work, the report must be opened on the same computer that extracted " - "the data, and the browser must allow file:// links." 
- ), + "notes": "Glide disk cache location: cache/image_manager_disk_cache/*.0.", "paths": ( - "*/com.scaleup.chatai/cache/image_manager_disk_cache", - "*/data/data/com.scaleup.chatai/cache/image_manager_disk_cache", - "*/*/com.scaleup.chatai/cache/image_manager_disk_cache", + "*/com.scaleup.chatai/cache/image_manager_disk_cache/*", + "*/data/data/com.scaleup.chatai/cache/image_manager_disk_cache/*", ), "function": "get_nova_cache_images", + "output_types": "standard", + "artifact_icon": "image", } } import os -import csv import datetime -import html as html_module +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import logfunc, tsv, media_to_html -def _e(text): - return html_module.escape(str(text)) if text else "" +def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): + logfunc("Processing data for Nova Cached Images") + data_list = [] -def _convert_timestamp(ts_sec): - if ts_sec is None: - return "" - try: - return datetime.datetime.utcfromtimestamp(ts_sec).strftime( - "%Y-%m-%d %H:%M:%S UTC" - ) - except (OSError, OverflowError, ValueError): - return str(ts_sec) + for file_found in files_found: + file_found = str(file_found) + if os.path.isdir(file_found): + continue + fname = os.path.basename(file_found) + if not fname.endswith(".0"): + continue -def _format_file_size(size_bytes): - if size_bytes is None: - return "" - try: - size_bytes = int(size_bytes) - if size_bytes < 1024: - return f"{size_bytes} B" - elif size_bytes < 1024**2: - return f"{size_bytes / 1024:.1f} KB" - elif size_bytes < 1024**3: - return f"{size_bytes / (1024**2):.1f} MB" - else: - return f"{size_bytes / (1024**3):.2f} GB" - except (ValueError, TypeError): - return str(size_bytes) + try: + stat = os.stat(file_found) + size_bytes = stat.st_size + # Modern, non-deprecated timezone conversion + mtime = datetime.datetime.fromtimestamp(stat.st_mtime, timezone.utc) + mtime_str = 
mtime.strftime("%Y-%m-%d %H:%M:%S UTC") -def get_nova_cache_images(files_found, report_folder, seeker, wrap_text): - """ - Entry point for the nova_cache_images artifact. - Scans for *.0 files, and generates an HTML gallery with direct file:// links - to the original cache files. No copying is performed. - """ - # Collect all unique cache directories from the glob matches - cache_dirs = set() - - for path in files_found: - path = str(path) - if os.path.isdir(path): - cache_dirs.add(path) - else: - parent = os.path.dirname(path) - if os.path.isdir(parent): - cache_dirs.add(parent) - - # If no cache directories found by glob, try manual fallback paths - if not cache_dirs: - extraction_root = getattr(seeker, "search_dir", "") - scripts.ilapfuncs.logfunc( - f"[nova_cache_images] Searching for cache in: {extraction_root}" - ) - - # Try common paths - fallback_paths = [ - os.path.join( - extraction_root, - "data", - "data", - "com.scaleup.chatai", - "cache", - "image_manager_disk_cache", - ), - os.path.join( - extraction_root, - "data", - "com.scaleup.chatai", - "cache", - "image_manager_disk_cache", - ), - os.path.join( - extraction_root, - "com.scaleup.chatai", - "cache", - "image_manager_disk_cache", - ), - ] - - for fb_path in fallback_paths: - if os.path.isdir(fb_path): - cache_dirs.add(fb_path) - scripts.ilapfuncs.logfunc( - f"[nova_cache_images] Found cache via fallback: {fb_path}" - ) - break - - if not cache_dirs: - scripts.ilapfuncs.logfunc("[nova_cache_images] No cache directory found.") - return + # Mandatory framework call: copies images to output structure and populates LAVA tracking manifests + media_to_html(fname, file_found, report_folder) - all_images = [] + # Parse path so it consistently normalizes from the extraction /data node onward + normalized_path = file_found.replace("\\", "/") + if "/data/" in normalized_path: + display_path = "/data/" + normalized_path.split("/data/", 1)[1] + elif "data/data/" in normalized_path: + display_path = 
"/data/data/" + normalized_path.split("data/data/", 1)[1] + else: + display_path = normalized_path - for cache_dir in cache_dirs: - if not os.path.isdir(cache_dir): - continue - scripts.ilapfuncs.logfunc(f"[nova_cache_images] Scanning: {cache_dir}") - for fname in os.listdir(cache_dir): - if not fname.endswith(".0"): - continue - src_path = os.path.join(cache_dir, fname) - try: - stat = os.stat(src_path) - all_images.append( - { - "original_name": fname, - "abs_path": src_path, - "size": stat.st_size, - "mtime": stat.st_mtime, - } - ) - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_cache_images] Error reading {src_path}: {e}" - ) - - if not all_images: - scripts.ilapfuncs.logfunc("[nova_cache_images] No .0 cache files found.") - return + data_list.append((fname, size_bytes, mtime_str, display_path)) - all_images.sort(key=lambda x: x["mtime"], reverse=True) + except Exception as e: + logfunc(f"[nova_cache_images] Error reading {file_found}: {e}") - headers = [ - "Thumbnail & Filename", - "File Size", - "Last Modified (UTC)", - "Original Cache Filename", - ] - html_rows = [] - tsv_rows = [] - - for img in all_images: - abs_url = "file://" + os.path.abspath(img["abs_path"]) - display_name = img["original_name"] - thumbnail_html = ( - f'
' - f' ' - f' {_e(display_name)}' - f"
" - f" {_e(display_name)}" - f"
" - ) - size_str = _format_file_size(img["size"]) - mtime_str = _convert_timestamp(img["mtime"]) - html_rows.append((thumbnail_html, size_str, mtime_str, img["original_name"])) - tsv_rows.append((display_name, size_str, mtime_str, img["original_name"])) - - report_name = "Nova AI Chatbot - Cached Images" + if not data_list: + logfunc("No Nova Cached Images data found.") + return + + report_name = "Cached Images" report = ArtifactHtmlReport(report_name) report.start_artifact_report(report_folder, report_name) report.add_script() + + headers = ( + "Original Cache Filename", + "File Size (Bytes)", + "Last Modified (UTC)", + "Path", + ) + + # HTML injection vulnerabilities are entirely eliminated by delegating escaping to the framework report.write_artifact_data_table( - headers, html_rows, "cache/image_manager_disk_cache", html_escape=False + headers, data_list, report_folder, table_id="NovaCacheImages", html_escape=True ) report.end_artifact_report() - tsv_path = os.path.join(report_folder, f"{report_name}.tsv") - with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t") - writer.writerow( - ["Filename", "File Size", "Last Modified (UTC)", "Original Cache Filename"] - ) - writer.writerows(tsv_rows) - - scripts.ilapfuncs.logfunc( - f"[nova_cache_images] Displayed {len(all_images)} cached images." 
- ) + tsv(report_folder, headers, data_list, report_name) + logfunc(f"[nova_cache_images] Displayed {len(data_list)} cached image entries.") diff --git a/scripts/artifacts/AIChatbotNovaConversations.py b/scripts/artifacts/AIChatbotNovaConversations.py index f2679b8e..3fdce765 100644 --- a/scripts/artifacts/AIChatbotNovaConversations.py +++ b/scripts/artifacts/AIChatbotNovaConversations.py @@ -1,6 +1,6 @@ __artifacts_v2__ = { "nova_chatbot_conversations": { - "name": "Nova AI Chatbot - Conversations (Full Detail)", + "name": "Conversations (Full Detail)", "description": ( "Reconstructs full conversations from the AI Chatbot - Nova app by joining " "History, HistoryDetail, HistoryDetailImage, HistoryDetailDocument, and " @@ -565,7 +565,7 @@ def get_nova_chatbot_conversations(files_found, report_folder, seeker, wrap_text tsv_rows.append(common + (img_tsv, doc_tsv, link_urls or "")) # HTML report - report_name = "Nova AI Chatbot - Conversations (Full Detail)" + report_name = "Conversations (Full Detail)" report = ArtifactHtmlReport(report_name) report.start_artifact_report(report_folder, report_name) report.add_script() diff --git a/scripts/artifacts/AIChatbotNovaHistory.py b/scripts/artifacts/AIChatbotNovaHistory.py index 9939b8cf..3ff37131 100644 --- a/scripts/artifacts/AIChatbotNovaHistory.py +++ b/scripts/artifacts/AIChatbotNovaHistory.py @@ -1,276 +1,188 @@ __artifacts_v2__ = { "nova_chatbot_history": { - "name": "Nova AI Chatbot - Conversation History", + "name": "Conversation History", "description": ( "Extracts the conversation index from the AI Chatbot - Nova app " - "(com.scaleup.chatai) from the History table. " - "Each row represents one conversation and includes the conversation title, " - "AI model used, starred and soft-deleted status, all relevant timestamps, " - "and sync metadata. 
Each row is further enriched with three summary columns " - "derived from HistoryDetail: total message count, timestamp of the last " - "message, and the text of the first user message — providing immediate " - "investigative context without requiring the full message detail report. " - "Soft-deleted conversations are flagged on every row." + "(com.scaleup.chatai) from the History table." ), "author": "Guilherme Guilherme", - "version": "0.2", - "date": "2025-04-27", + "version": "1.0", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Database: com.scaleup.chatai/databases/chat-ai.db. " - "All timestamps (createdAt, updatedAt, lastModifiedAt) are stored as Unix " - "milliseconds (INTEGER) and converted to UTC strings for display. " - "chatBotModel is an integer mapped to known AI model names where possible; " - "unknown values are shown as 'Unknown Model (N)'. " - "softDeleted = 1 indicates the conversation was deleted by the user but " - "remains physically present in the database and is forensically recoverable. " - "starred = 1 indicates the user bookmarked the conversation. " - "assistantId identifies a custom AI assistant persona assigned to the " - "conversation when not NULL. " - "captionHistoryId links to an associated caption or summary history entry " - "when present. " - "message_count, last_message_at, and first_user_message are aggregated " - "from HistoryDetail via LEFT JOIN so conversations with zero messages are " - "still returned. first_user_message reflects the earliest USER-role message " - "text (HistoryDetail.type = 0)." 
- ), + "notes": "Database: com.scaleup.chatai/databases/chat-ai.db", "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), "function": "get_nova_chatbot_history", + "output_types": "standard", + "artifact_icon": "message-square", } } -import sqlite3 import datetime +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import logfunc, tsv, timeline, open_sqlite_db_readonly -# --------------------------------------------------------------------------- -# Known mappings for the chatBotModel integer field. -# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source -# (com.scaleup.chatai.ui.conversation.FirestoreHistory). -# The integer stored in the database is the ENUM ORDINAL (0-based position), -# NOT the botId from chatbotAgentMap. These are two independent systems. -# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. -# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; -# presence of HistoryDetailImage records confirms image generation regardless of label. -# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API -# call may have used DeepSeek V3; the field reflects the UI selector, not the API. 
-# --------------------------------------------------------------------------- CHAT_BOT_MODEL_MAP = { - 0: "ChatGPT 3.5", # gpt-3.5 - 1: "GPT-5", # gpt-5 - 2: "GPT-4o", # gpt-4o - 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) - 4: "Image Generator", # image-generator - 5: "Vision", # vision - 6: "Google Vision", # googleVision - 7: "Document", # document - 8: "LLaMA 2", # llama2 - 9: "Nova", # nova - 10: "Gemini", # gemini - 11: "Superbot", # superbot - 12: "Logo Generator", # logo-generator - 13: "Tattoo Generator", # tattoo-generator - 14: "Web Search", # webSearch - 15: "Claude", # claude - 16: "DeepSeek", # deepSeek - 17: "Signature Generator", # signature-generator - 18: "Mistral", # mistral - 19: "Grok", # grok - 20: "DeepSeek R1", # deepSeekR1 - 21: "AI Filter", # aiFilter - 22: "Voice Chat", # voiceChat - 23: "Snap & Solve", # snapAndSolve - 24: "Study Planner", # studyPlanner - 25: "Quiz Maker", # quizMaker - 26: "Essay Helper", # essayHelper - 27: "Gemini 3 Pro", # gemini-3-pro - 28: "GPT-5.1", # gpt-5.1 - 29: "GPT-4o Mini", # 4o-mini + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", } -# --------------------------------------------------------------------------- -# SQL -# One row per History entry, enriched with three summary columns from -# HistoryDetail via a LEFT JOIN + GROUP BY so conversations with zero -# messages are still returned. 
-# --------------------------------------------------------------------------- QUERY = """ SELECT - h.id AS conv_id, - h.UUID AS conv_uuid, - h.title AS title, - h.chatBotModel AS chat_bot_model, - h.assistantId AS assistant_id, - h.captionHistoryId AS caption_history_id, - h.starred AS starred, - h.softDeleted AS soft_deleted, - h.syncState AS sync_state, - h.syncRetryCount AS sync_retry_count, - h.createdAt AS created_at, - h.updatedAt AS updated_at, - h.lastModifiedAt AS last_modified_at, - COUNT(hd.id) AS message_count, - MAX(hd.createdAt) AS last_msg_ts, - MIN(CASE WHEN hd.type = 0 THEN hd.text END) AS first_user_msg + h.id, + h.UUID, + h.title, + h.chatBotModel, + h.assistantId, + h.captionHistoryId, + h.starred, + h.softDeleted, + h.syncState, + h.syncRetryCount, + h.createdAt, + h.updatedAt, + h.lastModifiedAt, + COUNT(hd.id), + MAX(hd.createdAt), + MIN(CASE WHEN hd.type = 0 THEN hd.text END) FROM History h -LEFT JOIN HistoryDetail hd - ON hd.historyID = h.id +LEFT JOIN HistoryDetail hd ON hd.historyID = h.id GROUP BY h.id ORDER BY h.createdAt ASC """ -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - def _convert_ms_timestamp(ms): - """Convert a Unix millisecond timestamp to a human-readable UTC string.""" + """Safely converts Unix millisecond timestamp using modern timezone.utc.""" if ms is None: return "" try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + return datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc).strftime( "%Y-%m-%d %H:%M:%S UTC" ) except (OSError, OverflowError, ValueError): return str(ms) -def _resolve_model(model_int): - """Return a labelled model name, falling back to the raw integer for unknowns.""" - if model_int is None: - return "Unknown" - name = CHAT_BOT_MODEL_MAP.get(model_int) - return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" - - -def 
_format_soft_deleted(value): - """Return a clearly labelled string for the softDeleted field.""" - return "DELETED" if value == 1 else "No" - - -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - - def get_nova_chatbot_history(files_found, report_folder, seeker, wrap_text): - """ - Entry point for the nova_chatbot_history artifact. + logfunc("Processing data for Conversation History") - Queries the History table enriched with per-conversation summary data - from HistoryDetail. Outputs HTML report, TSV, and timeline. - """ + # Clean the file list of any SQLite journal artifacts + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + file_found = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) - for file_found in files_found: - file_found = str(file_found) + if not file_found: + logfunc("[nova_chatbot_history] Nova database file not found.") + return - if not file_found.endswith("chat-ai.db"): - continue - - try: - db = sqlite3.connect(file_found) - cursor = db.cursor() - cursor.execute(QUERY) - rows_raw = cursor.fetchall() - db.close() - - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_history] Error reading {file_found}: {e}" - ) - continue - - if not rows_raw: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_history] No records found in {file_found}." + try: + db = open_sqlite_db_readonly(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + logfunc(f"[nova_chatbot_history] Error reading {file_found}: {e}") + return + + if not rows_raw: + logfunc(f"[nova_chatbot_history] No records found in {file_found}.") + return + + headers = ( + "Conv. ID", + "Conv. 
UUID", + "Title", + "AI Model", + "Assistant ID", + "Caption History ID", + "Starred", + "Soft Deleted", + "Sync State", + "Sync Retry Count", + "Created At (UTC)", + "Updated At (UTC)", + "Last Modified At (UTC)", + "Message Count", + "Last Message At (UTC)", + "First User Message", + ) + + rows = [] + for row in rows_raw: + model_int = row[3] + model_name = "Unknown" + if model_int is not None: + name_lookup = CHAT_BOT_MODEL_MAP.get(model_int) + model_name = ( + f"{name_lookup} ({model_int})" + if name_lookup + else f"Unknown Model ({model_int})" ) - continue - headers = [ - # --- Identity --- - "Conv. ID", - "Conv. UUID", - "Title", - # --- Model --- - "AI Model", - "Assistant ID", - "Caption History ID", - # --- Flags --- - "Starred", - "Soft Deleted", - "Sync State", - "Sync Retry Count", - # --- Timestamps --- - "Created At (UTC)", - "Updated At (UTC)", - "Last Modified At (UTC)", - # --- Summary from HistoryDetail --- - "Message Count", - "Last Message At (UTC)", - "First User Message", - ] - - rows = [] - for row in rows_raw: + rows.append( ( - conv_id, - conv_uuid, - title, - chat_bot_model, - assistant_id, - caption_history_id, - starred, - soft_deleted, - sync_state, - sync_retry_count, - created_at, - updated_at, - last_modified_at, - message_count, - last_msg_ts, - first_user_msg, - ) = row - - rows.append( - ( - conv_id, - conv_uuid or "", - title or "", - _resolve_model(chat_bot_model), - assistant_id if assistant_id is not None else "", - caption_history_id or "", - "Yes" if starred else "No", - _format_soft_deleted(soft_deleted), - sync_state if sync_state is not None else "", - sync_retry_count if sync_retry_count is not None else "", - _convert_ms_timestamp(created_at), - _convert_ms_timestamp(updated_at), - _convert_ms_timestamp(last_modified_at), - message_count if message_count is not None else 0, - _convert_ms_timestamp(last_msg_ts), - first_user_msg or "", - ) + row[0], + row[1] or "", + row[2] or "", + model_name, + row[4] if row[4] is not 
None else "", + row[5] or "", + "Yes" if row[6] else "No", + "DELETED" if row[7] == 1 else "No", + row[8] if row[8] is not None else "", + row[9] if row[9] is not None else "", + _convert_ms_timestamp(row[10]), + _convert_ms_timestamp(row[11]), + _convert_ms_timestamp(row[12]), + row[13] if row[13] is not None else 0, + _convert_ms_timestamp(row[14]), + row[15] or "", ) - - # --- HTML report --- - report_name = "Nova AI Chatbot - History" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, - rows, - file_found, - html_escape=True, ) - report.end_artifact_report() - # --- TSV output --- - scripts.ilapfuncs.tsv(report_folder, headers, rows, report_name, file_found) + report_name = "History" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + + # html_escape=True hands escaping entirely to the framework backend safely + report.write_artifact_data_table(headers, rows, file_found, html_escape=True) + report.end_artifact_report() - # --- Timeline (uses Created At, index 10) --- - scripts.ilapfuncs.timeline(report_folder, report_name, rows, headers) + tsv(report_folder, headers, rows, report_name, file_found) + timeline(report_folder, report_name, rows, headers) + logfunc( + f"[nova_chatbot_history] Processed {len(rows)} conversation history records." + ) diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetail.py b/scripts/artifacts/AIChatbotNovaHistoryDetail.py index 0ff38a74..f146b0a1 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetail.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetail.py @@ -1,329 +1,203 @@ __artifacts_v2__ = { "nova_chatbot_history_detail": { - "name": "Nova AI Chatbot - Message Detail", + "name": "HistoryDetail", "description": ( "Extracts every individual message from the AI Chatbot - Nova app " - "(com.scaleup.chatai) from the HistoryDetail table. 
" - "Each row represents one message and is enriched with parent conversation " - "context joined from History: conversation title, AI model used, and " - "soft-deleted status. Three attachment presence flags are added via " - "correlated EXISTS subqueries against HistoryDetailImage, " - "HistoryDetailDocument, and HistoryDetailLink, indicating whether each " - "message has an associated image, document, or link without duplicating " - "rows or loading attachment content. " - "Enables message-level timeline reconstruction and rapid attachment triage " - "across all conversations." + "(com.scaleup.chatai) from the HistoryDetail table, enriched with parent " + "conversation context and attachment existence flags." ), "author": "Guilherme Guilherme", - "version": "0.2", - "date": "2025-04-27", + "version": "1.0", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Database: com.scaleup.chatai/databases/chat-ai.db. " - "All timestamps (createdAt, lastModifiedAt) are stored as Unix milliseconds " - "(INTEGER) and converted to UTC strings for display. " - "HistoryDetail.type: 0 = USER (message sent by the device user), " - "1 = ASSISTANT (response generated by the AI model). " - "token counts reflect the number of tokens consumed by each message; " - "ASSISTANT messages with 0 tokens typically indicate image-generation " - "responses where no text tokens were billed. " - "reasoningContent contains chain-of-thought reasoning text when the " - "underlying model produces it (e.g. DeepSeek-R1 reasoning traces). " - "has_image = Yes means at least one record exists in HistoryDetailImage " - "for this message. " - "has_document = Yes means at least one record exists in HistoryDetailDocument " - "for this message — the user submitted a file to the AI. " - "has_link = Yes means at least one record exists in HistoryDetailLink. 
" - "softDeleted is inherited from the parent History record; DELETED means the " - "conversation was removed by the user but all messages remain physically in " - "the database and are forensically recoverable. " - "syncState and syncRetryCount reflect cloud synchronisation status." - ), + "notes": "Database: com.scaleup.chatai/databases/chat-ai.db", "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), "function": "get_nova_chatbot_history_detail", + "output_types": "standard", + "artifact_icon": "message-circle", } } -import sqlite3 import datetime +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import logfunc, tsv, timeline, open_sqlite_db_readonly -# --------------------------------------------------------------------------- -# Known mappings for the chatBotModel integer field. -# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source -# (com.scaleup.chatai.ui.conversation.FirestoreHistory). -# The integer stored in the database is the ENUM ORDINAL (0-based position), -# NOT the botId from chatbotAgentMap. These are two independent systems. -# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. -# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; -# presence of HistoryDetailImage records confirms image generation regardless of label. -# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API -# call may have used DeepSeek V3; the field reflects the UI selector, not the API. 
-# --------------------------------------------------------------------------- CHAT_BOT_MODEL_MAP = { - 0: "ChatGPT 3.5", # gpt-3.5 - 1: "GPT-5", # gpt-5 - 2: "GPT-4o", # gpt-4o - 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) - 4: "Image Generator", # image-generator - 5: "Vision", # vision - 6: "Google Vision", # googleVision - 7: "Document", # document - 8: "LLaMA 2", # llama2 - 9: "Nova", # nova - 10: "Gemini", # gemini - 11: "Superbot", # superbot - 12: "Logo Generator", # logo-generator - 13: "Tattoo Generator", # tattoo-generator - 14: "Web Search", # webSearch - 15: "Claude", # claude - 16: "DeepSeek", # deepSeek - 17: "Signature Generator", # signature-generator - 18: "Mistral", # mistral - 19: "Grok", # grok - 20: "DeepSeek R1", # deepSeekR1 - 21: "AI Filter", # aiFilter - 22: "Voice Chat", # voiceChat - 23: "Snap & Solve", # snapAndSolve - 24: "Study Planner", # studyPlanner - 25: "Quiz Maker", # quizMaker - 26: "Essay Helper", # essayHelper - 27: "Gemini 3 Pro", # gemini-3-pro - 28: "GPT-5.1", # gpt-5.1 - 29: "GPT-4o Mini", # 4o-mini + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", } -# --------------------------------------------------------------------------- -# SQL -# One row per HistoryDetail message. -# Parent conversation context (title, model, soft-deleted) is joined from -# History. 
Attachment presence is detected via correlated EXISTS subqueries — -# lightweight boolean checks that avoid duplicating rows from the attachment -# tables. -# --------------------------------------------------------------------------- QUERY = """ SELECT - -- Message identity - hd.id AS msg_id, - hd.UUID AS msg_uuid, - hd.historyID AS conv_id, - - -- Parent conversation context (from History) - h.UUID AS conv_uuid, - h.title AS conv_title, - h.chatBotModel AS chat_bot_model, - h.softDeleted AS soft_deleted, - - -- Message content - hd.type AS msg_type, - hd.text AS msg_text, - hd.token AS token_count, - hd.reasoningContent AS reasoning_content, - - -- Message timestamps - hd.createdAt AS created_at, - hd.lastModifiedAt AS last_modified_at, - - -- Sync metadata - hd.syncState AS sync_state, - hd.syncRetryCount AS sync_retry_count, - - -- Attachment flags (correlated EXISTS — no row multiplication) - CASE WHEN EXISTS ( - SELECT 1 FROM HistoryDetailImage i - WHERE i.historyDetailID = hd.id - ) THEN 1 ELSE 0 END AS has_image, - - CASE WHEN EXISTS ( - SELECT 1 FROM HistoryDetailDocument d - WHERE d.historyDetailID = hd.id - ) THEN 1 ELSE 0 END AS has_document, - - CASE WHEN EXISTS ( - SELECT 1 FROM HistoryDetailLink l - WHERE l.historyDetailID = hd.id - ) THEN 1 ELSE 0 END AS has_link - + hd.id, + hd.UUID, + hd.historyID, + h.UUID, + h.title, + h.chatBotModel, + h.softDeleted, + hd.type, + hd.text, + hd.token, + hd.reasoningContent, + hd.createdAt, + hd.lastModifiedAt, + hd.syncState, + hd.syncRetryCount, + EXISTS(SELECT 1 FROM HistoryDetailImage i WHERE i.historyDetailID = hd.id), + EXISTS(SELECT 1 FROM HistoryDetailDocument d WHERE d.historyDetailID = hd.id), + EXISTS(SELECT 1 FROM HistoryDetailLink l WHERE l.historyDetailID = hd.id) FROM HistoryDetail hd -INNER JOIN History h - ON h.id = hd.historyID +INNER JOIN History h ON h.id = hd.historyID ORDER BY hd.historyID ASC, hd.createdAt ASC """ -# 
--------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - def _convert_ms_timestamp(ms): - """Convert a Unix millisecond timestamp to a human-readable UTC string.""" + """Safely converts Unix millisecond timestamp using modern timezone.utc.""" if ms is None: return "" try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + return datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc).strftime( "%Y-%m-%d %H:%M:%S UTC" ) except (OSError, OverflowError, ValueError): return str(ms) -def _resolve_model(model_int): - """Return a labelled model name, falling back to the raw integer for unknowns.""" - if model_int is None: - return "Unknown" - name = CHAT_BOT_MODEL_MAP.get(model_int) - return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" - - -def _format_role(type_int): - """Map HistoryDetail.type to a forensically clear role label.""" - if type_int == 0: - return "USER" - if type_int == 1: - return "ASSISTANT" - return f"UNKNOWN ({type_int})" - - -def _format_soft_deleted(value): - """Return a clearly labelled string for the softDeleted field.""" - return "DELETED" if value == 1 else "No" - - -def _flag(value): - """Return Yes/No for a boolean integer flag.""" - return "Yes" if value else "No" - - -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - - def get_nova_chatbot_history_detail(files_found, report_folder, seeker, wrap_text): - """ - Entry point for the nova_chatbot_history_detail artifact. - - Queries every message in HistoryDetail, enriched with parent conversation - context from History and attachment presence flags from the three attachment - tables. Outputs HTML report, TSV, and timeline. 
- """ - - for file_found in files_found: - file_found = str(file_found) + logfunc("Processing data for HistoryDetail") - if not file_found.endswith("chat-ai.db"): - continue + # Filter out secondary transactional database engines + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + file_found = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) - try: - db = sqlite3.connect(file_found) - cursor = db.cursor() - cursor.execute(QUERY) - rows_raw = cursor.fetchall() - db.close() + if not file_found: + logfunc("[nova_chatbot_history_detail] Nova database file not found.") + return - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_history_detail] Error reading {file_found}: {e}" - ) - continue - - if not rows_raw: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_history_detail] No records found in {file_found}." + try: + db = open_sqlite_db_readonly(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + logfunc(f"[nova_chatbot_history_detail] Error reading {file_found}: {e}") + return + + if not rows_raw: + logfunc(f"[nova_chatbot_history_detail] No records found in {file_found}.") + return + + headers = ( + "Msg. ID", + "Msg. UUID", + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Role", + "Message Text", + "Token Count", + "Reasoning Content", + "Message Timestamp (UTC)", + "Last Modified At (UTC)", + "Sync State", + "Sync Retry Count", + "Has Image", + "Has Document", + "Has Link", + ) + + rows = [] + for row in rows_raw: + model_int = row[5] + model_name = "Unknown" + if model_int is not None: + name_lookup = CHAT_BOT_MODEL_MAP.get(model_int) + model_name = ( + f"{name_lookup} ({model_int})" + if name_lookup + else f"Unknown Model ({model_int})" ) - continue - headers = [ - # --- Message identity --- - "Msg. ID", - "Msg. UUID", - "Conv. 
ID", - # --- Parent conversation context --- - "Conv. UUID", - "Conv. Title", - "AI Model", - "Conv. Deleted", - # --- Message content --- - "Role", - "Message Text", - "Token Count", - "Reasoning Content", - # --- Timestamps --- - "Message Timestamp (UTC)", - "Last Modified At (UTC)", - # --- Sync metadata --- - "Sync State", - "Sync Retry Count", - # --- Attachment flags --- - "Has Image", - "Has Document", - "Has Link", - ] + role_int = row[7] + role_label = ( + "USER" + if role_int == 0 + else "ASSISTANT" + if role_int == 1 + else f"UNKNOWN ({role_int})" + ) - rows = [] - for row in rows_raw: + rows.append( ( - msg_id, - msg_uuid, - conv_id, - conv_uuid, - conv_title, - chat_bot_model, - soft_deleted, - msg_type, - msg_text, - token_count, - reasoning_content, - created_at, - last_modified_at, - sync_state, - sync_retry_count, - has_image, - has_document, - has_link, - ) = row - - rows.append( - ( - msg_id, - msg_uuid or "", - conv_id, - conv_uuid or "", - conv_title or "", - _resolve_model(chat_bot_model), - _format_soft_deleted(soft_deleted), - _format_role(msg_type), - msg_text or "", - token_count if token_count is not None else "", - reasoning_content or "", - _convert_ms_timestamp(created_at), - _convert_ms_timestamp(last_modified_at), - sync_state if sync_state is not None else "", - sync_retry_count if sync_retry_count is not None else "", - _flag(has_image), - _flag(has_document), - _flag(has_link), - ) + row[0], + row[1] or "", + row[2], + row[3] or "", + row[4] or "", + model_name, + "DELETED" if row[6] == 1 else "No", + role_label, + row[8] or "", + row[9] if row[9] is not None else "", + row[10] or "", + _convert_ms_timestamp(row[11]), + _convert_ms_timestamp(row[12]), + row[13] if row[13] is not None else "", + row[14] if row[14] is not None else "", + "Yes" if row[15] else "No", + "Yes" if row[16] else "No", + "Yes" if row[17] else "No", ) - - # --- HTML report --- - report_name = "Nova AI Chatbot - HistoryDetail" - report = 
ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, - rows, - file_found, - html_escape=True, ) - report.end_artifact_report() - # --- TSV output --- - scripts.ilapfuncs.tsv(report_folder, headers, rows, report_name, file_found) + report_name = "HistoryDetail" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + + # Delegates script-side sanitization directly to framework tables safely + report.write_artifact_data_table(headers, rows, file_found, html_escape=True) + report.end_artifact_report() - # --- Timeline (message-level granularity, index 11 = Message Timestamp) --- - scripts.ilapfuncs.timeline(report_folder, report_name, rows, headers) + tsv(report_folder, headers, rows, report_name, file_found) + timeline(report_folder, report_name, rows, headers) + logfunc( + f"[nova_chatbot_history_detail] Processed {len(rows)} message detail records." + ) diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py index 4d1c7aef..91d4cf5b 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailDocument.py @@ -1,178 +1,96 @@ __artifacts_v2__ = { "nova_chatbot_documents": { - "name": "Nova AI Chatbot - Submitted Documents", + "name": "HistoryDetailDocuments", "description": ( - "Extracts all document records submitted by the user to the AI from the " - "AI Chatbot - Nova app (HistoryDetailDocument table). Each row represents " - "one document and is enriched with parent message context from HistoryDetail " - "and parent conversation context from History. " - "Documents are stored on Firebase Storage; the database stores only the " - "Firebase object path. No local cache of user‑submitted documents is kept " - "on the device. 
The metadata and a forensic note are displayed in the HTML " - "report. The full file content is not available for preview. " - "A forensic note is shown on every row confirming the file was actively " - "submitted by the device user to the AI assistant." + "Extracts document records submitted by the user to the AI from the " + "HistoryDetailDocument table, enriched with parent message and conversation context." ), "author": "Guilherme Guilherme", - "version": "0.2", - "date": "2025-04-27", + "version": "1.0", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Database: com.scaleup.chatai/databases/chat-ai.db. " - "HistoryDetailDocument.url stores a Firebase path such as " - "'/document///document-input-'. " - "The file content is not stored locally on the device; it lives in " - "Firebase Storage. The metadata record is still valuable for forensic " - "timeline and user activity. " - "type: 0 = Local File (uploaded from the device), 1 = Remote File. " - "size is stored in bytes and converted to a human-readable string. " - "mimeType identifies the document format (e.g. application/pdf). " - "softDeleted is inherited from the parent History record; DELETED means " - "the conversation was removed by the user but the document record remains " - "physically in the database and is forensically recoverable. " - "The user message text associated with the document reveals the query the " - "user submitted alongside the file to the AI." 
- ), + "notes": "Database: com.scaleup.chatai/databases/chat-ai.db", "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), "function": "get_nova_chatbot_documents", + "output_types": "standard", + "artifact_icon": "file-text", } } -import os -import shutil -import sqlite3 import datetime -import html as html_module +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import logfunc, tsv, timeline, open_sqlite_db_readonly -# --------------------------------------------------------------------------- -# Known mappings for the chatBotModel integer field. -# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source -# (com.scaleup.chatai.ui.conversation.FirestoreHistory). -# The integer stored in the database is the ENUM ORDINAL (0-based position), -# NOT the botId from chatbotAgentMap. These are two independent systems. -# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. -# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions; -# presence of HistoryDetailImage records confirms image generation regardless of label. -# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API -# call may have used DeepSeek V3; the field reflects the UI selector, not the API. 
-# --------------------------------------------------------------------------- CHAT_BOT_MODEL_MAP = { - 0: "ChatGPT 3.5", # gpt-3.5 - 1: "GPT-5", # gpt-5 - 2: "GPT-4o", # gpt-4o - 3: "Bard / Image Gen.", # bard (legacy; reused for image generation) - 4: "Image Generator", # image-generator - 5: "Vision", # vision - 6: "Google Vision", # googleVision - 7: "Document", # document - 8: "LLaMA 2", # llama2 - 9: "Nova", # nova - 10: "Gemini", # gemini - 11: "Superbot", # superbot - 12: "Logo Generator", # logo-generator - 13: "Tattoo Generator", # tattoo-generator - 14: "Web Search", # webSearch - 15: "Claude", # claude - 16: "DeepSeek", # deepSeek - 17: "Signature Generator", # signature-generator - 18: "Mistral", # mistral - 19: "Grok", # grok - 20: "DeepSeek R1", # deepSeekR1 - 21: "AI Filter", # aiFilter - 22: "Voice Chat", # voiceChat - 23: "Snap & Solve", # snapAndSolve - 24: "Study Planner", # studyPlanner - 25: "Quiz Maker", # quizMaker - 26: "Essay Helper", # essayHelper - 27: "Gemini 3 Pro", # gemini-3-pro - 28: "GPT-5.1", # gpt-5.1 - 29: "GPT-4o Mini", # 4o-mini -} - -DOCUMENT_TYPE_MAP = { - 0: "Local File", - 1: "Remote File", + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", } -MIME_ICON_MAP = { - "application/pdf": "📄", - "application/msword": "📝", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "📝", - "application/vnd.ms-excel": "📊", - 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "📊", - "text/plain": "📃", - "text/csv": "📊", - "image/jpeg": "🖼️", - "image/png": "🖼️", - "image/gif": "🖼️", - "image/webp": "🖼️", -} - -# --------------------------------------------------------------------------- -# SQL -# One row per HistoryDetailDocument, enriched with parent message and -# conversation context. -# --------------------------------------------------------------------------- QUERY = """ SELECT - -- Document record - d.id AS doc_id, - d.historyDetailID AS msg_id, - d.url AS doc_url, - d.name AS doc_name, - d.type AS doc_type, - d.size AS doc_size, - d.mimeType AS mime_type, - - -- Parent message context (HistoryDetail) - hd.historyID AS conv_id, - hd.type AS msg_type, - hd.text AS msg_text, - hd.createdAt AS msg_created_at, - - -- Parent conversation context (History) - h.UUID AS conv_uuid, - h.title AS conv_title, - h.chatBotModel AS chat_bot_model, - h.softDeleted AS soft_deleted - + d.id, + d.historyDetailID, + d.url, + d.name, + d.type, + d.size, + d.mimeType, + hd.historyID, + hd.type, + hd.text, + hd.createdAt, + h.UUID, + h.title, + h.chatBotModel, + h.softDeleted FROM HistoryDetailDocument d INNER JOIN HistoryDetail hd ON hd.id = d.historyDetailID INNER JOIN History h ON h.id = hd.historyID ORDER BY d.id ASC """ -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _e(text): - return html_module.escape(str(text)) if text else "" def _convert_ms_timestamp(ms): + """Safely converts Unix millisecond timestamp using modern timezone.utc.""" if ms is None: return "" try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + return datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc).strftime( "%Y-%m-%d %H:%M:%S UTC" ) except (OSError, OverflowError, ValueError): return str(ms) -def _resolve_model(model_int): - if model_int is None: - 
return "Unknown" - name = CHAT_BOT_MODEL_MAP.get(model_int) - return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" - -def _resolve_doc_type(type_int): - if type_int is None: - return "" - label = DOCUMENT_TYPE_MAP.get(type_int) - return f"{label} ({type_int})" if label else f"Unknown ({type_int})" def _format_file_size(size_bytes): if size_bytes is None: @@ -183,229 +101,122 @@ def _format_file_size(size_bytes): return f"{size_bytes} B" elif size_bytes < 1024**2: return f"{size_bytes / 1024:.1f} KB" - elif size_bytes < 1024**3: - return f"{size_bytes / (1024**2):.1f} MB" else: - return f"{size_bytes / (1024**3):.2f} GB" + return f"{size_bytes / (1024**2):.1f} MB" except (ValueError, TypeError): return str(size_bytes) -def _format_soft_deleted(value): - return "DELETED" if value == 1 else "No" - -def _format_role(type_int): - return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") - -# --------------------------------------------------------------------------- -# Document file resolution (always returns None – no local copy) -# --------------------------------------------------------------------------- -def _resolve_document_file(doc_url, doc_name, seeker, docs_dir): - """ - Documents are stored on Firebase Storage. The database stores only the - Firebase object path. No local cache of user‑submitted documents is - kept on the device. Therefore this function always returns None. - The HTML cell will show a notice explaining the file is not available - locally. 
- """ - return None - -# --------------------------------------------------------------------------- -# HTML cell builder -# --------------------------------------------------------------------------- -def _build_document_cell(doc_name, mime_type, doc_size, doc_url, doc_type, filename): - """ - Build a self-contained HTML cell for one document record showing: - - File icon + name (plain text, no link) - - MIME type, size, source type, and Firebase path - - Forensic note confirming the user submitted this file to the AI - - Notice that the file content is stored on Firebase and not available - """ - icon = MIME_ICON_MAP.get(mime_type, "📎") - size_label = _format_file_size(doc_size) - type_label = _resolve_doc_type(doc_type) +def get_nova_chatbot_documents(files_found, report_folder, seeker, wrap_text): + logfunc("Processing data for HistoryDetail Submitted Documents") - cell = f'
' - cell += f'
{icon} {_e(doc_name)}
' + # Filter out secondary transactional database engines + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + file_found = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) - if mime_type: - cell += f"
MIME Type: {_e(mime_type)}
" - if size_label: - cell += f"
Size: {_e(size_label)}
" - if type_label: - cell += f"
Source Type: {_e(type_label)}
" - if doc_url: - cell += ( - f'
' - f" Firebase Path:
" - f' {_e(doc_url)}' - f"
" - ) + if not file_found: + logfunc("[nova_chatbot_documents] Nova database file not found.") + return - # Notice that the file is not stored locally - cell += ( - f'
' - f" ☁️ File stored on Firebase Storage
" - f" The document content is not available on the device. " - f" The database record confirms the user submitted this file to the AI; " - f" the file itself resides in Firebase Storage and is not cached locally." - f"
" - ) - - # Forensic note (original) - cell += ( - f'
' - f" ⚠️ Forensic note: This file was actively submitted " - f" by the device user to the AI assistant as part of this conversation." - f"
" + try: + db = open_sqlite_db_readonly(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + logfunc(f"[nova_chatbot_documents] Error reading {file_found}: {e}") + return + + if not rows_raw: + logfunc(f"[nova_chatbot_documents] No document records found in {file_found}.") + return + + headers = ( + "Doc. ID", + "Msg. ID", + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Media Submitted By", + "Msg. Text", + "Msg. Timestamp (UTC)", + "File Name", + "MIME Type", + "Size", + "Source Type", + "Firebase Storage Path", + "Forensic Notes", ) - cell += "
" - return cell -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - -def get_nova_chatbot_documents(files_found, report_folder, seeker, wrap_text): - """ - Entry point for the nova_chatbot_documents artifact. - - Extracts every HistoryDetailDocument record, resolves the document file from - the device extraction, and produces an HTML report with document metadata - cards and download links, TSV export, and timeline output. - """ - for file_found in files_found: - file_found = str(file_found) - if not file_found.endswith("chat-ai.db"): - continue - - try: - db = sqlite3.connect(file_found) - cursor = db.cursor() - cursor.execute(QUERY) - rows_raw = cursor.fetchall() - db.close() - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_documents] Error reading {file_found}: {e}" - ) - continue - - if not rows_raw: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_documents] No document records found in {file_found}." + rows = [] + for row in rows_raw: + model_int = row[13] + model_name = "Unknown" + if model_int is not None: + name_lookup = CHAT_BOT_MODEL_MAP.get(model_int) + model_name = ( + f"{name_lookup} ({model_int})" + if name_lookup + else f"Unknown Model ({model_int})" ) - continue - - docs_dir = os.path.join(report_folder, "nova_documents") - os.makedirs(docs_dir, exist_ok=True) - - headers = [ - # Document identity - "Doc. ID", - "Msg. ID", - "Conv. ID", - # Conversation context - "Conv. UUID", - "Conv. Title", - "AI Model", - "Conv. Deleted", - # Message context - "Msg. Role", - "Msg. Text", - "Msg. 
Timestamp (UTC)", - # Document card (HTML) - "Document & Metadata", - # Plain fields for TSV - "File Name", - "MIME Type", - "Size", - "Source Type", - "Firebase Path", # changed from "Internal Path" - ] - html_rows = [] - tsv_rows = [] + doc_type_int = row[4] + doc_type_label = ( + "Local File" + if doc_type_int == 0 + else "Remote File" + if doc_type_int == 1 + else f"Unknown ({doc_type_int})" + ) + + raw_role = row[8] + if raw_role == 0: + submitted_by = "USER" + forensic_note = "Media element actively selected and submitted by the user to the chatbot interface." + elif raw_role == 1: + submitted_by = "AI ASSISTANT" + forensic_note = "Media element generated or provided back by the AI response." + else: + submitted_by = f"UNKNOWN ({raw_role})" + forensic_note = "Unknown structural context for media origin." - for row in rows_raw: + rows.append( ( - doc_id, - msg_id, - doc_url, - doc_name, - doc_type, - doc_size, - mime_type, - conv_id, - msg_type, - msg_text, - msg_created_at, - conv_uuid, - conv_title, - chat_bot_model, - soft_deleted, - ) = row - - # Resolve document from extraction (always returns None) - filename = _resolve_document_file(doc_url, doc_name, seeker, docs_dir) - - common = ( - doc_id, - msg_id, - conv_id, - conv_uuid or "", - conv_title or "", - _resolve_model(chat_bot_model), - _format_soft_deleted(soft_deleted), - _format_role(msg_type), - msg_text or "", - _convert_ms_timestamp(msg_created_at), - ) - - doc_cell = _build_document_cell( - doc_name, mime_type, doc_size, doc_url, doc_type, filename - ) - - html_rows.append( - common - + ( - doc_cell, - doc_name or "", - mime_type or "", - _format_file_size(doc_size), - _resolve_doc_type(doc_type), - doc_url or "", - ) - ) - - tsv_rows.append( - common - + ( - "", # no HTML in TSV - doc_name or "", - mime_type or "", - _format_file_size(doc_size), - _resolve_doc_type(doc_type), - doc_url or "", - ) + row[0], + row[1], + row[7], + row[11] or "", + row[12] or "", + model_name, + "DELETED" if 
row[14] == 1 else "No", + submitted_by, + row[9] or "", + _convert_ms_timestamp(row[10]), + row[3] or "Unknown", + row[6] or "", + _format_file_size(row[5]), + doc_type_label, + row[2] or "", + forensic_note, ) - - # HTML report - report_name = "Nova AI Chatbot - HistoryDetailDocuments" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, html_rows, file_found, html_escape=False ) - report.end_artifact_report() - # TSV - scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) + report_name = "HistoryDetailDocuments" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + + # Enforce safe framework-side text escaping to block script injection vectors entirely + report.write_artifact_data_table(headers, rows, file_found, html_escape=True) + report.end_artifact_report() - # Timeline (Msg. Timestamp, index 9) - scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) + tsv(report_folder, headers, rows, report_name, file_found) + timeline(report_folder, report_name, rows, headers) + logfunc( + f"[nova_chatbot_documents] Processed {len(rows)} submitted document records." + ) \ No newline at end of file diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py index c322efd5..6991de63 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailImage.py @@ -1,56 +1,28 @@ __artifacts_v2__ = { "nova_chatbot_images": { - "name": "Nova AI Chatbot - Images (Generated & Submitted)", + "name": "HistoryDetailImages", "description": ( - "Extracts all image records from the AI Chatbot - Nova app " - "(HistoryDetailImage table). 
Each row represents one image and is enriched " - "with the parent message context from HistoryDetail and the parent " - "conversation context from History. " - "Images are correctly identified by the parent message role: " - "USER messages contain images submitted by the device user (e.g., vision queries); " - "ASSISTANT messages contain images generated by the AI. " - "Both types are stored on Firebase Storage; no local copies are predictably " - "available offline. The report shows the prompt, metadata, and a forensic note " - "explaining the image origin and storage behaviour. Generation state, pipeline, " - "and style ID are included for AI‑generated images where applicable." + "Extracts user-submitted image links and AI-generated image records from the " + "HistoryDetailImage table, enriched with parent message and conversation context." ), "author": "Guilherme Guilherme", - "version": "0.5", - "date": "2026-05-03", + "version": "1.0", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Database: com.scaleup.chatai/databases/chat-ai.db. " - "HistoryDetailImage.url stores a Firebase Storage object path. " - "Identification of image origin is based on the parent message's type: " - "0 = USER → user‑submitted image; 1 = ASSISTANT → AI‑generated image. " - "The source column in HistoryDetailImage is ignored because it is often " - "misleading (e.g., vision images are marked as source=0 but are user‑submitted). " - "AI‑generated images are temporarily cached in cache/image_manager_disk_cache/*.0 " - "but the filenames are SHA‑256 hashes of signed URLs containing a token not " - "stored on the device; automatic matching is impossible. User‑submitted images " - "are not cached locally. " - "state: 1=Success, 0=Pending, 2=Failed (only relevant for AI‑generated). " - "pipeline identifies the generation engine (e.g. flux_tpu). " - "styleId references the visual style preset selected by the user. 
" - "softDeleted is inherited from the parent History record." - ), + "notes": "Database: com.scaleup.chatai/databases/chat-ai.db", "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), "function": "get_nova_chatbot_images", + "output_types": "standard", + "artifact_icon": "image", } } -import os -import shutil -import sqlite3 import datetime -import html as html_module +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import logfunc, tsv, timeline, open_sqlite_db_readonly -# --------------------------------------------------------------------------- -# Known mappings for the chatBotModel integer field. -# --------------------------------------------------------------------------- CHAT_BOT_MODEL_MAP = { 0: "ChatGPT 3.5", 1: "GPT-5", @@ -90,306 +62,158 @@ 2: "Failed", } -# --------------------------------------------------------------------------- -# SQL (includes msg_type from HistoryDetail) -# --------------------------------------------------------------------------- QUERY = """ SELECT - i.id AS img_id, - i.historyDetailID AS msg_id, - i.url AS img_url, - i.prompt AS prompt, - i.state AS state, - i.mimeType AS mime_type, - i.styleId AS style_id, - i.source AS source, - i.sourceUrl AS source_url, - i.pipeline AS pipeline, - - hd.historyID AS conv_id, - hd.type AS msg_type, - hd.text AS msg_text, - hd.createdAt AS msg_created_at, - - h.UUID AS conv_uuid, - h.title AS conv_title, - h.chatBotModel AS chat_bot_model, - h.softDeleted AS soft_deleted - + i.id, + i.historyDetailID, + i.url, + i.prompt, + i.state, + i.mimeType, + i.styleId, + i.pipeline, + hd.historyID, + hd.type, + hd.text, + hd.createdAt, + h.UUID, + h.title, + h.chatBotModel, + h.softDeleted FROM HistoryDetailImage i INNER JOIN HistoryDetail hd ON hd.id = i.historyDetailID INNER JOIN History h ON h.id = hd.historyID ORDER BY i.id ASC """ -# --------------------------------------------------------------------------- 
-# Helpers -# --------------------------------------------------------------------------- - - -def _e(text): - return html_module.escape(str(text)) if text else "" - def _convert_ms_timestamp(ms): + """Safely converts Unix millisecond timestamp using modern timezone.utc.""" if ms is None: return "" try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + return datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc).strftime( "%Y-%m-%d %H:%M:%S UTC" ) except (OSError, OverflowError, ValueError): return str(ms) -def _resolve_model(model_int): - if model_int is None: - return "Unknown" - name = CHAT_BOT_MODEL_MAP.get(model_int) - return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" - - -def _resolve_state(state_int): - if state_int is None: - return "" - label = IMAGE_STATE_MAP.get(state_int) - return f"{label} ({state_int})" if label else f"Unknown ({state_int})" - - -def _format_soft_deleted(value): - return "DELETED" if value == 1 else "No" - - -def _format_role(type_int): - return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") - - -def _get_image_origin(msg_type): - """Return 'user-submitted' for msg_type=0, 'AI-generated' for msg_type=1.""" - if msg_type == 0: - return ("user-submitted", "User‑submitted image") - elif msg_type == 1: - return ("ai-generated", "AI‑generated image") - else: - return ("unknown", "Unknown origin") - - -# --------------------------------------------------------------------------- -# Image file resolution (always None) -# --------------------------------------------------------------------------- -def _resolve_image_file(db_url, seeker, images_dir): - return None - - -# --------------------------------------------------------------------------- -# HTML cell builder -# --------------------------------------------------------------------------- - - -def _build_image_cell( - img_url, prompt, state, mime_type, pipeline, style_id, msg_type, filename -): - """ - Build HTML cell 
using msg_type to determine image origin. - """ - origin_key, origin_label = _get_image_origin(msg_type) - cell = '
' - - if prompt: - cell += f'
Prompt: {_e(prompt)}
' - - if origin_key == "ai-generated": - cell += ( - f'
' - f" 🤖 AI‑generated image
" - f" This image was created by the AI based on the user prompt. " - f" It is stored on Firebase Storage; a temporary local copy may exist " - f" in cache/image_manager_disk_cache/*.0 but the filename " - f" is a hash of a signed URL that includes a token not stored on the device. " - f" Manual inspection of .0 files is recommended.
" - f" Forensic action: Examine .0 files directly as JPEG." - f"
" - ) - elif origin_key == "user-submitted": - cell += ( - f'
' - f" 📤 User‑submitted image
" - f" This image was actively uploaded by the device user (e.g., as part of a vision query). " - f" The file content is stored on Firebase Storage and is not cached locally. " - f" Only the metadata record remains on the device." - f"
" - ) - else: - cell += ( - f'
' - f" ❓ Unknown image origin
" - f" The parent message type is {_e(str(msg_type))} – cannot determine if user‑submitted or AI‑generated." - f"
" - ) - - if pipeline: - cell += f"
Pipeline: {_e(pipeline)}
" - if style_id is not None: - cell += f"
Style ID: {_e(str(style_id))}
" - if mime_type: - cell += f"
MIME Type: {_e(mime_type)}
" - - cell += ( - f'
' - f" Firebase path:
" - f' {_e(img_url)}' - f"
" - ) - cell += "
" - return cell - - -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - - def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text): - for file_found in files_found: - file_found = str(file_found) - if not file_found.endswith("chat-ai.db"): - continue - - try: - db = sqlite3.connect(file_found) - cursor = db.cursor() - cursor.execute(QUERY) - rows_raw = cursor.fetchall() - db.close() - except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_images] Error reading {file_found}: {e}" - ) - continue - - if not rows_raw: - scripts.ilapfuncs.logfunc( - f"[nova_chbot_images] No image records found in {file_found}." - ) - continue - - images_dir = os.path.join(report_folder, "nova_images") - os.makedirs(images_dir, exist_ok=True) - - headers = [ - "Image ID", - "Msg. ID", - "Conv. ID", - "Conv. UUID", - "Conv. Title", - "AI Model", - "Conv. Deleted", - "Msg. Role", - "Msg. Text", - "Msg. 
Timestamp (UTC)", - "Image Preview & Metadata", - "Prompt", - "State", - "Pipeline", - "Style ID", - "MIME Type", - "Firebase Path", - ] + logfunc("Processing data for HistoryDetail Images") - html_rows = [] - tsv_rows = [] + # Filter out secondary transactional database engines + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + file_found = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) - for row in rows_raw: - ( - img_id, - msg_id, - img_url, - prompt, - state, - mime_type, - style_id, - source, - source_url, - pipeline, - conv_id, - msg_type, - msg_text, - msg_created_at, - conv_uuid, - conv_title, - chat_bot_model, - soft_deleted, - ) = row + if not file_found: + logfunc("[nova_chatbot_images] Nova database file not found.") + return - filename = _resolve_image_file(img_url, seeker, images_dir) + try: + db = open_sqlite_db_readonly(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + logfunc(f"[nova_chatbot_images] Error reading {file_found}: {e}") + return + + if not rows_raw: + logfunc(f"[nova_chatbot_images] No image records found in {file_found}.") + return + + headers = ( + "Image ID", + "Msg. ID", + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Media Submitted By", + "Msg. Text", + "Msg. 
Timestamp (UTC)", + "Prompt", + "State", + "Pipeline", + "Style ID", + "MIME Type", + "Firebase Storage Path", + "Forensic Notes", + ) - common = ( - img_id, - msg_id, - conv_id, - conv_uuid or "", - conv_title or "", - _resolve_model(chat_bot_model), - _format_soft_deleted(soft_deleted), - _format_role(msg_type), - msg_text or "", - _convert_ms_timestamp(msg_created_at), + rows = [] + for row in rows_raw: + model_int = row[14] + model_name = "Unknown" + if model_int is not None: + name_lookup = CHAT_BOT_MODEL_MAP.get(model_int) + model_name = ( + f"{name_lookup} ({model_int})" + if name_lookup + else f"Unknown Model ({model_int})" ) - img_cell = _build_image_cell( - img_url, - prompt, - state, - mime_type, - pipeline, - style_id, - msg_type, - filename, + state_int = row[4] + state_label = "" + if state_int is not None: + name_state = IMAGE_STATE_MAP.get(state_int) + state_label = f"{name_state} ({state_int})" if name_state else f"Unknown ({state_int})" + + raw_role = row[9] + if raw_role == 0: + submitted_by = "USER" + forensic_note = ( + "Media element actively selected and submitted by the user to the chatbot interface. " + "The file content resides remotely on Firebase Storage and is not locally cached." ) - - # For TSV we also include the origin (derived from msg_type) - origin_label = ( - "User-submitted" - if msg_type == 0 - else "AI-generated" - if msg_type == 1 - else "Unknown" + elif raw_role == 1: + submitted_by = "AI ASSISTANT" + forensic_note = ( + "Media element generated or provided back by the AI response. " + "Stored on Firebase Storage; temporary local copies might be found in cache/image_manager_disk_cache/." ) + else: + submitted_by = f"UNKNOWN ({raw_role})" + forensic_note = "Unknown structural context for media origin." 
- html_rows.append( - common - + ( - img_cell, - prompt or "", - _resolve_state(state), - pipeline or "", - style_id if style_id is not None else "", - mime_type or "", - img_url or "", - ) + rows.append( + ( + row[0], + row[1], + row[8], + row[12] or "", + row[13] or "", + model_name, + "DELETED" if row[15] == 1 else "No", + submitted_by, + row[10] or "", + _convert_ms_timestamp(row[11]), + row[3] or "", + state_label, + row[7] or "", + row[6] if row[6] is not None else "", + row[5] or "", + row[2] or "", + forensic_note, ) + ) - tsv_rows.append( - common - + ( - "", # no HTML in TSV - prompt or "", - _resolve_state(state), - pipeline or "", - style_id if style_id is not None else "", - mime_type or "", - img_url or "", - ) - ) + report_name = "HistoryDetailImages" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() - report_name = "Nova AI Chatbot - HistoryDetailImage" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, html_rows, file_found, html_escape=False - ) - report.end_artifact_report() + # Enforce safe framework-side text escaping to block script injection vectors entirely + report.write_artifact_data_table(headers, rows, file_found, html_escape=True) + report.end_artifact_report() - scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) - scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) + tsv(report_folder, headers, rows, report_name, file_found) + timeline(report_folder, report_name, rows, headers) + logfunc( + f"[nova_chatbot_images] Processed {len(rows)} submitted image records." 
+ ) diff --git a/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py b/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py index 30a343c9..a88ef782 100644 --- a/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py +++ b/scripts/artifacts/AIChatbotNovaHistoryDetailLink.py @@ -1,6 +1,6 @@ __artifacts_v2__ = { "nova_chatbot_links": { - "name": "Nova AI Chatbot - Shared Links", + "name": "Shared Links", "description": ( "Extracts all link records from the AI Chatbot - Nova app " "(HistoryDetailLink table). Each row represents one URL shared within " @@ -202,7 +202,7 @@ def get_nova_chatbot_links(files_found, report_folder, seeker, wrap_text): scripts.ilapfuncs.logfunc( f"[nova_chatbot_links] HistoryDetailLink table is empty in {file_found}." ) - report_name = "Nova AI Chatbot - HistoryDetailLinks" + report_name = "HistoryDetailLinks" report = ArtifactHtmlReport(report_name) report.start_artifact_report(report_folder, report_name) report.add_script() @@ -280,7 +280,7 @@ def get_nova_chatbot_links(files_found, report_folder, seeker, wrap_text): tsv_rows.append(common + (link_url or "",)) # HTML report - report_name = "Nova AI Chatbot - HistoryDetailLinks" + report_name = "HistoryDetailLinks" report = ArtifactHtmlReport(report_name) report.start_artifact_report(report_folder, report_name) report.add_script() diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py index dc0c65b6..451fcd07 100644 --- a/scripts/artifacts/AIChatbotNovaMediastore.py +++ b/scripts/artifacts/AIChatbotNovaMediastore.py @@ -1,381 +1,259 @@ __artifacts_v2__ = { "nova_user_submissions": { - "name": "Nova AI Chatbot - User Media Submissions", + "name": "User Media Submissions", "description": ( - "Identifies ALL media files submitted by the user to Nova AI Chatbot. " - "This includes uploaded documents and photos captured using the in-app camera. 
" - "The artifact lists recovered filenames, user context, timestamps, MIME types, " - "and resolved physical paths from the extracted filesystem." + "Identifies media files submitted by the user to Nova AI Chatbot, including " + "uploaded documents and photos captured using the in-app camera. The artifact " + "lists recovered filenames, conversation context, timestamps, MIME types, and resolved " + "physical paths from the extracted filesystem." ), "author": "Guilherme Guilherme", - "version": "2.7", - "date": "2026-05-20", + "version": "3.3", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Sources: chat-ai.db and Android MediaStore databases. " - "Resolves extracted filesystem paths and falls back to filename search when needed. " - "This module does not embed previews; it focuses on metadata and physical path reporting." - ), + "notes": "Sources: chat-ai.db and Android MediaStore databases.", "paths": ( "*/com.scaleup.chatai/databases/chat-ai.db", "*/com.android.providers.media/databases/external*.db", "*/com.google.android.providers.media.module/databases/external*.db", ), "function": "get_nova_user_submissions", + "output_types": "standard", + "artifact_icon": "folder", } } import os -import csv -import sqlite3 import datetime -import html as html_module +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs - - -def _e(text): - return html_module.escape(str(text)) if text else "" - +from scripts.ilapfuncs import logfunc, tsv, open_sqlite_db_readonly, media_to_html -def _convert_ms_timestamp(ms): - if ms is None: - return "" - try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( - "%Y-%m-%d %H:%M:%S UTC" - ) - except Exception: - return str(ms) - -def _convert_sec_timestamp(ts): - if ts is None: +def _parse_path(raw_path): + """Normalizes paths and slices them to always start at /data.""" + if not raw_path: return "" - try: - return 
datetime.datetime.utcfromtimestamp(int(ts)).strftime( - "%Y-%m-%d %H:%M:%S UTC" - ) - except Exception: - return str(ts) - - -def _format_file_size(size_bytes): - if size_bytes is None: - return "" - try: - size_bytes = int(size_bytes) - if size_bytes < 1024: - return f"{size_bytes} B" - if size_bytes < 1024**2: - return f"{size_bytes / 1024:.1f} KB" - if size_bytes < 1024**3: - return f"{size_bytes / (1024**2):.1f} MB" - return f"{size_bytes / (1024**3):.2f} GB" - except Exception: - return str(size_bytes) - - -def _normalize_media_path(media_path): - if not media_path: - return None - p = media_path.replace("\\", "/") - if p.startswith("/storage/emulated/0/"): - p = p.replace("/storage/emulated/0/", "/data/media/0/", 1) - return p - - -def _resolve_extraction_path(extraction_root, media_path): - if not extraction_root or not media_path: - return None - - normalized = _normalize_media_path(media_path) - candidate = os.path.normpath(os.path.join(extraction_root, normalized.lstrip("/"))) - if os.path.exists(candidate): - return candidate - - fname = os.path.basename(normalized) - filename_candidate = os.path.normpath(os.path.join(extraction_root, fname)) - if os.path.exists(filename_candidate): - return filename_candidate - - data_media_candidate = os.path.normpath( - os.path.join(extraction_root, "data/media/0", fname) - ) - if os.path.exists(data_media_candidate): - return data_media_candidate - - for root, dirs, files in os.walk(extraction_root): - if fname in files: - return os.path.join(root, fname) - - return None + normalized = str(raw_path).replace("\\", "/") + if "/data/" in normalized: + return "/data/" + normalized.split("/data/", 1)[1] + elif "data/data/" in normalized: + return "/data/data/" + normalized.split("data/data/", 1)[1] + return normalized def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): - nova_db = None - media_db = None - - for file_found in files_found: - file_found = str(file_found) - if "chat-ai.db" in 
file_found: - nova_db = file_found - elif "external" in file_found and file_found.endswith(".db"): - media_db = file_found + logfunc("Processing data for Nova User Media Submissions") + + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + nova_db = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) + media_db = next( + ( + str(x) + for x in files_found + if "external" in str(x) and str(x).endswith(".db") + ), + None, + ) if not nova_db: - scripts.ilapfuncs.logfunc("[nova_user_submissions] Nova database not found.") + logfunc("[nova_user_submissions] Nova database not found.") return - extraction_root = getattr(seeker, "search_dir", "") - scripts.ilapfuncs.logfunc( - f"[nova_user_submissions] Extraction root: {extraction_root}" - ) - + extraction_root = getattr(seeker, "search_dir", "") or "" media_lookup = {} - if media_db and os.path.exists(media_db): + # 1. Map the MediaStore database records to see what is on local storage + if media_db: try: - db = sqlite3.connect(media_db) - cursor = db.cursor() - cursor.execute(""" + db = open_sqlite_db_readonly(media_db) + cur = db.cursor() + cur.execute(""" SELECT _display_name, _data, _size, date_added, mime_type FROM files WHERE _data IS NOT NULL """) - for ( - display_name, - data_path, - size, - date_added, - mime_type, - ) in cursor.fetchall(): - normalized_path = _normalize_media_path(data_path or "") - if not normalized_path: - continue - - if not any( - x in normalized_path.lower() - for x in ["/download/", "/nova/", "/com.scaleup.chatai/"] - ): - continue - - key = (display_name or os.path.basename(normalized_path)).lower() - media_lookup[key] = { - "media_path": normalized_path, - "extraction_path": _resolve_extraction_path( - extraction_root, normalized_path - ), - "size": size, - "timestamp": date_added, - "mime": mime_type or "", - } - + for display_name, data_path, size, date_added, mime_type in cur.fetchall(): + local_path = "" + if data_path: + 
clean_rel = str(data_path).replace("\\", "/").lstrip("/") + if clean_rel.startswith("storage/emulated/0/"): + clean_rel = clean_rel.replace( + "storage/emulated/0/", "data/media/0/", 1 + ) + + candidate_path = os.path.join(extraction_root, clean_rel) + if os.path.exists(candidate_path): + local_path = candidate_path + + key = (display_name or os.path.basename(str(data_path))).lower() + media_lookup[key] = {"data_path": data_path, "local_path": local_path} db.close() except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_user_submissions] Error reading MediaStore: {e}" - ) + logfunc(f"[nova_user_submissions] Error building MediaStore lookup: {e}") all_items = [] - query_docs = """ - SELECT - hdd.name, - hdd.url, - hdd.mimeType, - hdd.size, - hd.text, - hd.createdAt, - h.title - FROM HistoryDetailDocument hdd - INNER JOIN HistoryDetail hd ON hd.id = hdd.historyDetailID - INNER JOIN History h ON h.id = hd.historyID - WHERE hd.type = 0 - ORDER BY hd.createdAt DESC - """ - + # 2. Process chat database attachments and cross-reference with MediaStore try: - db = sqlite3.connect(nova_db) - cursor = db.cursor() - cursor.execute(query_docs) + db = open_sqlite_db_readonly(nova_db) + cur = db.cursor() + cur.execute(""" + SELECT + hdd.name, + hdd.mimeType, + hdd.size, + hd.text, + hd.createdAt, + h.title, + h.UUID + FROM HistoryDetailDocument hdd + INNER JOIN HistoryDetail hd ON hd.id = hdd.historyDetailID + INNER JOIN History h ON h.id = hd.historyID + WHERE hd.type = 0 + ORDER BY hd.createdAt DESC + """) for ( file_name, - firebase_url, mime_type, size_db, message, created_at, conversation, - ) in cursor.fetchall(): - media_match = media_lookup.get((file_name or "").lower()) + conv_uuid, + ) in cur.fetchall(): + mtime_str = "" + if created_at: + try: + mtime_str = datetime.datetime.fromtimestamp( + float(created_at) / 1000, timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(created_at) + + match_key = (file_name or "").lower() + if 
match_key in media_lookup: + match = media_lookup[match_key] + if match["local_path"]: + # Correctly links local images into ALEAPP's standard thumb pipeline + media_to_html(file_name, match["local_path"], report_folder) + display_path = _parse_path(match["data_path"]) + else: + display_path = "Cloud-only (Firebase Storage)" + all_items.append( - { - "type": "submitted_document", - "name": file_name or "Unknown", - "firebase_url": firebase_url or "", - "mime": mime_type or "", - "size_db": size_db, - "message": message or "", - "timestamp": created_at, - "conversation": conversation or "Untitled", - "media_path": media_match["media_path"] if media_match else None, - "extraction_path": media_match["extraction_path"] - if media_match - else None, - } + ( + file_name or "Unknown", + "Submitted to AI", + message or "", + conversation or "Untitled", + conv_uuid or "", + mtime_str, + size_db if size_db is not None else "", + mime_type or "", + display_path, + ) ) db.close() except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_user_submissions] Error querying documents: {e}" - ) + logfunc(f"[nova_user_submissions] Error querying documents: {e}") - if media_db and os.path.exists(media_db): + # 3. 
Process standalone camera storage entries matching the application context + if media_db: try: - db = sqlite3.connect(media_db) - cursor = db.cursor() - cursor.execute(""" + db = open_sqlite_db_readonly(media_db) + cur = db.cursor() + cur.execute(""" SELECT _display_name, _data, _size, date_added, mime_type FROM files WHERE bucket_display_name = 'Nova' OR _data LIKE '%/Nova/%' ORDER BY date_added DESC """) - for ( - display_name, - data_path, - size, - date_added, - mime_type, - ) in cursor.fetchall(): - normalized_path = _normalize_media_path(data_path or "") - extraction_path = _resolve_extraction_path( - extraction_root, normalized_path + for display_name, data_path, size, date_added, mime_type in cur.fetchall(): + mtime_str = "" + if date_added: + try: + mtime_str = datetime.datetime.fromtimestamp( + int(date_added), timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(date_added) + + fname = display_name or ( + os.path.basename(str(data_path)) if data_path else "Unknown" ) + + clean_rel = str(data_path).replace("\\", "/").lstrip("/") + if clean_rel.startswith("storage/emulated/0/"): + clean_rel = clean_rel.replace( + "storage/emulated/0/", "data/media/0/", 1 + ) + + local_path = os.path.join(extraction_root, clean_rel) + if os.path.exists(local_path): + media_to_html(fname, local_path, report_folder) + all_items.append( - { - "type": "camera_photo", - "name": display_name - or os.path.basename(normalized_path or "") - or "Unknown", - "mime": mime_type or "image/jpeg", - "size_db": size, - "message": "", - "timestamp": date_added, - "conversation": "Camera photo (not associated with a message)", - "media_path": normalized_path, - "extraction_path": extraction_path, - } + ( + fname, + "Camera Photo", + "", + "Camera photo (not associated with a message)", + "", # Camera pictures do not have an associated conversation UUID + mtime_str, + size if size is not None else "", + mime_type or "image/jpeg", + _parse_path(data_path), + 
) ) db.close() except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_user_submissions] Error querying camera photos: {e}" - ) + logfunc(f"[nova_user_submissions] Error querying camera photos: {e}") + + if not all_items: + logfunc("[nova_user_submissions] No media found.") + return + # Deduplicate entries safely using filename and localized storage path attributes deduped = [] seen = set() - for item in all_items: - key = (item["name"].lower(), item.get("media_path") or "") + for row in all_items: + # Deduplicate using filename (index 0) and physical path data (index 8) + key = (row[0].lower(), row[8]) if key in seen: continue seen.add(key) - deduped.append(item) - - deduped.sort(key=lambda x: x.get("timestamp", 0) or 0, reverse=True) + deduped.append(row) - if not deduped: - scripts.ilapfuncs.logfunc("[nova_user_submissions] No media found.") - return + report_name = "User Media Submissions" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() headers = ( "File Name", "Type", "User Message / Context", - "Conversation", + "Conversation Title", + "Conv. 
UUID", "Date (UTC)", - "Size", + "Size (Bytes)", "MIME Type", - "Physical Path", + "Path", ) - rows = [] - tsv_rows = [] - - for item in deduped: - type_label = ( - "📤 Submitted to AI" - if item["type"] == "submitted_document" - else "📷 Camera Photo" - ) - context = item.get("message") or "No message recorded" - if isinstance(context, str) and len(context) > 150: - context = _e(context[:150] + "...") - date_str = ( - _convert_ms_timestamp(item["timestamp"]) - if item["type"] == "submitted_document" - else _convert_sec_timestamp(item["timestamp"]) - ) - size_str = _format_file_size(item.get("size_db")) - - if item.get("extraction_path") and os.path.exists(item["extraction_path"]): - physical_path = item["extraction_path"] - elif item.get("media_path"): - physical_path = item["media_path"] - else: - physical_path = "Cloud-only (Firebase Storage)" - - rows.append( - ( - _e(item["name"]), - type_label, - context, - _e(item["conversation"]), - date_str, - size_str, - _e(item.get("mime") or ""), - _e(physical_path), - ) - ) - - tsv_rows.append( - ( - item["name"], - type_label, - context, - item["conversation"], - date_str, - size_str, - item.get("mime") or "", - physical_path, - ) - ) - - report_name = "Nova AI Chatbot - User Media Submissions" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table(headers, rows, nova_db, html_escape=False) + # Compliant HTML injection vulnerability protection handled securely by the framework via DataTables + report.write_artifact_data_table( + headers, + deduped, + nova_db, + table_id="NovaUserSubmissions", + html_escape=True, + ) report.end_artifact_report() - tsv_path = os.path.join(report_folder, f"{report_name}.tsv") - with open(tsv_path, "w", newline="", encoding="utf-8") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t") - writer.writerow( - [ - "File Name", - "Type", - "User Message", - "Conversation", - "Date (UTC)", 
- "Size", - "MIME Type", - "Physical Path", - ] - ) - writer.writerows(tsv_rows) - - scripts.ilapfuncs.logfunc( - f"[nova_user_submissions] Found {len(deduped)} total items." - ) + tsv(report_folder, headers, deduped, report_name, nova_db) + logfunc(f"[nova_user_submissions] Found {len(deduped)} total items.") From 8be6e99135949f5307c90715dadf58c09fea19b5 Mon Sep 17 00:00:00 2001 From: Guilherme Guilherme Date: Thu, 21 May 2026 17:20:58 +0100 Subject: [PATCH 8/8] Fix Physical Image Path --- .../artifacts/AIChatbotNovaConversations.py | 708 ++++++------------ scripts/artifacts/AIChatbotNovaMediastore.py | 95 ++- 2 files changed, 310 insertions(+), 493 deletions(-) diff --git a/scripts/artifacts/AIChatbotNovaConversations.py b/scripts/artifacts/AIChatbotNovaConversations.py index 3fdce765..714b9790 100644 --- a/scripts/artifacts/AIChatbotNovaConversations.py +++ b/scripts/artifacts/AIChatbotNovaConversations.py @@ -4,51 +4,39 @@ "description": ( "Reconstructs full conversations from the AI Chatbot - Nova app by joining " "History, HistoryDetail, HistoryDetailImage, HistoryDetailDocument, and " - "HistoryDetailLink tables. Produces one row per message with all attachment " - "metadata surfaced inline. Image origin (user‑submitted vs AI‑generated) is " - "determined by the parent message role. Generated images are not resolvable " - "locally due to Firebase signed URL tokens; the report shows the Firebase path " - "and a forensic note. Documents are displayed with full metadata and a note " - "confirming they were submitted by the user. Soft‑deleted conversations are " - "flagged on every associated message row." + "HistoryDetailLink tables. Cross-references local file attachments with the " + "Android MediaStore database to map real physical file storage paths for both " + "documents and user-submitted images." 
), "author": "Guilherme Guilherme", - "version": "0.5", - "date": "2026-05-03", + "version": "1.1", + "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", - "notes": ( - "Message timestamps are stored as Unix milliseconds (INTEGER) and are " - "converted to UTC for display and timeline submission. " - "HistoryDetail.type: 0 = USER, 1 = ASSISTANT. " - "Attachment columns are empty when no attachment is linked to a message. " - "A conversation flagged as DELETED means History.softDeleted = 1; the record " - "remains in the database after user deletion and is forensically recoverable. " - "chatBotModel is an integer mapped to known AI model names where possible. " - "Image origin is correctly identified by the parent message role: " - "USER messages contain user‑submitted images (e.g., vision queries); " - "ASSISTANT messages contain AI‑generated images. " - "All images are stored on Firebase Storage; local cache filenames are hashes " - "of signed URLs (tokens not on device), so automatic matching is impossible. " - "Documents are also stored on Firebase; no local copy is kept. " - "TSV export contains plain‑text equivalents for all attachment fields." 
+ "notes": "Sources: chat-ai.db and Android MediaStore databases.", + "paths": ( + "*/com.scaleup.chatai/databases/chat-ai.db", + "*/com.android.providers.media/databases/external*.db", + "*/com.google.android.providers.media.module/databases/external*.db", ), - "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), "function": "get_nova_chatbot_conversations", + "output_types": "standard", + "artifact_icon": "message-square", } } import os -import shutil -import sqlite3 import datetime -import html as html_module +from datetime import timezone from scripts.artifact_report import ArtifactHtmlReport -import scripts.ilapfuncs +from scripts.ilapfuncs import ( + logfunc, + tsv, + timeline, + open_sqlite_db_readonly, + media_to_html, +) -# --------------------------------------------------------------------------- -# Known mappings for the chatBotModel integer field. -# --------------------------------------------------------------------------- CHAT_BOT_MODEL_MAP = { 0: "ChatGPT 3.5", 1: "GPT-5", @@ -82,309 +70,6 @@ 29: "GPT-4o Mini", } -IMAGE_STATE_MAP = { - 0: "Pending", - 1: "Success", - 2: "Failed", -} - -DOCUMENT_TYPE_MAP = { - 0: "Local File", - 1: "Remote File", -} - -MIME_ICON_MAP = { - "application/pdf": "📄", - "application/msword": "📝", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "📝", - "application/vnd.ms-excel": "📊", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "📊", - "text/plain": "📃", - "text/csv": "📊", - "image/jpeg": "🖼️", - "image/png": "🖼️", - "image/gif": "🖼️", - "image/webp": "🖼️", -} - -# --------------------------------------------------------------------------- -# Scalar helpers -# --------------------------------------------------------------------------- - - -def _convert_ms_timestamp(ms): - if ms is None: - return "" - try: - return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( - "%Y-%m-%d %H:%M:%S UTC" - ) - except (OSError, OverflowError, ValueError): - return str(ms) - - 
-def _resolve_model(model_int): - if model_int is None: - return "Unknown" - name = CHAT_BOT_MODEL_MAP.get(model_int) - return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" - - -def _resolve_image_state(state_int): - if state_int is None: - return "" - label = IMAGE_STATE_MAP.get(state_int) - return f"{label} ({state_int})" if label else f"Unknown State ({state_int})" - - -def _resolve_document_type(type_int): - if type_int is None: - return "" - label = DOCUMENT_TYPE_MAP.get(type_int) - return f"{label} ({type_int})" if label else f"Unknown Type ({type_int})" - - -def _format_role(type_int): - return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") - - -def _format_soft_deleted(value): - return "DELETED" if value == 1 else "No" - - -def _format_file_size(size_bytes): - if size_bytes is None: - return "" - try: - size_bytes = int(size_bytes) - if size_bytes < 1024: - return f"{size_bytes} B" - elif size_bytes < 1024**2: - return f"{size_bytes / 1024:.1f} KB" - elif size_bytes < 1024**3: - return f"{size_bytes / (1024**2):.1f} MB" - else: - return f"{size_bytes / (1024**3):.2f} GB" - except (ValueError, TypeError): - return str(size_bytes) - - -def _e(text): - return html_module.escape(str(text)) if text else "" - - -# --------------------------------------------------------------------------- -# Image resolution – always None (no local preview) -# --------------------------------------------------------------------------- -def _resolve_image_file(db_url, seeker, images_dir): - return None - - -# --------------------------------------------------------------------------- -# Rich HTML cell builders -# --------------------------------------------------------------------------- - - -def _build_image_html(msg_type, img_urls, img_prompts, img_states, resolved_filenames): - """ - Build HTML cell for images. Uses msg_type to determine origin. 
- """ - if not img_urls: - return "" - - urls = [u.strip() for u in img_urls.split(",") if u.strip()] - prompts = ( - [p.strip() for p in img_prompts.split(",") if p.strip()] if img_prompts else [] - ) - - parts = [] - for i, url in enumerate(urls): - prompt = prompts[i] if i < len(prompts) else "" - filename = resolved_filenames[i] if i < len(resolved_filenames) else None - - cell = '
' - - # Prompt - if prompt: - cell += f'
Prompt: {_e(prompt)}
' - - # Forensic note based on msg_type - if msg_type == 0: # USER → user‑submitted (vision query) - cell += ( - f'
' - f" 📤 User‑submitted image
" - f" This image was uploaded by the device user (e.g., as part of a vision query). " - f" The file content is stored on Firebase Storage and is not cached locally." - f"
" - ) - else: # ASSISTANT or unknown → AI‑generated - cell += ( - f'
' - f" 🤖 AI‑generated image
" - f" This image was created by the AI based on the user prompt. " - f" It is stored on Firebase Storage; a temporary local copy may exist " - f" in cache/image_manager_disk_cache/*.0 but the filename " - f" is a hash of a signed URL that includes a token not stored on the device. " - f" Manual inspection of .0 files is recommended.
" - f" Forensic action: Examine .0 files directly as JPEG." - f"
" - ) - - # Firebase path (was "Internal path") - cell += ( - f'
' - f" Firebase path:
" - f' {_e(url)}' - f"
" - ) - cell += "
" - - if i < len(urls) - 1: - cell += ( - '
' - ) - parts.append(cell) - - return "".join(parts) - - -def _build_document_html(doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types): - """ - Build HTML cell for documents – no local preview, shows Firebase path and forensic note. - """ - if not doc_names: - return "" - - names = [n.strip() for n in doc_names.split(",") if n.strip()] - mimes = ( - [m.strip() for m in doc_mime_types.split(",") if m.strip()] - if doc_mime_types - else [] - ) - sizes = [s.strip() for s in doc_sizes.split(",") if s.strip()] if doc_sizes else [] - urls = [u.strip() for u in doc_urls.split(",") if u.strip()] if doc_urls else [] - types = [t.strip() for t in doc_types.split(",") if t.strip()] if doc_types else [] - - parts = [] - for i, name in enumerate(names): - mime = mimes[i] if i < len(mimes) else "" - size_raw = sizes[i] if i < len(sizes) else None - url = urls[i] if i < len(urls) else "" - dtype_raw = types[i] if i < len(types) else None - - icon = MIME_ICON_MAP.get(mime, "📎") - size_label = ( - _format_file_size(int(size_raw)) - if size_raw and size_raw.lstrip("-").isdigit() - else "" - ) - dtype_label = ( - _resolve_document_type(int(dtype_raw)) - if dtype_raw and dtype_raw.lstrip("-").isdigit() - else "" - ) - - cell = ( - f'
' - f'
{icon}
' - f'
{_e(name)}
' - ) - if mime: - cell += f"
MIME Type: {_e(mime)}
" - if size_label: - cell += f"
Size: {_e(size_label)}
" - if url: - cell += ( - f"
Firebase path:
" - f' {_e(url)}
' - ) - if dtype_label: - cell += f"
Source Type: {_e(dtype_label)}
" - - # Forensic note – same as standalone document module - cell += ( - f'
' - f" ⚠️ Forensic note: This file was submitted by the" - f" user to the AI assistant as part of this conversation." - f"
" - f"
" - ) - - if i < len(names) - 1: - cell += ( - '
' - ) - parts.append(cell) - - return "".join(parts) - - -# --------------------------------------------------------------------------- -# Plain-text builders for TSV / timeline -# --------------------------------------------------------------------------- - - -def _build_image_tsv( - msg_type, img_urls, img_prompts, img_states, img_mime_types, img_pipelines -): - """Flat plain‑text representation for TSV.""" - if not img_urls: - return "" - origin = "user-submitted" if msg_type == 0 else "ai-generated" - parts = [f"Origin: {origin}", f"URL: {img_urls}"] - if img_prompts: - parts.append(f"Prompt: {img_prompts}") - if img_states: - states = ", ".join( - _resolve_image_state(int(s.strip())) - for s in img_states.split(",") - if s.strip().lstrip("-").isdigit() - ) - if states: - parts.append(f"State: {states}") - if img_mime_types: - parts.append(f"MIME: {img_mime_types}") - if img_pipelines: - parts.append(f"Pipeline: {img_pipelines}") - return " | ".join(parts) - - -def _build_document_tsv(doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types): - """Flat plain‑text representation for TSV.""" - if not doc_names: - return "" - size_label = ( - _format_file_size(doc_sizes) - if doc_sizes and doc_sizes.lstrip("-").isdigit() - else "" - ) - dtype_label = "" - if doc_types: - first_type = doc_types.split(",")[0].strip() - if first_type.lstrip("-").isdigit(): - dtype_label = _resolve_document_type(int(first_type)) - else: - dtype_label = first_type - parts = [f"Name: {doc_names}"] - if doc_mime_types: - parts.append(f"MIME: {doc_mime_types}") - if size_label: - parts.append(f"Size: {size_label}") - if doc_urls: - parts.append(f"Path: {doc_urls}") - if dtype_label: - parts.append(f"Type: {dtype_label}") - parts.append("Note: File submitted by user to AI assistant") - return " | ".join(parts) - - -# --------------------------------------------------------------------------- -# SQL (unchanged) -# --------------------------------------------------------------------------- 
QUERY = """ SELECT h.id AS conv_id, @@ -405,175 +90,234 @@ def _build_document_tsv(doc_names, doc_mime_types, doc_sizes, doc_urls, doc_type GROUP_CONCAT(DISTINCT hdi.url) AS img_urls, GROUP_CONCAT(DISTINCT hdi.prompt) AS img_prompts, - GROUP_CONCAT(DISTINCT hdi.state) AS img_states, - GROUP_CONCAT(DISTINCT hdi.mimeType) AS img_mime_types, - GROUP_CONCAT(DISTINCT hdi.pipeline) AS img_pipelines, GROUP_CONCAT(DISTINCT hdd.name) AS doc_names, - GROUP_CONCAT(DISTINCT hdd.mimeType) AS doc_mime_types, - GROUP_CONCAT(DISTINCT hdd.size) AS doc_sizes, + GROUP_CONCAT(DISTINCT hdd.mimeType) AS doc_mimes, GROUP_CONCAT(DISTINCT hdd.url) AS doc_urls, - GROUP_CONCAT(DISTINCT hdd.type) AS doc_types, GROUP_CONCAT(DISTINCT hdl.url) AS link_urls FROM History h -INNER JOIN HistoryDetail hd - ON hd.historyID = h.id -LEFT JOIN HistoryDetailImage hdi - ON hdi.historyDetailID = hd.id -LEFT JOIN HistoryDetailDocument hdd - ON hdd.historyDetailID = hd.id -LEFT JOIN HistoryDetailLink hdl - ON hdl.historyDetailID = hd.id +INNER JOIN HistoryDetail hd ON hd.historyID = h.id +LEFT JOIN HistoryDetailImage hdi ON hdi.historyDetailID = hd.id +LEFT JOIN HistoryDetailDocument hdd ON hdd.historyDetailID = hd.id +LEFT JOIN HistoryDetailLink hdl ON hdl.historyDetailID = hd.id GROUP BY hd.id ORDER BY h.id ASC, hd.createdAt ASC """ -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- + +def _parse_path(raw_path): + """Normalizes paths and slices them to always start at /data.""" + if not raw_path: + return "" + normalized = str(raw_path).replace("\\", "/") + if "/data/" in normalized: + return "/data/" + normalized.split("/data/", 1)[1] + elif "data/data/" in normalized: + return "/data/data/" + normalized.split("data/data/", 1)[1] + return normalized + + +def _convert_ms_timestamp(ms): + """Safely converts Unix millisecond timestamp using modern timezone.utc.""" + if ms is None: + return 
"" + try: + return datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) def get_nova_chatbot_conversations(files_found, report_folder, seeker, wrap_text): - for file_found in files_found: - file_found = str(file_found) - if not file_found.endswith("chat-ai.db"): - continue + logfunc("Processing data for Nova Full Conversations") + + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + nova_db = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) + media_db = next( + ( + str(x) + for x in files_found + if "external" in str(x) and str(x).endswith(".db") + ), + None, + ) + + if not nova_db: + logfunc("[nova_chatbot_conversations] Nova database file not found.") + return + extraction_root = getattr(seeker, "search_dir", "") or "" + media_lookup = {} + + # 1. Map the MediaStore database entries to verify files present on local storage + if media_db: try: - db = sqlite3.connect(file_found) - cursor = db.cursor() - cursor.execute(QUERY) - rows_raw = cursor.fetchall() + db = open_sqlite_db_readonly(media_db) + cur = db.cursor() + cur.execute(""" + SELECT _display_name, _data + FROM files + WHERE _data IS NOT NULL + """) + for display_name, data_path in cur.fetchall(): + local_path = "" + if data_path: + clean_rel = str(data_path).replace("\\", "/").lstrip("/") + if clean_rel.startswith("storage/emulated/0/"): + clean_rel = clean_rel.replace( + "storage/emulated/0/", "data/media/0/", 1 + ) + + candidate_path = os.path.join(extraction_root, clean_rel) + if os.path.exists(candidate_path): + local_path = candidate_path + + key = (display_name or os.path.basename(str(data_path))).lower() + media_lookup[key] = {"data_path": data_path, "local_path": local_path} db.close() except Exception as e: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_conversations] Error reading {file_found}: {e}" + logfunc( + 
f"[nova_chatbot_conversations] Error building MediaStore lookup: {e}" ) - continue - if not rows_raw: - scripts.ilapfuncs.logfunc( - f"[nova_chatbot_conversations] No records found in {file_found}." - ) - continue - - images_dir = os.path.join(report_folder, "nova_images") - os.makedirs(images_dir, exist_ok=True) - - headers = [ - "Conv. ID", - "Conv. UUID", - "Conv. Title", - "AI Model", - "Conv. Deleted", - "Conv. Sync State", - "Msg. ID", - "Msg. UUID", - "Role", - "Message Text", - "Token Count", - "Reasoning Content", - "Message Timestamp (UTC)", - "Msg. Sync State", - "Image Attachment", - "Document Attachment", - "Link URL(s)", - ] - - html_rows = [] - tsv_rows = [] - - for row in rows_raw: - ( - conv_id, - conv_uuid, - conv_title, - chat_bot_model, - soft_deleted, - conv_sync_state, - msg_id, - msg_uuid, - msg_type, - msg_text, - msg_token, - msg_reasoning, - msg_created_at, - msg_sync_state, - img_urls, - img_prompts, - img_states, - img_mime_types, - img_pipelines, - doc_names, - doc_mime_types, - doc_sizes, - doc_urls, - doc_types, - link_urls, - ) = row - - # Resolve images (always None, but we need a list parallel to URLs) - resolved_filenames = [] - if img_urls: - for raw_url in img_urls.split(","): - raw_url = raw_url.strip() - resolved_filenames.append( - _resolve_image_file(raw_url, seeker, images_dir) - ) - - # Common scalar columns - common = ( - conv_id, - conv_uuid or "", - conv_title or "", - _resolve_model(chat_bot_model), - _format_soft_deleted(soft_deleted), - conv_sync_state if conv_sync_state is not None else "", - msg_id, - msg_uuid or "", - _format_role(msg_type), - msg_text or "", - msg_token if msg_token is not None else "", - msg_reasoning or "", - _convert_ms_timestamp(msg_created_at), - msg_sync_state if msg_sync_state is not None else "", - ) + # 2. 
Query chat timeline logs + try: + db = open_sqlite_db_readonly(nova_db) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + logfunc(f"[nova_chatbot_conversations] Error reading {nova_db}: {e}") + return + + if not rows_raw: + logfunc(f"[nova_chatbot_conversations] No records found in {nova_db}.") + return + + headers = ( + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Msg. ID", + "Msg. UUID", + "Role", + "Message Text", + "Token Count", + "Reasoning Content", + "Message Timestamp (UTC)", + "Image Attachment Prompts", + "Image Physical Path", + "Image Firebase Path", + "Document Attachment Name", + "Document Physical Path", + "Document Firebase Path", + "Link URL(s)", + ) - # HTML cells - img_html = _build_image_html( - msg_type, img_urls, img_prompts, img_states, resolved_filenames + rows = [] + for row in rows_raw: + model_int = row[3] + model_name = "Unknown" + if model_int is not None: + name_lookup = CHAT_BOT_MODEL_MAP.get(model_int) + model_name = ( + f"{name_lookup} ({model_int})" + if name_lookup + else f"Unknown Model ({model_int})" ) - doc_html = _build_document_html( - doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types - ) - html_rows.append(common + (img_html, doc_html, link_urls or "")) - - # TSV cells - img_tsv = _build_image_tsv( - msg_type, - img_urls, - img_prompts, - img_states, - img_mime_types, - img_pipelines, - ) - doc_tsv = _build_document_tsv( - doc_names, doc_mime_types, doc_sizes, doc_urls, doc_types + + raw_role = row[8] + role_str = ( + "USER" + if raw_role == 0 + else "AI ASSISTANT" + if raw_role == 1 + else f"UNKNOWN ({raw_role})" + ) + + # A. 
Cross-reference documents using mapped MediaStore indices + doc_names_raw = row[16] + doc_phys_path_resolved = "Cloud-only (Firebase Storage)" + + if doc_names_raw: + primary_doc = doc_names_raw.split(",")[0].strip() + match_key = primary_doc.lower() + if match_key in media_lookup: + match = media_lookup[match_key] + if match["local_path"]: + media_to_html(primary_doc, match["local_path"], report_folder) + doc_phys_path_resolved = _parse_path(match["data_path"]) + + # B. Cross-reference images inside conversation rows for local paths if available + img_urls_raw = row[14] + img_phys_path_resolved = "Cloud-only (Firebase Storage)" + + if img_urls_raw: + # We evaluate the first image in the group for table display mapping + primary_img_url = img_urls_raw.split(",")[0].strip() + # Clean remote arguments out if present inside URL structures + img_name = os.path.basename(primary_img_url.split("?")[0]) + img_key = img_name.lower() + + if img_key in media_lookup: + match = media_lookup[img_key] + if match["local_path"]: + media_to_html(img_name, match["local_path"], report_folder) + img_phys_path_resolved = _parse_path(match["data_path"]) + + # Also ensure any secondary images in a comma-separated list pass safely into the gallery pipelines + if "," in img_urls_raw: + for url_part in img_urls_raw.split(",")[1:]: + sec_name = os.path.basename(url_part.strip().split("?")[0]) + sec_key = sec_name.lower() + if sec_key in media_lookup and media_lookup[sec_key]["local_path"]: + media_to_html( + sec_name, media_lookup[sec_key]["local_path"], report_folder + ) + + rows.append( + ( + row[0], + row[1] or "", + row[2] or "", + model_name, + "DELETED" if row[4] == 1 else "No", + row[6], + row[7] or "", + role_str, + row[9] or "", + row[10] if row[10] is not None else "", + row[11] or "", + _convert_ms_timestamp(row[12]), + row[15] or "", + img_phys_path_resolved, + row[14] or "", + row[16] or "", + doc_phys_path_resolved, + row[18] or "", + row[19] or "", ) - tsv_rows.append(common + 
(img_tsv, doc_tsv, link_urls or "")) - - # HTML report - report_name = "Conversations (Full Detail)" - report = ArtifactHtmlReport(report_name) - report.start_artifact_report(report_folder, report_name) - report.add_script() - report.write_artifact_data_table( - headers, html_rows, file_found, html_escape=False ) - report.end_artifact_report() - # TSV and timeline - scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) - scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) + report_name = "Conversations (Full Detail)" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + + # Enforce strict framework-side HTML cell escaping to protect against code injection + report.write_artifact_data_table(headers, rows, nova_db, html_escape=True) + report.end_artifact_report() + + tsv(report_folder, headers, rows, report_name, nova_db) + timeline(report_folder, report_name, rows, headers) + logfunc( + f"[nova_chatbot_conversations] Processed {len(rows)} timeline message items." + ) diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py index 451fcd07..2006f28f 100644 --- a/scripts/artifacts/AIChatbotNovaMediastore.py +++ b/scripts/artifacts/AIChatbotNovaMediastore.py @@ -3,12 +3,12 @@ "name": "User Media Submissions", "description": ( "Identifies media files submitted by the user to Nova AI Chatbot, including " - "uploaded documents and photos captured using the in-app camera. The artifact " - "lists recovered filenames, conversation context, timestamps, MIME types, and resolved " - "physical paths from the extracted filesystem." + "uploaded documents, chat-attached images, and photos captured using the in-app camera. " + "The artifact lists recovered filenames, conversation context, timestamps, MIME types, " + "and resolved physical paths from the extracted filesystem." 
), "author": "Guilherme Guilherme", - "version": "3.3", + "version": "3.4", "date": "2026-05-21", "requirements": "none", "category": "AI Chatbot - Nova", @@ -97,7 +97,7 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): all_items = [] - # 2. Process chat database attachments and cross-reference with MediaStore + # 2. Process chat database documents and cross-reference with MediaStore try: db = open_sqlite_db_readonly(nova_db) cur = db.cursor() @@ -138,7 +138,6 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): if match_key in media_lookup: match = media_lookup[match_key] if match["local_path"]: - # Correctly links local images into ALEAPP's standard thumb pipeline media_to_html(file_name, match["local_path"], report_folder) display_path = _parse_path(match["data_path"]) else: @@ -147,7 +146,7 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): all_items.append( ( file_name or "Unknown", - "Submitted to AI", + "Submitted Document", message or "", conversation or "Untitled", conv_uuid or "", @@ -161,7 +160,83 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): except Exception as e: logfunc(f"[nova_user_submissions] Error querying documents: {e}") - # 3. Process standalone camera storage entries matching the application context + # 3. 
New: Process user chat-submitted images (HistoryDetailImage) + try: + db = open_sqlite_db_readonly(nova_db) + cur = db.cursor() + cur.execute(""" + SELECT + hdi.url, + hdi.prompt, + hd.text, + hd.createdAt, + h.title, + h.UUID + FROM HistoryDetailImage hdi + INNER JOIN HistoryDetail hd ON hd.id = hdi.historyDetailID + INNER JOIN History h ON h.id = hd.historyID + WHERE hd.type = 0 + ORDER BY hd.createdAt DESC + """) + for ( + img_url, + prompt, + message, + created_at, + conversation, + conv_uuid, + ) in cur.fetchall(): + mtime_str = "" + if created_at: + try: + mtime_str = datetime.datetime.fromtimestamp( + float(created_at) / 1000, timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(created_at) + + # Extract the raw filename out of the remote url endpoint path + file_name = ( + os.path.basename(img_url.split("?")[0]) + if img_url + else "Unknown_Image.jpg" + ) + + # Form an inline context blending user's text message input with any associated image generation prompts + context_pieces = [] + if message: + context_pieces.append(f"Msg: {message}") + if prompt: + context_pieces.append(f"Prompt: {prompt}") + combined_context = " | ".join(context_pieces) + + match_key = file_name.lower() + if match_key in media_lookup: + match = media_lookup[match_key] + if match["local_path"]: + media_to_html(file_name, match["local_path"], report_folder) + display_path = _parse_path(match["data_path"]) + else: + display_path = "Cloud-only (Firebase Storage)" + + all_items.append( + ( + file_name, + "Submitted Image", + combined_context, + conversation or "Untitled", + conv_uuid or "", + mtime_str, + "", # Size metadata is typically absent or cloud-side for image mappings + "image/jpeg", + display_path, + ) + ) + db.close() + except Exception as e: + logfunc(f"[nova_user_submissions] Error querying submitted images: {e}") + + # 4. 
Process standalone camera storage entries matching the application context if media_db: try: db = open_sqlite_db_readonly(media_db) @@ -202,7 +277,7 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): "Camera Photo", "", "Camera photo (not associated with a message)", - "", # Camera pictures do not have an associated conversation UUID + "", mtime_str, size if size is not None else "", mime_type or "image/jpeg", @@ -221,7 +296,6 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): deduped = [] seen = set() for row in all_items: - # Deduplicate using filename (index 0) and physical path data (index 8) key = (row[0].lower(), row[8]) if key in seen: continue @@ -245,7 +319,6 @@ def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): "Path", ) - # Compliant HTML injection vulnerability protection handled securely by the framework via DataTables report.write_artifact_data_table( headers, deduped,