def _nova_display_path(raw_path):
    """Return *raw_path* with forward slashes, trimmed to start at ``/data``.

    Backslashes are converted to forward slashes. A path that already begins
    with ``data/`` (no leading slash) only gets the slash prepended; otherwise
    everything before the first ``/data/`` segment is dropped. Paths without
    any ``/data/`` segment are returned normalized but otherwise unchanged.
    """
    normalized = str(raw_path).replace("\\", "/")
    # A root-relative "data/..." path contains "/data/" only at its second
    # segment; splitting there would silently drop the first "data".
    if normalized.startswith("data/"):
        return "/" + normalized
    if "/data/" in normalized:
        return "/data/" + normalized.split("/data/", 1)[1]
    return normalized


def get_nova_cache_images(files_found, report_folder, seeker, wrap_text):
    """Report the ``*.0`` files found in the Nova image disk cache.

    For every non-directory entry of ``files_found`` whose basename ends in
    ``.0`` the file size and modification time (rendered in UTC) are collected
    and the file is handed to ``media_to_html``. The rows are written to an
    HTML report and a TSV file.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: unused here; part of the artifact function signature.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when no cache file qualifies.
    """
    logfunc("Processing data for Nova Cached Images")

    data_list = []

    for file_found in files_found:
        file_found = str(file_found)
        fname = os.path.basename(file_found)
        if os.path.isdir(file_found) or not fname.endswith(".0"):
            continue

        try:
            stat = os.stat(file_found)
            mtime = datetime.datetime.fromtimestamp(stat.st_mtime, timezone.utc)

            # NOTE(review): media_to_html is understood to iterate its second
            # argument as a collection of candidate paths, so the single path
            # is wrapped in a list (a bare string would be iterated character
            # by character). Confirm against scripts/ilapfuncs.
            # Its return value is intentionally unused: the table below is
            # written with html_escape=True, so markup would not render.
            media_to_html(fname, [file_found], report_folder)
        except Exception as e:
            # Best effort per file: one unreadable entry must not abort the run.
            logfunc(f"[nova_cache_images] Error reading {file_found}: {e}")
            continue

        data_list.append(
            (
                fname,
                stat.st_size,
                mtime.strftime("%Y-%m-%d %H:%M:%S UTC"),
                _nova_display_path(file_found),
            )
        )

    if not data_list:
        logfunc("No Nova Cached Images data found.")
        return

    report_name = "Cached Images"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    headers = (
        "Original Cache Filename",
        "File Size (Bytes)",
        "Last Modified (UTC)",
        "Path",
    )

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(
        headers, data_list, report_folder, table_id="NovaCacheImages", html_escape=True
    )
    report.end_artifact_report()

    tsv(report_folder, headers, data_list, report_name)
    logfunc(f"[nova_cache_images] Displayed {len(data_list)} cached image entries.")
+ ), + "author": "Guilherme Guilherme", + "version": "1.1", + "date": "2026-05-21", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": "Sources: chat-ai.db and Android MediaStore databases.", + "paths": ( + "*/com.scaleup.chatai/databases/chat-ai.db", + "*/com.android.providers.media/databases/external*.db", + "*/com.google.android.providers.media.module/databases/external*.db", + ), + "function": "get_nova_chatbot_conversations", + "output_types": "standard", + "artifact_icon": "message-square", + } +} + +import os +import datetime +from datetime import timezone +from scripts.artifact_report import ArtifactHtmlReport +from scripts.ilapfuncs import ( + logfunc, + tsv, + timeline, + open_sqlite_db_readonly, + media_to_html, +) + +CHAT_BOT_MODEL_MAP = { + 0: "ChatGPT 3.5", + 1: "GPT-5", + 2: "GPT-4o", + 3: "Bard / Image Gen.", + 4: "Image Generator", + 5: "Vision", + 6: "Google Vision", + 7: "Document", + 8: "LLaMA 2", + 9: "Nova", + 10: "Gemini", + 11: "Superbot", + 12: "Logo Generator", + 13: "Tattoo Generator", + 14: "Web Search", + 15: "Claude", + 16: "DeepSeek", + 17: "Signature Generator", + 18: "Mistral", + 19: "Grok", + 20: "DeepSeek R1", + 21: "AI Filter", + 22: "Voice Chat", + 23: "Snap & Solve", + 24: "Study Planner", + 25: "Quiz Maker", + 26: "Essay Helper", + 27: "Gemini 3 Pro", + 28: "GPT-5.1", + 29: "GPT-4o Mini", +} + +QUERY = """ +SELECT + h.id AS conv_id, + h.UUID AS conv_uuid, + h.title AS conv_title, + h.chatBotModel AS chat_bot_model, + h.softDeleted AS soft_deleted, + h.syncState AS conv_sync_state, + + hd.id AS msg_id, + hd.UUID AS msg_uuid, + hd.type AS msg_type, + hd.text AS msg_text, + hd.token AS msg_token, + hd.reasoningContent AS msg_reasoning, + hd.createdAt AS msg_created_at, + hd.syncState AS msg_sync_state, + + GROUP_CONCAT(DISTINCT hdi.url) AS img_urls, + GROUP_CONCAT(DISTINCT hdi.prompt) AS img_prompts, + + GROUP_CONCAT(DISTINCT hdd.name) AS doc_names, + GROUP_CONCAT(DISTINCT hdd.mimeType) AS doc_mimes, + 
def _parse_path(raw_path):
    """Normalize a path to forward slashes and trim it to start at ``/data``.

    Returns ``""`` for a falsy input. A path that already begins with
    ``data/`` only gets a leading slash; otherwise everything before the first
    ``/data/`` segment is dropped. Paths without a ``/data/`` segment are
    returned normalized but otherwise unchanged.
    """
    if not raw_path:
        return ""
    normalized = str(raw_path).replace("\\", "/")
    # A root-relative "data/..." path contains "/data/" only at its second
    # segment; splitting there would silently drop the first "data".
    if normalized.startswith("data/"):
        return "/" + normalized
    if "/data/" in normalized:
        return "/data/" + normalized.split("/data/", 1)[1]
    return normalized


def _convert_ms_timestamp(ms):
    """Format a Unix millisecond timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns ``""`` for ``None`` and ``str(ms)`` when the value cannot be
    converted (non-numeric, wrong type, or out of the platform's range).
    """
    if ms is None:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc)
    except (OSError, OverflowError, ValueError, TypeError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def _fill_media_lookup(media_db, extraction_root, lookup):
    """Populate *lookup* from the ``files`` table of a MediaStore database.

    Keys are lower-cased display names (falling back to the basename of
    ``_data``); values are dicts with the recorded ``data_path`` and the
    ``local_path`` of the file inside the extraction, or ``""`` when it is not
    present there. *lookup* is filled in place so entries gathered before an
    error remain available to the caller.
    """
    db = open_sqlite_db_readonly(media_db)
    try:
        cur = db.cursor()
        cur.execute("""
            SELECT _display_name, _data
            FROM files
            WHERE _data IS NOT NULL
        """)
        for display_name, data_path in cur.fetchall():
            local_path = ""
            if data_path:
                clean_rel = str(data_path).replace("\\", "/").lstrip("/")
                # Map the emulated-storage prefix onto data/media/0 before
                # probing the extraction folder.
                if clean_rel.startswith("storage/emulated/0/"):
                    clean_rel = clean_rel.replace(
                        "storage/emulated/0/", "data/media/0/", 1
                    )
                candidate_path = os.path.join(extraction_root, clean_rel)
                if os.path.exists(candidate_path):
                    local_path = candidate_path

            key = (display_name or os.path.basename(str(data_path))).lower()
            previous = lookup.get(key)
            # Do not let a same-named entry that is missing from the
            # extraction replace one that was resolved to a local file.
            if previous and previous["local_path"] and not local_path:
                continue
            lookup[key] = {"data_path": data_path, "local_path": local_path}
    finally:
        db.close()


def get_nova_chatbot_conversations(files_found, report_folder, seeker, wrap_text):
    """Build the per-message conversation report for the Nova chat database.

    Runs the module-level ``QUERY`` against ``chat-ai.db`` and, when a
    MediaStore ``external*.db`` is among ``files_found``, matches document
    names and image URL basenames against it to report the recorded storage
    path of each attachment. Writes an HTML report, a TSV file and timeline
    entries.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: file seeker; only its optional ``search_dir`` attribute is read.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when the Nova database is missing,
        unreadable or yields no rows.
    """
    logfunc("Processing data for Nova Full Conversations")

    # Entries are stringified first so the suffix filter also works for
    # path-like objects; SQLite side files are not databases of their own.
    candidates = [
        str(x)
        for x in files_found
        if not str(x).endswith(("-journal", "-wal", "-shm"))
    ]
    nova_db = next((x for x in candidates if "chat-ai.db" in x), None)
    media_db = next(
        (x for x in candidates if "external" in x and x.endswith(".db")),
        None,
    )

    if not nova_db:
        logfunc("[nova_chatbot_conversations] Nova database file not found.")
        return

    extraction_root = getattr(seeker, "search_dir", "") or ""
    media_lookup = {}

    # 1. Index the MediaStore entries (best effort: the report is still
    #    produced without them).
    if media_db:
        try:
            _fill_media_lookup(media_db, extraction_root, media_lookup)
        except Exception as e:
            logfunc(
                f"[nova_chatbot_conversations] Error building MediaStore lookup: {e}"
            )

    # 2. Query the conversation messages.
    try:
        db = open_sqlite_db_readonly(nova_db)
        try:
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        finally:
            # Close the handle even when the query itself fails.
            db.close()
    except Exception as e:
        logfunc(f"[nova_chatbot_conversations] Error reading {nova_db}: {e}")
        return

    if not rows_raw:
        logfunc(f"[nova_chatbot_conversations] No records found in {nova_db}.")
        return

    headers = (
        "Conv. ID",
        "Conv. UUID",
        "Conv. Title",
        "AI Model",
        "Conv. Deleted",
        "Msg. ID",
        "Msg. UUID",
        "Role",
        "Message Text",
        "Token Count",
        "Reasoning Content",
        "Message Timestamp (UTC)",
        "Image Attachment Prompts",
        "Image Physical Path",
        "Image Firebase Path",
        "Document Attachment Name",
        "Document Physical Path",
        "Document Firebase Path",
        "Link URL(s)",
    )

    cloud_only = "Cloud-only (Firebase Storage)"

    rows = []
    for row in rows_raw:
        model_int = row[3]
        model_name = "Unknown"
        if model_int is not None:
            name_lookup = CHAT_BOT_MODEL_MAP.get(model_int)
            model_name = (
                f"{name_lookup} ({model_int})"
                if name_lookup
                else f"Unknown Model ({model_int})"
            )

        raw_role = row[8]
        if raw_role == 0:
            role_str = "USER"
        elif raw_role == 1:
            role_str = "AI ASSISTANT"
        else:
            role_str = f"UNKNOWN ({raw_role})"

        # NOTE(review): media_to_html is understood to iterate its second
        # argument as a collection of candidate paths, hence the one-element
        # lists below — confirm against scripts/ilapfuncs.

        # A. Documents: only the first name of the GROUP_CONCAT list is
        #    cross-referenced against the MediaStore index.
        doc_names_raw = row[16]
        doc_phys_path_resolved = cloud_only
        if doc_names_raw:
            primary_doc = doc_names_raw.split(",")[0].strip()
            match = media_lookup.get(primary_doc.lower())
            if match:
                if match["local_path"]:
                    media_to_html(primary_doc, [match["local_path"]], report_folder)
                doc_phys_path_resolved = _parse_path(match["data_path"])

        # B. Images: every URL is offered to media_to_html when a local copy
        #    exists; only the first one feeds the "Image Physical Path" cell.
        img_urls_raw = row[14]
        img_phys_path_resolved = cloud_only
        if img_urls_raw:
            for position, url_part in enumerate(img_urls_raw.split(",")):
                # Drop any query string before taking the file name.
                img_name = os.path.basename(url_part.strip().split("?")[0])
                match = media_lookup.get(img_name.lower())
                if not match:
                    continue
                if match["local_path"]:
                    media_to_html(img_name, [match["local_path"]], report_folder)
                if position == 0:
                    img_phys_path_resolved = _parse_path(match["data_path"])

        rows.append(
            (
                row[0],
                row[1] or "",
                row[2] or "",
                model_name,
                "DELETED" if row[4] == 1 else "No",
                row[6],
                row[7] or "",
                role_str,
                row[9] or "",
                row[10] if row[10] is not None else "",
                row[11] or "",
                _convert_ms_timestamp(row[12]),
                row[15] or "",
                img_phys_path_resolved,
                row[14] or "",
                row[16] or "",
                doc_phys_path_resolved,
                row[18] or "",
                row[19] or "",
            )
        )

    report_name = "Conversations (Full Detail)"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(headers, rows, nova_db, html_escape=True)
    report.end_artifact_report()

    tsv(report_folder, headers, rows, report_name, nova_db)
    # NOTE(review): the first column here is the conversation id, not a
    # timestamp — confirm that is what the timeline helper expects as its key.
    timeline(report_folder, report_name, rows, headers)
    logfunc(
        f"[nova_chatbot_conversations] Processed {len(rows)} timeline message items."
    )
def _convert_ms_timestamp(ms):
    """Format a Unix millisecond timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns ``""`` for ``None`` and ``str(ms)`` when the value cannot be
    converted (non-numeric, wrong type, or out of the platform's range).
    """
    if ms is None:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc)
    except (OSError, OverflowError, ValueError, TypeError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def get_nova_chatbot_history(files_found, report_folder, seeker, wrap_text):
    """Report one row per conversation from the Nova ``History`` table.

    Runs the module-level ``QUERY`` against ``chat-ai.db``, maps the model
    number through ``CHAT_BOT_MODEL_MAP``, renders the millisecond timestamps
    in UTC and writes an HTML report, a TSV file and timeline entries.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: unused here; part of the artifact function signature.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when the database is missing,
        unreadable or yields no rows.
    """
    logfunc("Processing data for Conversation History")

    # Entries are stringified first so the suffix filter also works for
    # path-like objects; SQLite side files are not databases of their own.
    candidates = [
        str(x)
        for x in files_found
        if not str(x).endswith(("-journal", "-wal", "-shm"))
    ]
    file_found = next((x for x in candidates if "chat-ai.db" in x), None)

    if not file_found:
        logfunc("[nova_chatbot_history] Nova database file not found.")
        return

    try:
        db = open_sqlite_db_readonly(file_found)
        try:
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        finally:
            # Close the handle even when the query itself fails.
            db.close()
    except Exception as e:
        logfunc(f"[nova_chatbot_history] Error reading {file_found}: {e}")
        return

    if not rows_raw:
        logfunc(f"[nova_chatbot_history] No records found in {file_found}.")
        return

    headers = (
        "Conv. ID",
        "Conv. UUID",
        "Title",
        "AI Model",
        "Assistant ID",
        "Caption History ID",
        "Starred",
        "Soft Deleted",
        "Sync State",
        "Sync Retry Count",
        "Created At (UTC)",
        "Updated At (UTC)",
        "Last Modified At (UTC)",
        "Message Count",
        "Last Message At (UTC)",
        "First User Message",
    )

    rows = []
    for row in rows_raw:
        model_int = row[3]
        model_name = "Unknown"
        if model_int is not None:
            name_lookup = CHAT_BOT_MODEL_MAP.get(model_int)
            model_name = (
                f"{name_lookup} ({model_int})"
                if name_lookup
                else f"Unknown Model ({model_int})"
            )

        rows.append(
            (
                row[0],
                row[1] or "",
                row[2] or "",
                model_name,
                # "is not None" keeps legitimate zero values visible.
                row[4] if row[4] is not None else "",
                row[5] or "",
                "Yes" if row[6] else "No",
                "DELETED" if row[7] == 1 else "No",
                row[8] if row[8] is not None else "",
                row[9] if row[9] is not None else "",
                _convert_ms_timestamp(row[10]),
                _convert_ms_timestamp(row[11]),
                _convert_ms_timestamp(row[12]),
                row[13] if row[13] is not None else 0,
                _convert_ms_timestamp(row[14]),
                row[15] or "",
            )
        )

    report_name = "History"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(headers, rows, file_found, html_escape=True)
    report.end_artifact_report()

    tsv(report_folder, headers, rows, report_name, file_found)
    # NOTE(review): the first column here is the conversation id, not a
    # timestamp — confirm that is what the timeline helper expects as its key.
    timeline(report_folder, report_name, rows, headers)
    logfunc(
        f"[nova_chatbot_history] Processed {len(rows)} conversation history records."
    )
def _convert_ms_timestamp(ms):
    """Format a Unix millisecond timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns ``""`` for ``None`` and ``str(ms)`` when the value cannot be
    converted (non-numeric, wrong type, or out of the platform's range).
    """
    if ms is None:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc)
    except (OSError, OverflowError, ValueError, TypeError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def get_nova_chatbot_history_detail(files_found, report_folder, seeker, wrap_text):
    """Report one row per message from the Nova ``HistoryDetail`` table.

    Runs the module-level ``QUERY`` against ``chat-ai.db``, labels the model
    and message role, turns the three attachment ``EXISTS`` flags into
    ``Yes``/``No`` and writes an HTML report, a TSV file and timeline entries.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: unused here; part of the artifact function signature.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when the database is missing,
        unreadable or yields no rows.
    """
    logfunc("Processing data for HistoryDetail")

    # Entries are stringified first so the suffix filter also works for
    # path-like objects; SQLite side files are not databases of their own.
    candidates = [
        str(x)
        for x in files_found
        if not str(x).endswith(("-journal", "-wal", "-shm"))
    ]
    file_found = next((x for x in candidates if "chat-ai.db" in x), None)

    if not file_found:
        logfunc("[nova_chatbot_history_detail] Nova database file not found.")
        return

    try:
        db = open_sqlite_db_readonly(file_found)
        try:
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        finally:
            # Close the handle even when the query itself fails.
            db.close()
    except Exception as e:
        logfunc(f"[nova_chatbot_history_detail] Error reading {file_found}: {e}")
        return

    if not rows_raw:
        logfunc(f"[nova_chatbot_history_detail] No records found in {file_found}.")
        return

    headers = (
        "Msg. ID",
        "Msg. UUID",
        "Conv. ID",
        "Conv. UUID",
        "Conv. Title",
        "AI Model",
        "Conv. Deleted",
        "Role",
        "Message Text",
        "Token Count",
        "Reasoning Content",
        "Message Timestamp (UTC)",
        "Last Modified At (UTC)",
        "Sync State",
        "Sync Retry Count",
        "Has Image",
        "Has Document",
        "Has Link",
    )

    rows = []
    for row in rows_raw:
        model_int = row[5]
        model_name = "Unknown"
        if model_int is not None:
            name_lookup = CHAT_BOT_MODEL_MAP.get(model_int)
            model_name = (
                f"{name_lookup} ({model_int})"
                if name_lookup
                else f"Unknown Model ({model_int})"
            )

        role_int = row[7]
        if role_int == 0:
            role_label = "USER"
        elif role_int == 1:
            role_label = "ASSISTANT"
        else:
            role_label = f"UNKNOWN ({role_int})"

        rows.append(
            (
                row[0],
                row[1] or "",
                row[2],
                row[3] or "",
                row[4] or "",
                model_name,
                "DELETED" if row[6] == 1 else "No",
                role_label,
                row[8] or "",
                # "is not None" keeps legitimate zero values visible.
                row[9] if row[9] is not None else "",
                row[10] or "",
                _convert_ms_timestamp(row[11]),
                _convert_ms_timestamp(row[12]),
                row[13] if row[13] is not None else "",
                row[14] if row[14] is not None else "",
                "Yes" if row[15] else "No",
                "Yes" if row[16] else "No",
                "Yes" if row[17] else "No",
            )
        )

    report_name = "HistoryDetail"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(headers, rows, file_found, html_escape=True)
    report.end_artifact_report()

    tsv(report_folder, headers, rows, report_name, file_found)
    # NOTE(review): the first column here is the message id, not a
    # timestamp — confirm that is what the timeline helper expects as its key.
    timeline(report_folder, report_name, rows, headers)
    logfunc(
        f"[nova_chatbot_history_detail] Processed {len(rows)} message detail records."
    )
def _convert_ms_timestamp(ms):
    """Format a Unix millisecond timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns ``""`` for ``None`` and ``str(ms)`` when the value cannot be
    converted (non-numeric, wrong type, or out of the platform's range).
    """
    if ms is None:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc)
    except (OSError, OverflowError, ValueError, TypeError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def _format_file_size(size_bytes):
    """Render a byte count as ``B``, ``KB``, ``MB`` or ``GB`` (1024-based).

    Returns ``""`` for ``None`` and ``str(size_bytes)`` when the value cannot
    be interpreted as an integer.
    """
    if size_bytes is None:
        return ""
    try:
        size = int(size_bytes)
    except (ValueError, TypeError):
        return str(size_bytes)
    if size < 1024:
        return f"{size} B"
    if size < 1024**2:
        return f"{size / 1024:.1f} KB"
    if size < 1024**3:
        return f"{size / (1024**2):.1f} MB"
    return f"{size / (1024**3):.1f} GB"


def get_nova_chatbot_documents(files_found, report_folder, seeker, wrap_text):
    """Report one row per record of the Nova ``HistoryDetailDocument`` table.

    Runs the module-level ``QUERY`` against ``chat-ai.db``, labels the model,
    the document source type and the role of the parent message, and writes
    an HTML report, a TSV file and timeline entries.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: unused here; part of the artifact function signature.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when the database is missing,
        unreadable or yields no rows.
    """
    logfunc("Processing data for HistoryDetail Submitted Documents")

    # Entries are stringified first so the suffix filter also works for
    # path-like objects; SQLite side files are not databases of their own.
    candidates = [
        str(x)
        for x in files_found
        if not str(x).endswith(("-journal", "-wal", "-shm"))
    ]
    file_found = next((x for x in candidates if "chat-ai.db" in x), None)

    if not file_found:
        logfunc("[nova_chatbot_documents] Nova database file not found.")
        return

    try:
        db = open_sqlite_db_readonly(file_found)
        try:
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        finally:
            # Close the handle even when the query itself fails.
            db.close()
    except Exception as e:
        logfunc(f"[nova_chatbot_documents] Error reading {file_found}: {e}")
        return

    if not rows_raw:
        logfunc(f"[nova_chatbot_documents] No document records found in {file_found}.")
        return

    headers = (
        "Doc. ID",
        "Msg. ID",
        "Conv. ID",
        "Conv. UUID",
        "Conv. Title",
        "AI Model",
        "Conv. Deleted",
        "Media Submitted By",
        "Msg. Text",
        "Msg. Timestamp (UTC)",
        "File Name",
        "MIME Type",
        "Size",
        "Source Type",
        "Firebase Storage Path",
        "Forensic Notes",
    )

    rows = []
    for row in rows_raw:
        model_int = row[13]
        model_name = "Unknown"
        if model_int is not None:
            name_lookup = CHAT_BOT_MODEL_MAP.get(model_int)
            model_name = (
                f"{name_lookup} ({model_int})"
                if name_lookup
                else f"Unknown Model ({model_int})"
            )

        doc_type_int = row[4]
        if doc_type_int == 0:
            doc_type_label = "Local File"
        elif doc_type_int == 1:
            doc_type_label = "Remote File"
        else:
            doc_type_label = f"Unknown ({doc_type_int})"

        # The role of the parent message decides who supplied the document.
        raw_role = row[8]
        if raw_role == 0:
            submitted_by = "USER"
            forensic_note = "Media element actively selected and submitted by the user to the chatbot interface."
        elif raw_role == 1:
            submitted_by = "AI ASSISTANT"
            forensic_note = "Media element generated or provided back by the AI response."
        else:
            submitted_by = f"UNKNOWN ({raw_role})"
            forensic_note = "Unknown structural context for media origin."

        rows.append(
            (
                row[0],
                row[1],
                row[7],
                row[11] or "",
                row[12] or "",
                model_name,
                "DELETED" if row[14] == 1 else "No",
                submitted_by,
                row[9] or "",
                _convert_ms_timestamp(row[10]),
                row[3] or "Unknown",
                row[6] or "",
                _format_file_size(row[5]),
                doc_type_label,
                row[2] or "",
                forensic_note,
            )
        )

    report_name = "HistoryDetailDocuments"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(headers, rows, file_found, html_escape=True)
    report.end_artifact_report()

    tsv(report_folder, headers, rows, report_name, file_found)
    # NOTE(review): the first column here is the document id, not a
    # timestamp — confirm that is what the timeline helper expects as its key.
    timeline(report_folder, report_name, rows, headers)
    logfunc(
        f"[nova_chatbot_documents] Processed {len(rows)} submitted document records."
    )
def _convert_ms_timestamp(ms):
    """Format a Unix millisecond timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Returns ``""`` for ``None`` and ``str(ms)`` when the value cannot be
    converted (non-numeric, wrong type, or out of the platform's range).
    """
    if ms is None:
        return ""
    try:
        moment = datetime.datetime.fromtimestamp(float(ms) / 1000, timezone.utc)
    except (OSError, OverflowError, ValueError, TypeError):
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S UTC")


def get_nova_chatbot_images(files_found, report_folder, seeker, wrap_text):
    """Report one row per record of the Nova ``HistoryDetailImage`` table.

    Runs the module-level ``QUERY`` against ``chat-ai.db``, labels the model
    (``CHAT_BOT_MODEL_MAP``), the image state (``IMAGE_STATE_MAP``) and the
    role of the parent message, and writes an HTML report, a TSV file and
    timeline entries.

    Args:
        files_found: iterable of paths matched by the artifact's path globs.
        report_folder: output folder passed through to the report helpers.
        seeker: unused here; part of the artifact function signature.
        wrap_text: unused here; part of the artifact function signature.

    Returns:
        None. Returns early (after logging) when the database is missing,
        unreadable or yields no rows.
    """
    logfunc("Processing data for HistoryDetail Images")

    # Entries are stringified first so the suffix filter also works for
    # path-like objects; SQLite side files are not databases of their own.
    candidates = [
        str(x)
        for x in files_found
        if not str(x).endswith(("-journal", "-wal", "-shm"))
    ]
    file_found = next((x for x in candidates if "chat-ai.db" in x), None)

    if not file_found:
        logfunc("[nova_chatbot_images] Nova database file not found.")
        return

    try:
        db = open_sqlite_db_readonly(file_found)
        try:
            cursor = db.cursor()
            cursor.execute(QUERY)
            rows_raw = cursor.fetchall()
        finally:
            # Close the handle even when the query itself fails.
            db.close()
    except Exception as e:
        logfunc(f"[nova_chatbot_images] Error reading {file_found}: {e}")
        return

    if not rows_raw:
        logfunc(f"[nova_chatbot_images] No image records found in {file_found}.")
        return

    headers = (
        "Image ID",
        "Msg. ID",
        "Conv. ID",
        "Conv. UUID",
        "Conv. Title",
        "AI Model",
        "Conv. Deleted",
        "Media Submitted By",
        "Msg. Text",
        "Msg. Timestamp (UTC)",
        "Prompt",
        "State",
        "Pipeline",
        "Style ID",
        "MIME Type",
        "Firebase Storage Path",
        "Forensic Notes",
    )

    rows = []
    for row in rows_raw:
        model_int = row[14]
        model_name = "Unknown"
        if model_int is not None:
            name_lookup = CHAT_BOT_MODEL_MAP.get(model_int)
            model_name = (
                f"{name_lookup} ({model_int})"
                if name_lookup
                else f"Unknown Model ({model_int})"
            )

        state_int = row[4]
        state_label = ""
        if state_int is not None:
            name_state = IMAGE_STATE_MAP.get(state_int)
            state_label = (
                f"{name_state} ({state_int})"
                if name_state
                else f"Unknown ({state_int})"
            )

        # The role of the parent message decides who supplied the image.
        raw_role = row[9]
        if raw_role == 0:
            submitted_by = "USER"
            forensic_note = (
                "Media element actively selected and submitted by the user to the chatbot interface. "
                "The file content resides remotely on Firebase Storage and is not locally cached."
            )
        elif raw_role == 1:
            submitted_by = "AI ASSISTANT"
            forensic_note = (
                "Media element generated or provided back by the AI response. "
                "Stored on Firebase Storage; temporary local copies might be found in cache/image_manager_disk_cache/."
            )
        else:
            submitted_by = f"UNKNOWN ({raw_role})"
            forensic_note = "Unknown structural context for media origin."

        rows.append(
            (
                row[0],
                row[1],
                row[8],
                row[12] or "",
                row[13] or "",
                model_name,
                "DELETED" if row[15] == 1 else "No",
                submitted_by,
                row[10] or "",
                _convert_ms_timestamp(row[11]),
                row[3] or "",
                state_label,
                row[7] or "",
                # "is not None" keeps a legitimate style id of 0 visible.
                row[6] if row[6] is not None else "",
                row[5] or "",
                row[2] or "",
                forensic_note,
            )
        )

    report_name = "HistoryDetailImages"
    report = ArtifactHtmlReport(report_name)
    report.start_artifact_report(report_folder, report_name)
    report.add_script()

    # Cell escaping is requested from the report writer via html_escape=True.
    report.write_artifact_data_table(headers, rows, file_found, html_escape=True)
    report.end_artifact_report()

    tsv(report_folder, headers, rows, report_name, file_found)
    # NOTE(review): the first column here is the image id, not a
    # timestamp — confirm that is what the timeline helper expects as its key.
    timeline(report_folder, report_name, rows, headers)
    logfunc(
        f"[nova_chatbot_images] Processed {len(rows)} submitted image records."
    )
" + "The table is currently empty in observed samples but the module is " + "future-proof and will extract records if the table is populated in " + "other device images or application versions." + ), + "author": "Guilherme Guilherme", + "version": "0.2", + "date": "2025-04-27", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": ( + "Database: com.scaleup.chatai/databases/chat-ai.db. " + "HistoryDetailLink.url stores the full URL shared in the message. " + "Links may be shared by the USER (e.g. a webpage submitted for AI " + "analysis) or by the ASSISTANT (e.g. a reference link in a response). " + "The role of the message is determined by HistoryDetail.type: " + "0 = USER, 1 = ASSISTANT. " + "softDeleted is inherited from the parent History record; DELETED means " + "the conversation was removed by the user but the link record remains " + "physically in the database and is forensically recoverable. " + "If this report contains no rows the HistoryDetailLink table was empty " + "in the examined database — this is normal for the current app version." + ), + "paths": ("*/com.scaleup.chatai/databases/chat-ai.db",), + "function": "get_nova_chatbot_links", + } +} + +import sqlite3 +import datetime +import html as html_module +from scripts.artifact_report import ArtifactHtmlReport +import scripts.ilapfuncs + +# --------------------------------------------------------------------------- +# Known mappings for the chatBotModel integer field. +# Source: FirestoreHistory.EngineTypes enum ordinals from decompiled APK source +# (com.scaleup.chatai.ui.conversation.FirestoreHistory). +# The integer stored in the database is the ENUM ORDINAL (0-based position), +# NOT the botId from chatbotAgentMap. These are two independent systems. +# Image-generating engines: 3 (legacy Bard ordinal reused), 4, 12, 13, 17. 
# NOTE: ordinal 3 ('bard') was reused for image generation in newer app versions;
# presence of HistoryDetailImage records confirms image generation regardless of label.
# NOTE: ordinal 20 ('deepSeekR1') — if reasoningContent is NULL the actual API
# call may have used DeepSeek V3; the field reflects the UI selector, not the API.
# ---------------------------------------------------------------------------
# The key of each label is its 0-based position in this tuple, which is
# exactly how the database stores the model; each trailing comment gives
# "<ordinal>: <enum constant name>".
CHAT_BOT_MODEL_MAP = dict(
    enumerate(
        (
            "ChatGPT 3.5",  # 0: gpt-3.5
            "GPT-5",  # 1: gpt-5
            "GPT-4o",  # 2: gpt-4o
            "Bard / Image Gen.",  # 3: bard (legacy; reused for image generation)
            "Image Generator",  # 4: image-generator
            "Vision",  # 5: vision
            "Google Vision",  # 6: googleVision
            "Document",  # 7: document
            "LLaMA 2",  # 8: llama2
            "Nova",  # 9: nova
            "Gemini",  # 10: gemini
            "Superbot",  # 11: superbot
            "Logo Generator",  # 12: logo-generator
            "Tattoo Generator",  # 13: tattoo-generator
            "Web Search",  # 14: webSearch
            "Claude",  # 15: claude
            "DeepSeek",  # 16: deepSeek
            "Signature Generator",  # 17: signature-generator
            "Mistral",  # 18: mistral
            "Grok",  # 19: grok
            "DeepSeek R1",  # 20: deepSeekR1
            "AI Filter",  # 21: aiFilter
            "Voice Chat",  # 22: voiceChat
            "Snap & Solve",  # 23: snapAndSolve
            "Study Planner",  # 24: studyPlanner
            "Quiz Maker",  # 25: quizMaker
            "Essay Helper",  # 26: essayHelper
            "Gemini 3 Pro",  # 27: gemini-3-pro
            "GPT-5.1",  # 28: gpt-5.1
            "GPT-4o Mini",  # 29: 4o-mini
        )
    )
)

# ---------------------------------------------------------------------------
# SQL
# One row per HistoryDetailLink, enriched with parent message and
# conversation context.
+# --------------------------------------------------------------------------- +QUERY = """ +SELECT + -- Link record + l.id AS link_id, + l.historyDetailID AS msg_id, + l.url AS link_url, + + -- Parent message context (HistoryDetail) + hd.historyID AS conv_id, + hd.type AS msg_type, + hd.text AS msg_text, + hd.createdAt AS msg_created_at, + + -- Parent conversation context (History) + h.UUID AS conv_uuid, + h.title AS conv_title, + h.chatBotModel AS chat_bot_model, + h.softDeleted AS soft_deleted + +FROM HistoryDetailLink l +INNER JOIN HistoryDetail hd ON hd.id = l.historyDetailID +INNER JOIN History h ON h.id = hd.historyID +ORDER BY l.id ASC +""" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _e(text): + return html_module.escape(str(text)) if text else "" + + +def _convert_ms_timestamp(ms): + if ms is None: + return "" + try: + return datetime.datetime.utcfromtimestamp(ms / 1000).strftime( + "%Y-%m-%d %H:%M:%S UTC" + ) + except (OSError, OverflowError, ValueError): + return str(ms) + + +def _resolve_model(model_int): + if model_int is None: + return "Unknown" + name = CHAT_BOT_MODEL_MAP.get(model_int) + return f"{name} ({model_int})" if name else f"Unknown Model ({model_int})" + + +def _format_soft_deleted(value): + return "DELETED" if value == 1 else "No" + + +def _format_role(type_int): + return {0: "USER", 1: "ASSISTANT"}.get(type_int, f"UNKNOWN ({type_int})") + + +def _build_link_cell(url): + """Render a URL as a clearly labelled clickable anchor.""" + if not url: + return "" + return ( + f'
' + f' 🔗
' + f' {_e(url)}' + f"
" + ) + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def get_nova_chatbot_links(files_found, report_folder, seeker, wrap_text): + """ + Entry point for the nova_chatbot_links artifact. + + Extracts every HistoryDetailLink record enriched with parent message and + conversation context. Outputs HTML report, TSV, and timeline. + Handles an empty HistoryDetailLink table gracefully. + """ + for file_found in files_found: + file_found = str(file_found) + if not file_found.endswith("chat-ai.db"): + continue + + try: + db = sqlite3.connect(file_found) + cursor = db.cursor() + cursor.execute(QUERY) + rows_raw = cursor.fetchall() + db.close() + except Exception as e: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_links] Error reading {file_found}: {e}" + ) + continue + + # Gracefully handle an empty table — log and produce an empty report + # so the examiner knows the module ran and the table had no records. + if not rows_raw: + scripts.ilapfuncs.logfunc( + f"[nova_chatbot_links] HistoryDetailLink table is empty in {file_found}." + ) + report_name = "HistoryDetailLinks" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + [ + "Link ID", + "Msg. ID", + "Conv. ID", + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + "Msg. Role", + "Msg. Text", + "Msg. Timestamp (UTC)", + "Link URL", + ], + [], + file_found, + html_escape=False, + ) + report.end_artifact_report() + continue + + headers = [ + # Link identity + "Link ID", + "Msg. ID", + "Conv. ID", + # Conversation context + "Conv. UUID", + "Conv. Title", + "AI Model", + "Conv. Deleted", + # Message context + "Msg. Role", + "Msg. Text", + "Msg. 
Timestamp (UTC)", + # Link (HTML rendered) + "Link URL", + ] + + html_rows = [] + tsv_rows = [] + + for row in rows_raw: + ( + link_id, + msg_id, + link_url, + conv_id, + msg_type, + msg_text, + msg_created_at, + conv_uuid, + conv_title, + chat_bot_model, + soft_deleted, + ) = row + + common = ( + link_id, + msg_id, + conv_id, + conv_uuid or "", + conv_title or "", + _resolve_model(chat_bot_model), + _format_soft_deleted(soft_deleted), + _format_role(msg_type), + msg_text or "", + _convert_ms_timestamp(msg_created_at), + ) + + html_rows.append(common + (_build_link_cell(link_url),)) + tsv_rows.append(common + (link_url or "",)) + + # HTML report + report_name = "HistoryDetailLinks" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + report.write_artifact_data_table( + headers, html_rows, file_found, html_escape=False + ) + report.end_artifact_report() + + # TSV + scripts.ilapfuncs.tsv(report_folder, headers, tsv_rows, report_name, file_found) + + # Timeline (Msg. Timestamp, index 9) + scripts.ilapfuncs.timeline(report_folder, report_name, tsv_rows, headers) diff --git a/scripts/artifacts/AIChatbotNovaMediastore.py b/scripts/artifacts/AIChatbotNovaMediastore.py new file mode 100644 index 00000000..2006f28f --- /dev/null +++ b/scripts/artifacts/AIChatbotNovaMediastore.py @@ -0,0 +1,332 @@ +__artifacts_v2__ = { + "nova_user_submissions": { + "name": "User Media Submissions", + "description": ( + "Identifies media files submitted by the user to Nova AI Chatbot, including " + "uploaded documents, chat-attached images, and photos captured using the in-app camera. " + "The artifact lists recovered filenames, conversation context, timestamps, MIME types, " + "and resolved physical paths from the extracted filesystem." 
+ ), + "author": "Guilherme Guilherme", + "version": "3.4", + "date": "2026-05-21", + "requirements": "none", + "category": "AI Chatbot - Nova", + "notes": "Sources: chat-ai.db and Android MediaStore databases.", + "paths": ( + "*/com.scaleup.chatai/databases/chat-ai.db", + "*/com.android.providers.media/databases/external*.db", + "*/com.google.android.providers.media.module/databases/external*.db", + ), + "function": "get_nova_user_submissions", + "output_types": "standard", + "artifact_icon": "folder", + } +} + +import os +import datetime +from datetime import timezone +from scripts.artifact_report import ArtifactHtmlReport +from scripts.ilapfuncs import logfunc, tsv, open_sqlite_db_readonly, media_to_html + + +def _parse_path(raw_path): + """Normalizes paths and slices them to always start at /data.""" + if not raw_path: + return "" + normalized = str(raw_path).replace("\\", "/") + if "/data/" in normalized: + return "/data/" + normalized.split("/data/", 1)[1] + elif "data/data/" in normalized: + return "/data/data/" + normalized.split("data/data/", 1)[1] + return normalized + + +def get_nova_user_submissions(files_found, report_folder, seeker, wrap_text): + logfunc("Processing data for Nova User Media Submissions") + + files_found = [ + x for x in files_found if not x.endswith(("-journal", "-wal", "-shm")) + ] + nova_db = next((str(x) for x in files_found if "chat-ai.db" in str(x)), None) + media_db = next( + ( + str(x) + for x in files_found + if "external" in str(x) and str(x).endswith(".db") + ), + None, + ) + + if not nova_db: + logfunc("[nova_user_submissions] Nova database not found.") + return + + extraction_root = getattr(seeker, "search_dir", "") or "" + media_lookup = {} + + # 1. 
Map the MediaStore database records to see what is on local storage + if media_db: + try: + db = open_sqlite_db_readonly(media_db) + cur = db.cursor() + cur.execute(""" + SELECT _display_name, _data, _size, date_added, mime_type + FROM files + WHERE _data IS NOT NULL + """) + for display_name, data_path, size, date_added, mime_type in cur.fetchall(): + local_path = "" + if data_path: + clean_rel = str(data_path).replace("\\", "/").lstrip("/") + if clean_rel.startswith("storage/emulated/0/"): + clean_rel = clean_rel.replace( + "storage/emulated/0/", "data/media/0/", 1 + ) + + candidate_path = os.path.join(extraction_root, clean_rel) + if os.path.exists(candidate_path): + local_path = candidate_path + + key = (display_name or os.path.basename(str(data_path))).lower() + media_lookup[key] = {"data_path": data_path, "local_path": local_path} + db.close() + except Exception as e: + logfunc(f"[nova_user_submissions] Error building MediaStore lookup: {e}") + + all_items = [] + + # 2. Process chat database documents and cross-reference with MediaStore + try: + db = open_sqlite_db_readonly(nova_db) + cur = db.cursor() + cur.execute(""" + SELECT + hdd.name, + hdd.mimeType, + hdd.size, + hd.text, + hd.createdAt, + h.title, + h.UUID + FROM HistoryDetailDocument hdd + INNER JOIN HistoryDetail hd ON hd.id = hdd.historyDetailID + INNER JOIN History h ON h.id = hd.historyID + WHERE hd.type = 0 + ORDER BY hd.createdAt DESC + """) + for ( + file_name, + mime_type, + size_db, + message, + created_at, + conversation, + conv_uuid, + ) in cur.fetchall(): + mtime_str = "" + if created_at: + try: + mtime_str = datetime.datetime.fromtimestamp( + float(created_at) / 1000, timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(created_at) + + match_key = (file_name or "").lower() + if match_key in media_lookup: + match = media_lookup[match_key] + if match["local_path"]: + media_to_html(file_name, match["local_path"], report_folder) + display_path = 
_parse_path(match["data_path"]) + else: + display_path = "Cloud-only (Firebase Storage)" + + all_items.append( + ( + file_name or "Unknown", + "Submitted Document", + message or "", + conversation or "Untitled", + conv_uuid or "", + mtime_str, + size_db if size_db is not None else "", + mime_type or "", + display_path, + ) + ) + db.close() + except Exception as e: + logfunc(f"[nova_user_submissions] Error querying documents: {e}") + + # 3. New: Process user chat-submitted images (HistoryDetailImage) + try: + db = open_sqlite_db_readonly(nova_db) + cur = db.cursor() + cur.execute(""" + SELECT + hdi.url, + hdi.prompt, + hd.text, + hd.createdAt, + h.title, + h.UUID + FROM HistoryDetailImage hdi + INNER JOIN HistoryDetail hd ON hd.id = hdi.historyDetailID + INNER JOIN History h ON h.id = hd.historyID + WHERE hd.type = 0 + ORDER BY hd.createdAt DESC + """) + for ( + img_url, + prompt, + message, + created_at, + conversation, + conv_uuid, + ) in cur.fetchall(): + mtime_str = "" + if created_at: + try: + mtime_str = datetime.datetime.fromtimestamp( + float(created_at) / 1000, timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(created_at) + + # Extract the raw filename out of the remote url endpoint path + file_name = ( + os.path.basename(img_url.split("?")[0]) + if img_url + else "Unknown_Image.jpg" + ) + + # Form an inline context blending user's text message input with any associated image generation prompts + context_pieces = [] + if message: + context_pieces.append(f"Msg: {message}") + if prompt: + context_pieces.append(f"Prompt: {prompt}") + combined_context = " | ".join(context_pieces) + + match_key = file_name.lower() + if match_key in media_lookup: + match = media_lookup[match_key] + if match["local_path"]: + media_to_html(file_name, match["local_path"], report_folder) + display_path = _parse_path(match["data_path"]) + else: + display_path = "Cloud-only (Firebase Storage)" + + all_items.append( + ( + file_name, + "Submitted 
Image", + combined_context, + conversation or "Untitled", + conv_uuid or "", + mtime_str, + "", # Size metadata is typically absent or cloud-side for image mappings + "image/jpeg", + display_path, + ) + ) + db.close() + except Exception as e: + logfunc(f"[nova_user_submissions] Error querying submitted images: {e}") + + # 4. Process standalone camera storage entries matching the application context + if media_db: + try: + db = open_sqlite_db_readonly(media_db) + cur = db.cursor() + cur.execute(""" + SELECT _display_name, _data, _size, date_added, mime_type + FROM files + WHERE bucket_display_name = 'Nova' OR _data LIKE '%/Nova/%' + ORDER BY date_added DESC + """) + for display_name, data_path, size, date_added, mime_type in cur.fetchall(): + mtime_str = "" + if date_added: + try: + mtime_str = datetime.datetime.fromtimestamp( + int(date_added), timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S UTC") + except Exception: + mtime_str = str(date_added) + + fname = display_name or ( + os.path.basename(str(data_path)) if data_path else "Unknown" + ) + + clean_rel = str(data_path).replace("\\", "/").lstrip("/") + if clean_rel.startswith("storage/emulated/0/"): + clean_rel = clean_rel.replace( + "storage/emulated/0/", "data/media/0/", 1 + ) + + local_path = os.path.join(extraction_root, clean_rel) + if os.path.exists(local_path): + media_to_html(fname, local_path, report_folder) + + all_items.append( + ( + fname, + "Camera Photo", + "", + "Camera photo (not associated with a message)", + "", + mtime_str, + size if size is not None else "", + mime_type or "image/jpeg", + _parse_path(data_path), + ) + ) + db.close() + except Exception as e: + logfunc(f"[nova_user_submissions] Error querying camera photos: {e}") + + if not all_items: + logfunc("[nova_user_submissions] No media found.") + return + + # Deduplicate entries safely using filename and localized storage path attributes + deduped = [] + seen = set() + for row in all_items: + key = (row[0].lower(), row[8]) + if key in 
seen: + continue + seen.add(key) + deduped.append(row) + + report_name = "User Media Submissions" + report = ArtifactHtmlReport(report_name) + report.start_artifact_report(report_folder, report_name) + report.add_script() + + headers = ( + "File Name", + "Type", + "User Message / Context", + "Conversation Title", + "Conv. UUID", + "Date (UTC)", + "Size (Bytes)", + "MIME Type", + "Path", + ) + + report.write_artifact_data_table( + headers, + deduped, + nova_db, + table_id="NovaUserSubmissions", + html_escape=True, + ) + report.end_artifact_report() + + tsv(report_folder, headers, deduped, report_name, nova_db) + logfunc(f"[nova_user_submissions] Found {len(deduped)} total items.") diff --git a/scripts/report_icons.py b/scripts/report_icons.py index 857fed29..762c4414 100644 --- a/scripts/report_icons.py +++ b/scripts/report_icons.py @@ -38,6 +38,7 @@ 'default': 'user' }, 'AGGREGATE DICTIONARY': 'book', + 'AI CHATBOT - NOVA': 'message-circle', 'AIRDROP DISCOVERABLE': 'search', 'AIRDROP EMAILS': 'send', 'AIRDROP NUMBERS': 'smartphone',