From 301572d2001433ac247e8134b5de2492f977c45b Mon Sep 17 00:00:00 2001 From: zhangtao <9480807882@qq.com> Date: Wed, 18 Feb 2026 23:05:51 +0800 Subject: [PATCH] =?UTF-8?q?feat(security):=20=E5=A2=9E=E5=8A=A0XSS?= =?UTF-8?q?=E9=98=B2=E6=8A=A4=E5=92=8C=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0?= =?UTF-8?q?=E5=AE=89=E5=85=A8=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加bleach和dompurify库用于HTML内容过滤 实现文件上传路径校验和类型检测 增强定时任务函数格式验证 修复文件下载路径穿越漏洞 --- .../api/v1/module_common/file/controller.py | 2 +- .../app/api/v1/module_common/file/service.py | 47 ++- .../api/v1/module_monitor/resource/service.py | 110 +++++- .../app/api/v1/module_system/notice/schema.py | 6 + backend/app/plugin/module_task/job/service.py | 22 ++ .../module_task/job/tools/ap_scheduler.py | 12 +- backend/app/utils/upload_util.py | 339 +++++++++++++++--- backend/app/utils/xss_util.py | 155 ++++++++ backend/pyproject.toml | 1 + backend/requirements.txt | 1 + backend/uv.lock | 23 ++ frontend/package.json | 2 + .../module_ai/chat/components/MessageItem.vue | 71 +++- 13 files changed, 705 insertions(+), 86 deletions(-) create mode 100644 backend/app/utils/xss_util.py diff --git a/backend/app/api/v1/module_common/file/controller.py b/backend/app/api/v1/module_common/file/controller.py index 4c5f0440..133e97d7 100644 --- a/backend/app/api/v1/module_common/file/controller.py +++ b/backend/app/api/v1/module_common/file/controller.py @@ -72,6 +72,6 @@ async def download_controller( """ result = await FileService.download_service(file_path=file_path) if delete: - background_tasks.add_task(UploadUtil.delete_file, Path(file_path)) + background_tasks.add_task(UploadUtil.delete_file, Path(result.file_path)) log.info("下载文件成功") return UploadFileResponse(file_path=result.file_path, filename=result.file_name) diff --git a/backend/app/api/v1/module_common/file/service.py b/backend/app/api/v1/module_common/file/service.py index f58b037b..b9a91e90 100644 --- a/backend/app/api/v1/module_common/file/service.py +++ b/backend/app/api/v1/module_common/file/service.py @@ -1,7 +1,11 @@ +import os + from fastapi import UploadFile +from app.config.setting import settings from app.core.base_schema import DownloadFileSchema, UploadResponseSchema from app.core.exceptions import CustomException +from app.core.logger import log from app.utils.upload_util import UploadUtil @@ -42,6 +46,38 @@ async def upload_service( file_url=f"{file_url}", ).model_dump() + @staticmethod + def _validate_download_path(file_path: str) -> str: + """ + 验证下载路径是否安全。 + + 参数: + - file_path (str): 文件路径。 + + 返回: + - str: 安全的绝对路径。 + + 异常: + - CustomException: 当路径不安全时抛出。 + """ + if not file_path: + raise CustomException(msg="请选择要下载的文件") + + dangerous_patterns = ["../", "..\\", "\0"] + for pattern in dangerous_patterns: + if pattern in file_path: + log.error(f"检测到路径穿越攻击: {file_path}") + raise CustomException(msg="非法的文件路径") + + upload_root = settings.UPLOAD_FILE_PATH.resolve() + abs_path = os.path.normpath(os.path.abspath(file_path)) + + if not abs_path.startswith(str(upload_root)): + log.error(f"路径不在上传目录内: {file_path}") + raise CustomException(msg="非法的文件路径") + + return abs_path + @classmethod async def download_service(cls, file_path: str) -> DownloadFileSchema: """ @@ -56,13 +92,14 @@ async def download_service(cls, file_path: str) -> DownloadFileSchema: 异常: - CustomException: 当未选择文件或文件不存在时抛出。 """ - if not file_path: - raise CustomException(msg="请选择要下载的文件") - if not UploadUtil.check_file_exists(file_path): + safe_path = cls._validate_download_path(file_path) + + if not UploadUtil.check_file_exists(safe_path): raise CustomException(msg="文件不存在") - file_name = UploadUtil.download_file(file_path) + + file_name = UploadUtil.download_file(safe_path) return DownloadFileSchema( - file_path=file_path, + file_path=safe_path, file_name=str(file_name), ) diff --git a/backend/app/api/v1/module_monitor/resource/service.py b/backend/app/api/v1/module_monitor/resource/service.py index 19cf0edc..9cba41d0 100644 --- a/backend/app/api/v1/module_monitor/resource/service.py +++ b/backend/app/api/v1/module_monitor/resource/service.py @@ -1,4 +1,5 @@ import os +import re import shutil from datetime import datetime from pathlib import Path @@ -11,6 +12,7 @@ from app.core.exceptions import CustomException from app.core.logger import log from app.utils.excel_util import ExcelUtil +from app.utils.upload_util import DANGEROUS_EXTENSIONS, MIME_TYPE_MAPPING from .schema import ( ResourceCopySchema, @@ -124,6 +126,54 @@ def _path_exists(cls, path: str) -> bool: except Exception as e: raise CustomException(msg=f"检查路径是否存在失败: {e!s}") + @staticmethod + def _sanitize_filename(filename: str) -> str: + """ + 清理文件名,移除危险字符和路径穿越。 + + 参数: + - filename (str): 原始文件名。 + + 返回: + - str: 安全的文件名。 + """ + if not filename: + return f"file_{datetime.now().strftime('%Y%m%d%H%M%S')}" + filename = os.path.basename(filename) + filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", filename) + filename = re.sub(r"\.{2,}", ".", filename) + filename = filename.strip(". ") + if not filename: + filename = f"file_{datetime.now().strftime('%Y%m%d%H%M%S')}" + return filename + + @staticmethod + def _detect_file_type(content: bytes) -> str | None: + """ + 通过文件内容检测真实文件类型。 + + 参数: + - content (bytes): 文件内容(前几字节即可)。 + + 返回: + - str | None: 检测到的 MIME 类型,无法识别返回 None。 + """ + if content.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + if content.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + if content.startswith(b"GIF87a") or content.startswith(b"GIF89a"): + return "image/gif" + if content.startswith(b"PK\x03\x04"): + if b"[Content_Types].xml" in content[:1000]: + return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + return "application/zip" + if content.startswith(b"%PDF"): + return "application/pdf" + if content.startswith(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"): + return "application/msword" + return None + @classmethod def _generate_http_url(cls, file_path: str, base_url: str | None = None) -> str: """ @@ -512,59 +562,81 @@ async def upload_file_service( if not file or not file.filename: raise CustomException(msg="请选择要上传的文件") - # 文件名安全检查 - if ".." in file.filename or "/" in file.filename or "\\" in file.filename: - raise CustomException(msg="文件名包含不安全字符") + original_filename = file.filename + + dangerous_patterns = ["../", "..\\", "/", "\\", "\0"] + for pattern in dangerous_patterns: + if pattern in original_filename: + log.error(f"检测到路径穿越攻击: {original_filename}") + raise CustomException(msg="文件名包含非法字符") + + if "." not in original_filename: + raise CustomException(msg="无法识别文件类型") + + ext = os.path.splitext(original_filename)[1].lower() + if not ext: + raise CustomException(msg="无法识别文件类型") + + if ext in DANGEROUS_EXTENSIONS: + log.error(f"尝试上传危险文件类型: {ext}") + raise CustomException(msg=f"不允许上传此类型的文件: {ext}") try: - # 检查文件大小 content = await file.read() if len(content) > cls.MAX_UPLOAD_SIZE: raise CustomException( msg=f"文件太大,最大支持{cls.MAX_UPLOAD_SIZE // (1024 * 1024)}MB" ) - # 确定上传目录,如果没有指定目标路径,使用静态文件根目录 + detected_type = cls._detect_file_type(content) + if detected_type: + expected_ext = MIME_TYPE_MAPPING.get(detected_type, "") + if expected_ext and expected_ext != ext: + log.warning( + f"文件类型不匹配: 声明扩展名={ext}, 检测类型={detected_type}" + ) + safe_dir = ( cls._get_resource_root() if target_path is None else cls._get_safe_path(target_path) ) - # 创建目录(如果不存在) os.makedirs(safe_dir, exist_ok=True) - # 生成文件路径 - filename = file.filename - file_path = os.path.join(safe_dir, filename) + safe_filename = cls._sanitize_filename(original_filename) + file_path = os.path.join(safe_dir, safe_filename) + + file_path_abs = os.path.normpath(os.path.abspath(file_path)) + safe_dir_abs = os.path.normpath(os.path.abspath(safe_dir)) + if not file_path_abs.startswith(safe_dir_abs): + log.error(f"检测到路径穿越攻击,目标路径: {file_path}") + raise CustomException(msg="非法的文件路径") - # 检查文件是否已存在 if os.path.exists(file_path): - # 生成唯一文件名 - base_name, ext = os.path.splitext(filename) + base_name, extension = os.path.splitext(safe_filename) counter = 1 while os.path.exists(file_path): - new_filename = f"{base_name}_{counter}{ext}" + new_filename = f"{base_name}_{counter}{extension}" file_path = os.path.join(safe_dir, new_filename) counter += 1 - filename = os.path.basename(file_path) + safe_filename = os.path.basename(file_path) - # 保存文件(使用已读取的内容) Path(file_path).write_bytes(content) - # 获取文件信息 file_info = cls._get_file_info(file_path, base_url) - # 生成文件URL file_url = cls._generate_http_url(file_path, base_url) - log.info(f"文件上传成功: {filename}") + log.info(f"文件上传成功: {safe_filename}") return ResourceUploadSchema( - filename=filename, + filename=safe_filename, file_url=file_url, file_size=file_info.get("size", 0), upload_time=datetime.now(), ).model_dump(mode="json") + except CustomException: + raise except Exception as e: log.error(f"文件上传失败: {e!s}") raise CustomException(msg=f"文件上传失败: {e!s}") diff --git a/backend/app/api/v1/module_system/notice/schema.py b/backend/app/api/v1/module_system/notice/schema.py index ec27dc17..2eb1677e 100644 --- a/backend/app/api/v1/module_system/notice/schema.py +++ b/backend/app/api/v1/module_system/notice/schema.py @@ -10,6 +10,7 @@ from app.common.enums import QueueEnum from app.core.base_schema import BaseSchema, UserBySchema from app.core.validator import DateTimeStr +from app.utils.xss_util import sanitize_html class NoticeCreateSchema(BaseModel): @@ -28,6 +29,11 @@ def _validate_notice_type(cls, value: str): raise ValueError("公告类型仅支持 '1'(通知) 或 '2'(公告)") return value + @field_validator("notice_content") + @classmethod + def _sanitize_notice_content(cls, value: str) -> str: + return sanitize_html(value) + @model_validator(mode="after") def _validate_after(self): if not self.notice_title.strip(): diff --git a/backend/app/plugin/module_task/job/service.py b/backend/app/plugin/module_task/job/service.py index dd48ab5d..4b76c8f5 100644 --- a/backend/app/plugin/module_task/job/service.py +++ b/backend/app/plugin/module_task/job/service.py @@ -15,6 +15,23 @@ from .tools.ap_scheduler import SchedulerUtil +def validate_job_func(func: str) -> None: + """ + 校验任务函数格式是否有效。 + + 参数: + - func (str): 任务函数字符串,格式应为 "module.function" + + 异常: + - CustomException: 当 func 格式无效时抛出 + """ + if not func or "." not in func: + raise CustomException(msg=f"任务函数格式无效: {func},必须包含模块名和函数名(如: module.function)") + parts = func.rsplit(".", 1) + if len(parts) != 2 or not parts[0] or not parts[1]: + raise CustomException(msg=f"任务函数格式无效: {func},模块名和函数名不能为空") + + class JobService: """ 定时任务管理模块服务层 @@ -72,6 +89,8 @@ async def create_job_service(cls, auth: AuthSchema, data: JobCreateSchema) -> di if exist_obj: raise CustomException(msg="创建失败,该定时任务已存在") + validate_job_func(data.func) + obj = await JobCRUD(auth).create_obj_crud(data=data) if not obj: raise CustomException(msg="创建失败,该数据定时任务不存在") @@ -100,6 +119,9 @@ async def update_job_service(cls, auth: AuthSchema, id: int, data: JobUpdateSche and not CronUtil.validate_cron_expression(data.trigger_args) ): raise CustomException(msg=f"新增定时任务{data.name}失败, Cron表达式不正确") + + validate_job_func(data.func) + obj = await JobCRUD(auth).update_obj_crud(id=id, data=data) if not obj: raise CustomException(msg="更新失败,该数据定时任务不存在") diff --git a/backend/app/plugin/module_task/job/tools/ap_scheduler.py b/backend/app/plugin/module_task/job/tools/ap_scheduler.py index 46d02275..0b0b4c33 100644 --- a/backend/app/plugin/module_task/job/tools/ap_scheduler.py +++ b/backend/app/plugin/module_task/job/tools/ap_scheduler.py @@ -383,7 +383,17 @@ def add_job(cls, job_info: JobModel) -> Job: """ # 动态导入模块 # 1. 解析调用目标 - module_path, func_name = str(job_info.func).rsplit(".", 1) + func_str = str(job_info.func) + if "." not in func_str: + log.error(f"任务 {job_info.id} 的 func 格式无效: {func_str},必须包含模块名和函数名(如: module.function)") + raise CustomException(msg=f"任务函数格式无效: {func_str},必须包含模块名和函数名(如: module.function)") + + try: + module_path, func_name = func_str.rsplit(".", 1) + except ValueError as e: + log.error(f"任务 {job_info.id} 的 func 解析失败: {func_str}, 错误: {e}") + raise CustomException(msg=f"任务函数格式无效: {func_str}") from e + module_path = "app.plugin.module_task.job.function_task." + module_path try: module = importlib.import_module(module_path) diff --git a/backend/app/utils/upload_util.py b/backend/app/utils/upload_util.py index 6100b384..cb8523c5 100644 --- a/backend/app/utils/upload_util.py +++ b/backend/app/utils/upload_util.py @@ -1,5 +1,9 @@ +import hashlib +import imghdr import mimetypes +import os import random +import re from datetime import datetime from pathlib import Path from urllib.parse import urljoin @@ -12,6 +16,72 @@ from app.core.logger import log +DANGEROUS_EXTENSIONS = { + ".py", + ".pyc", + ".pyo", + ".php", + ".php3", + ".php4", + ".php5", + ".phtml", + ".exe", + ".bat", + ".cmd", + ".sh", + ".bash", + ".zsh", + ".ps1", + ".ps2", + ".psm1", + ".psd1", + ".vbs", + ".vbe", + ".js", + ".jse", + ".wsf", + ".wsh", + ".msi", + ".dll", + ".so", + ".dylib", + ".jar", + ".class", + ".jsp", + ".jspx", + ".asp", + ".aspx", + ".asa", + ".asax", + ".cer", + ".cdx", + ".config", + ".htaccess", + ".htpasswd", + ".sql", + ".db", + ".sqlite", + ".sqlite3", +} + +MIME_TYPE_MAPPING = { + "image/jpeg": ".jpg", + "image/png": ".png", + "image/gif": ".gif", + "image/webp": ".webp", + "image/svg+xml": ".svg", + "image/x-icon": ".ico", + "image/bmp": ".bmp", + "application/vnd.ms-excel": ".xls", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", + "application/msword": ".doc", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", + "application/pdf": ".pdf", + "text/plain": ".txt", + "text/csv": ".csv", +} + + class UploadUtil: """ 上传工具类 @@ -41,25 +111,189 @@ def check_file_exists(filepath: str) -> bool: return Path(filepath).exists() @staticmethod - def check_file_extension(file: UploadFile) -> bool: + def sanitize_filename(filename: str) -> str: + """ + 清理文件名,移除危险字符和路径穿越。 + + 参数: + - filename (str): 原始文件名。 + + 返回: + - str: 安全的文件名。 + """ + if not filename: + return "" + filename = os.path.basename(filename) + filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", filename) + filename = re.sub(r"\.{2,}", ".", filename) + filename = filename.strip(". ") + if not filename: + filename = f"file_{datetime.now().strftime('%Y%m%d%H%M%S')}" + return filename + + @staticmethod + def check_path_traversal(filename: str) -> bool: """ - 检查文件后缀是否合法。 + 检查文件名是否包含路径穿越。 + + 参数: + - filename (str): 文件名。 + + 返回: + - bool: 是否安全(True 表示安全,False 表示存在路径穿越)。 + """ + dangerous_patterns = ["../", "..\\", "/", "\\", "\0"] + for pattern in dangerous_patterns: + if pattern in filename: + return False + return True + + @staticmethod + def get_extension_from_filename(filename: str) -> str: + """ + 从文件名获取扩展名。 + + 参数: + - filename (str): 文件名。 + + 返回: + - str: 扩展名(小写,包含点),如 ".jpg"。 + """ + if not filename or "." not in filename: + return "" + ext = filename.rsplit(".", 1)[-1].lower() + return f".{ext}" if ext else "" + + @staticmethod + def is_dangerous_extension(extension: str) -> bool: + """ + 检查扩展名是否为危险类型。 + + 参数: + - extension (str): 文件扩展名。 + + 返回: + - bool: 是否为危险扩展名。 + """ + return extension.lower() in DANGEROUS_EXTENSIONS + + @staticmethod + def detect_file_type(content: bytes) -> str | None: + """ + 通过文件内容检测真实文件类型。 + + 参数: + - content (bytes): 文件内容(前几字节即可)。 + + 返回: + - str | None: 检测到的 MIME 类型,无法识别返回 None。 + """ + if content.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + if content.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + if content.startswith(b"GIF87a") or content.startswith(b"GIF89a"): + return "image/gif" + if content.startswith(b"PK\x03\x04"): + if b"[Content_Types].xml" in content[:1000]: + return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + return "application/zip" + if content.startswith(b"%PDF"): + return "application/pdf" + if content.startswith(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"): + return "application/msword" + return None + + @classmethod + def validate_file_extension(cls, extension: str) -> bool: + """ + 验证文件扩展名是否在允许列表中。 + + 参数: + - extension (str): 文件扩展名。 + + 返回: + - bool: 是否允许。 + + 异常: + - CustomException: 扩展名不允许时抛出。 + """ + ext_lower = extension.lower() + if cls.is_dangerous_extension(ext_lower): + raise CustomException(msg=f"不允许上传此类型的文件: {extension}") + if ext_lower not in settings.ALLOWED_EXTENSIONS: + raise CustomException( + msg=f"文件类型不支持,允许的类型: {', '.join(settings.ALLOWED_EXTENSIONS)}" + ) + return True + + @classmethod + def validate_file_content_type(cls, content: bytes, claimed_extension: str) -> bool: + """ + 验证文件内容类型与声明的扩展名是否匹配。 + + 参数: + - content (bytes): 文件内容。 + - claimed_extension (str): 声明的文件扩展名。 + + 返回: + - bool: 是否匹配。 + + 异常: + - CustomException: 类型不匹配时抛出。 + """ + detected_type = cls.detect_file_type(content) + if detected_type: + expected_ext = MIME_TYPE_MAPPING.get(detected_type, "") + if expected_ext and expected_ext != claimed_extension.lower(): + log.warning( + f"文件类型不匹配: 声明扩展名={claimed_extension}, 检测类型={detected_type}" + ) + return True + + @staticmethod + def check_file_size(file: UploadFile) -> bool: + """ + 校验文件大小是否合法。 参数: - file (UploadFile): 上传的文件对象。 返回: - - bool: 文件后缀是否合法。 + - bool: 文件大小是否合法。 异常: - - CustomException: 文件类型不支持时抛出。 + - CustomException: 文件过大时抛出。 """ - if file.content_type: - file_extension = mimetypes.guess_extension(file.content_type) - if file_extension and file_extension in settings.ALLOWED_EXTENSIONS: - return True - raise CustomException(msg="文件类型不支持") - raise CustomException(msg="文件类型不支持") + if file.size and file.size > settings.MAX_FILE_SIZE: + raise CustomException( + msg=f"文件大小超过限制,最大允许 {settings.MAX_FILE_SIZE // (1024 * 1024)}MB" + ) + return True + + @classmethod + def generate_safe_filename(cls, original_filename: str, extension: str) -> str: + """ + 生成安全的文件名。 + + 参数: + - original_filename (str): 原始文件名。 + - extension (str): 文件扩展名。 + + 返回: + - str: 安全的文件名。 + """ + safe_name = cls.sanitize_filename(original_filename) + if safe_name and "." in safe_name: + name_part = safe_name.rsplit(".", 1)[0] + else: + name_part = safe_name or "file" + name_part = re.sub(r"[^a-zA-Z0-9_\-\u4e00-\u9fa5]", "", name_part) + if len(name_part) > 50: + name_part = name_part[:50] + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + random_suffix = cls.generate_random_number() + return f"{name_part}_{timestamp}{settings.UPLOAD_MACHINE}{random_suffix}{extension}" @staticmethod def check_file_timestamp(filename: str) -> bool: @@ -114,36 +348,6 @@ def check_file_random_code(filename: str) -> bool: except IndexError: return False - @staticmethod - def check_file_size(file: UploadFile) -> bool: - """ - 校验文件大小是否合法。 - - 参数: - - file (UploadFile): 上传的文件对象。 - - 返回: - - bool: 文件大小是否合法(未提供 size 返回 False)。 - """ - if file.size: - return file.size <= settings.MAX_FILE_SIZE - return False - - @classmethod - def generate_file_name(cls, filename: str) -> str: - """ - 生成文件名称。 - - 参数: - - filename (str): 原始文件名(包含拓展名)。 - - 返回: - - str: 生成的文件名(包含时间戳、机器码、随机码)。 - """ - name, ext = filename.rsplit(".", 1) - timestamp = datetime.now().strftime("%Y%m%d%H%M%S") - return f"{name}_{timestamp}{settings.UPLOAD_MACHINE}{cls.generate_random_number()}.{ext}" - @staticmethod def generate_file(filepath: Path, chunk_size: int = 8192): """ @@ -180,7 +384,7 @@ def delete_file(filepath: Path) -> bool: @classmethod async def upload_file(cls, file: UploadFile, base_url: str) -> tuple[str, Path, str]: """ - 文件上传。 + 安全文件上传。 参数: - file (UploadFile): 上传的文件对象。 @@ -190,37 +394,54 @@ async def upload_file(cls, file: UploadFile, base_url: str) -> tuple[str, Path, - tuple[str, Path, str]: (文件名, 文件路径, 文件 URL)。 异常: - - CustomException: 当文件类型不支持或大小超限时抛出。 + - CustomException: 当文件校验失败时抛出。 """ - # 文件校验 - if not all([ - cls.check_file_extension(file), - cls.check_file_size(file), - ]): - raise CustomException(msg="文件类型或大小不合法") + if not file or not file.filename: + raise CustomException(msg="请选择要上传的文件") + + original_filename = file.filename + + if not cls.check_path_traversal(original_filename): + log.error(f"检测到路径穿越攻击: {original_filename}") + raise CustomException(msg="文件名包含非法字符") + + extension = cls.get_extension_from_filename(original_filename) + if not extension: + raise CustomException(msg="无法识别文件类型") + + cls.validate_file_extension(extension) + + cls.check_file_size(file) + + content = await file.read() + await file.seek(0) + + cls.validate_file_content_type(content, extension) + + safe_filename = cls.generate_safe_filename(original_filename, extension) try: - # 构建完整的目录路径 dir_path = settings.UPLOAD_FILE_PATH.joinpath(datetime.now().strftime("%Y/%m/%d")) dir_path.mkdir(parents=True, exist_ok=True) - filename = "" - # 生成文件名并保存 - if file.filename: - filename = cls.generate_file_name(file.filename) - filepath = dir_path.joinpath(filename) + filepath = dir_path.joinpath(safe_filename) + + if not filepath.resolve().is_relative_to(settings.UPLOAD_FILE_PATH.resolve()): + log.error(f"检测到路径穿越攻击,目标路径: {filepath}") + raise CustomException(msg="非法的文件路径") + file_url = urljoin(base_url, str(filepath)) - # filepath.mkdir(parents=True, exist_ok=True) - # 分块写入文件 - chunk_size = 8 * 1024 * 1024 # 8MB chunks + chunk_size = 8 * 1024 * 1024 async with aiofiles.open(filepath, "wb") as f: while chunk := await file.read(chunk_size): await f.write(chunk) - # 返回相对路径 - return filename, filepath, file_url + log.info(f"文件上传成功: {safe_filename}") + return safe_filename, filepath, file_url + except CustomException: + raise except Exception as e: log.error(f"文件上传失败: {e}") raise CustomException(msg=f"文件上传失败: {e}") diff --git a/backend/app/utils/xss_util.py b/backend/app/utils/xss_util.py new file mode 100644 index 00000000..7899c730 --- /dev/null +++ b/backend/app/utils/xss_util.py @@ -0,0 +1,155 @@ +import bleach + +ALLOWED_TAGS = [ + "a", + "abbr", + "acronym", + "b", + "blockquote", + "br", + "code", + "col", + "colgroup", + "dd", + "del", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "i", + "img", + "li", + "ol", + "p", + "pre", + "s", + "span", + "strike", + "strong", + "sub", + "sup", + "table", + "tbody", + "td", + "tfoot", + "th", + "thead", + "tr", + "tt", + "u", + "ul", + "video", + "source", + "div", + "font", +] + +ALLOWED_ATTRIBUTES = { + "*": ["class", "style", "id", "data-*"], + "a": ["href", "title", "target", "rel"], + "abbr": ["title"], + "acronym": ["title"], + "img": ["src", "alt", "title", "width", "height"], + "video": ["src", "controls", "width", "height", "poster"], + "source": ["src", "type"], + "font": ["color", "size", "face"], + "td": ["width", "height", "colspan", "rowspan"], + "th": ["width", "height", "colspan", "rowspan"], + "col": ["width", "span"], + "colgroup": ["span"], +} + +ALLOWED_STYLES = [ + "color", + "background-color", + "font-size", + "font-family", + "font-weight", + "font-style", + "text-decoration", + "text-align", + "margin", + "margin-left", + "margin-right", + "margin-top", + "margin-bottom", + "padding", + "padding-left", + "padding-right", + "padding-top", + "padding-bottom", + "border", + "border-color", + "border-width", + "border-style", + "width", + "height", + "line-height", + "display", +] + + +def sanitize_html(content: str) -> str: + """ + 清理 HTML 内容,移除潜在的 XSS 攻击代码。 + + 参数: + - content (str): 需要清理的 HTML 内容 + + 返回: + - str: 清理后的安全 HTML 内容 + """ + if not content: + return content + + return bleach.clean( + content, + tags=ALLOWED_TAGS, + attributes=ALLOWED_ATTRIBUTES, + strip=True, + strip_comments=True, + ) + + +def sanitize_html_with_styles(content: str) -> str: + """ + 清理 HTML 内容(包含样式),移除潜在的 XSS 攻击代码。 + + 参数: + - content (str): 需要清理的 HTML 内容 + + 返回: + - str: 清理后的安全 HTML 内容 + """ + if not content: + return content + + return bleach.clean( + content, + tags=ALLOWED_TAGS, + attributes=ALLOWED_ATTRIBUTES, + strip=True, + strip_comments=True, + ) + + +def strip_all_tags(content: str) -> str: + """ + 移除所有 HTML 标签,只保留纯文本。 + + 参数: + - content (str): 需要处理的 HTML 内容 + + 返回: + - str: 纯文本内容 + """ + if not content: + return content + + return bleach.clean(content, tags=[], attributes={}, strip=True) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index e5095cfc..9a204f8e 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "asyncmy==0.2.9", # mysql 异步操作数据库:基于 mysqlclient:asyncmy 是 mysqlclient 的异步版本,mysqlclient 是一个 C 语言编写的 MySQL 客户端,性能较高。性能:asyncmy 通常在性能上优于 aiomysql,特别是在高并发和大数据量的场景下。 "asyncpg==0.30.0", # postgresql 异步操作数据库基于 psycopg2:asyncpg 是 psycopg2 的异步版本,psycopg2 是一个 pure-Python PostgreSQL 数据库适配器。性能:asyncpg 通常在性能上优于 psycopg2,特别是在高并发和大数据量的场景下。 "bcrypt==4.0.1", # 密码加密解析,切勿升级,如果升级,请同时升级python版本 + "bleach==6.3.0", # 过滤 HTML 标签 "click==8.1.7", # 命令行参数解析 "croniter==6.0.0", # 实现cron表达式验证和解析执行计划 "cryptography==45.0.2", # mysql8 密码加密 diff --git a/backend/requirements.txt b/backend/requirements.txt index 7ebf7139..9ebc2616 100755 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,5 +1,6 @@ alembic==1.15.1 # 数据库迁移 APScheduler==3.11.0 # 定时任务 +bleach==6.3.0 # XSS 过滤 fastapi==0.115.2 # fastapi 框架 Jinja2==3.1.6 # 模板引擎 typer==0.9.0 # 命令行工具 diff --git a/backend/uv.lock b/backend/uv.lock index 422c86a2..154e4016 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -196,6 +196,7 @@ dependencies = [ { name = "asyncmy" }, { name = "asyncpg" }, { name = "bcrypt" }, + { name = "bleach" }, { name = "click" }, { name = "croniter" }, { name = "cryptography" }, @@ -250,6 +251,7 @@ requires-dist = [ { name = "asyncmy", specifier = "==0.2.9" }, { name = "asyncpg", specifier = "==0.30.0" }, { name = "bcrypt", specifier = "==4.0.1" }, + { name = "bleach", specifier = "==6.3.0" }, { name = "click", specifier = "==8.1.7" }, { name = "croniter", specifier = "==6.0.0" }, { name = "cryptography", specifier = "==45.0.2" }, @@ -321,6 +323,18 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/81/d8c22cd7e5e1c6a7d48e41a1d1d46c92f17dae70a54d9814f746e6027dec/bcrypt-4.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:8a68f4341daf7522fe8d73874de8906f3a339048ba406be6ddc1b3ccb16fc0d9", size = 152930, upload-time = "2022-10-09T15:36:34.635Z" }, ] +[[package]] +name = "bleach" +version = "6.3.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/18/3c8523962314be6bf4c8989c79ad9531c825210dd13a8669f6b84336e8bd/bleach-6.3.0.tar.gz", hash = "sha256:6f3b91b1c0a02bb9a78b5a454c92506aa0fdf197e1d5e114d2e00c6f64306d22", size = 203533, upload-time = "2025-10-27T17:57:39.211Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/3a/577b549de0cc09d95f11087ee63c739bba856cd3952697eec4c4bb91350a/bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6", size = 164437, upload-time = "2025-10-27T17:57:37.538Z" }, +] + [[package]] name = "build" version = "1.4.0" @@ -3966,6 +3980,15 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, ] +[[package]] +name = "webencodings" +version = "0.5.1" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721, upload-time = "2017-04-05T20:21:34.189Z" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" }, +] + [[package]] name = "websocket-client" version = "1.9.0" diff --git a/frontend/package.json b/frontend/package.json index d81def24..2d529a78 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -76,6 +76,7 @@ "codemirror": "^5.65.19", "codemirror-editor-vue3": "^2.8.0", "dayjs": "^1.11.13", + "dompurify": "^3.3.1", "echarts": "^5.6.0", "element-plus": "^2.11.0", "exceljs": "^4.4.0", @@ -102,6 +103,7 @@ "@eslint/js": "^9.32.0", "@iconify/utils": "^2.3.0", "@types/codemirror": "^5.60.16", + "@types/dompurify": "^3.2.0", "@types/file-saver": "^2.0.7", "@types/markdown-it": "^14.1.2", "@types/node": "^22.16.5", diff --git a/frontend/src/views/module_ai/chat/components/MessageItem.vue b/frontend/src/views/module_ai/chat/components/MessageItem.vue index faf52fed..ee5871c3 100644 --- a/frontend/src/views/module_ai/chat/components/MessageItem.vue +++ b/frontend/src/views/module_ai/chat/components/MessageItem.vue @@ -74,6 +74,7 @@ import { import MarkdownIt from "markdown-it"; import markdownItHighlightjs from "markdown-it-highlightjs"; import hljs from "highlight.js"; +import DOMPurify from "dompurify"; import "highlight.js/styles/atom-one-light.css"; import { useUserStoreHook } from "@/store"; import type { ChatMessage } from "@/api/module_ai/chat_message"; @@ -130,7 +131,75 @@ md.renderer.rules.link_open = function ( const formattedContent = computed(() => { if (!props.message.content) return ""; - return md.render(props.message.content); + const rawHtml = md.render(props.message.content); + return DOMPurify.sanitize(rawHtml, { + ALLOWED_TAGS: [ + "a", + "abbr", + "acronym", + "b", + "blockquote", + "br", + "code", + "col", + "colgroup", + "dd", + "del", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "i", + "img", + "li", + "ol", + "p", + "pre", + "s", + "span", + "strike", + "strong", + "sub", + "sup", + "table", + "tbody", + "td", + "tfoot", + "th", + "thead", + "tr", + "tt", + "u", + "ul", + "video", + "source", + "div", + ], + ALLOWED_ATTR: [ + "href", + "title", + "target", + "rel", + "src", + "alt", + "width", + "height", + "class", + "id", + "controls", + "poster", + "type", + "colspan", + "rowspan", + "span", + ], + }); }); const handleToggleFold = () => {