In [1]:
import json
import os
import re
from datetime import datetime

import requests

# Dify API configuration
API_BASE_URL = "https://api.dify.ai/v1"
UPLOAD_ENDPOINT = "/files/upload"
CHAT_ENDPOINT = "/chat-messages"
# SECURITY: supply the key through the DIFY_API_KEY environment variable.
# The literal fallback only keeps the notebook running exactly as before; a key
# that has been saved inside a notebook should be treated as leaked — rotate it
# and then delete the fallback.
API_KEY = os.environ.get("DIFY_API_KEY", "app-cECUzGVa0z8WLN1NuUeJPsJK")
USER_ID = "test"

# Authorization header for API requests (bearer-token scheme)
headers = {
    "Authorization": f"Bearer {API_KEY}",
}


In [2]:
# Persistence: save
def save_data():
    """Write the current chat history and memory records to conversation_data.json.

    Reads the module-level ``chat_history`` and ``memory_records`` lists and
    serialises them as one UTF-8 JSON object (non-ASCII characters are written
    as-is, indented by two spaces). An existing file is overwritten.
    """
    snapshot = dict(chat_history=chat_history, memory_records=memory_records)
    with open("conversation_data.json", "w", encoding="utf-8") as out_file:
        json.dump(snapshot, out_file, ensure_ascii=False, indent=2)

# Persistence: load
def load_data():
    """Populate the global ``chat_history`` and ``memory_records`` lists.

    Reads conversation_data.json from the working directory. A key missing
    from the stored object falls back to an empty list. When the file does
    not exist, both globals are initialised to empty lists. Any other error
    (for example malformed JSON) propagates to the caller.
    """
    global chat_history, memory_records
    try:
        with open("conversation_data.json", "r", encoding="utf-8") as in_file:
            stored = json.load(in_file)
    except FileNotFoundError:
        # First run: nothing saved yet, start with empty stores.
        chat_history, memory_records = [], []
        return
    chat_history = stored.get("chat_history", [])
    memory_records = stored.get("memory_records", [])

In [3]:
# Upload the audio file and obtain the file ID used by the chat request.
audio_file_path = "test.m4a"  # path of the audio file to upload
#audio_file_path = "test2.wav"

upload_url = f"{API_BASE_URL}{UPLOAD_ENDPOINT}"
upload_headers = {
    "Authorization": f"Bearer {API_KEY}"
}

# Map the file extension to the MIME type sent with the upload;
# extensions not listed here fall back to "audio/mpeg".
AUDIO_MIME_TYPES = {
    ".wav": "audio/wav",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
    ".m4a": "audio/m4a",
}
file_extension = os.path.splitext(audio_file_path)[1].lower()
mime_type = AUDIO_MIME_TYPES.get(file_extension, "audio/mpeg")

# Only the request needs the open file handle; close it before handling the response.
with open(audio_file_path, "rb") as file:
    files = {
        "file": (os.path.basename(audio_file_path), file, mime_type),
        "user": (None, USER_ID)
    }
    upload_response = requests.post(upload_url, headers=upload_headers, files=files, timeout=30)

print(f"文件上传响应状态码: {upload_response.status_code}")

# Fail fast on any non-success status: otherwise file_id would be left undefined
# (or stale from a previous run) and the chat cell would fail in a confusing way.
if upload_response.status_code not in (200, 201):
    raise RuntimeError(
        f"文件上传失败，状态码: {upload_response.status_code}，响应: {upload_response.text}"
    )

upload_result = upload_response.json()
print("文件上传成功")
print(f"上传响应: {json.dumps(upload_result, indent=2)}")

# Extract the file ID. Raise instead of calling exit(): inside a notebook,
# exit() asks the kernel to exit rather than reporting an error.
file_id = upload_result.get("id")
if not file_id:
    raise RuntimeError("错误: 上传响应中没有找到文件ID")

print(f"获取到文件ID: {file_id}")


文件上传响应状态码: 201
文件上传成功
上传响应: {
  "id": "303ad9e8-c0ca-4f93-ada1-0356a013e650",
  "name": "test.m4a",
  "size": 65651,
  "extension": "m4a",
  "mime_type": "audio/m4a",
  "created_by": "32c5e1e1-7c82-4510-92f2-9d1dc54a991b",
  "created_at": 1756637679,
  "preview_url": null
}
获取到文件ID: 303ad9e8-c0ca-4f93-ada1-0356a013e650


In [4]:
# Refresh chat_history / memory_records from disk before assembling the prompt.
load_data()

# Chat-history section: at most the 5 most recent records, one
# "[timestamp] content" line each; empty string when there is no history.
history_text = (
    "聊天历史:\n"
    + "\n".join(
        f"[{entry['timestamp']}] {entry['content']}"
        for entry in chat_history[-5:]
    )
    + "\n\n"
) if chat_history else ""

# Memory section: at most the 3 most recent records, same line format;
# empty string when there are no memories.
memory_text = (
    "记忆:\n"
    + "\n".join(
        f"[{entry['timestamp']}] {entry['content']}"
        for entry in memory_records[-3:]
    )
    + "\n\n"
) if memory_records else ""

# The trailing marker line is appended even when both sections are empty.
query_text = "".join([history_text, memory_text, "\n以上为聊天历史"])

In [5]:
# Inspect the assembled prompt text that the chat request sends as its "query".
query_text

'\n以上为聊天历史'

In [5]:
# Send the chat request that references the uploaded audio file.
chat_url = f"{API_BASE_URL}{CHAT_ENDPOINT}"
chat_headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# Build the chat request body
chat_payload = {
    "inputs": {},
    "query": query_text,
    "response_mode": "blocking",  # blocking mode: wait for the complete response
    "conversation_id": "",  # presumably an empty ID starts a new conversation — confirm against the API docs
    "user": USER_ID,
    "files": [
        {
            "type": "audio",
            "transfer_method": "local_file",
            "upload_file_id": file_id
        }
    ]
}

# Send the chat request
chat_response = requests.post(
    chat_url,
    headers=chat_headers,
    json=chat_payload,
    timeout=60  # longer timeout because the model needs time to process
)

print(f"对话响应状态码: {chat_response.status_code}")

# Fail fast on a non-200 status: otherwise chat_result would be left undefined
# (or stale from a previous run) and the parsing cell would misbehave.
if chat_response.status_code != 200:
    raise RuntimeError(
        f"对话请求失败，状态码: {chat_response.status_code}，响应: {chat_response.text}"
    )

chat_result = chat_response.json()
print("对话请求成功")
print(f"对话响应: {json.dumps(chat_result, indent=2)}")
    
   

对话响应状态码: 200
对话请求成功
对话响应: {
  "event": "message",
  "task_id": "ee94e4b2-fd8c-45c3-b988-4b525129d697",
  "id": "19e39445-4700-4fef-a3c4-ab03e320ebd4",
  "message_id": "19e39445-4700-4fef-a3c4-ab03e320ebd4",
  "conversation_id": "79e98c3f-f036-468b-94b2-1f3ccfc4f9a7",
  "mode": "advanced-chat",
  "answer": "[0b7ebff62b504062959ede9c90748314.wav](https://upload.dify.ai/files/tools/47275ccf-7a2f-4494-a9c4-fee8a09d6fb5.wav?timestamp=1756637705&nonce=a06a8c0126fe5424df0c241fc24325c7&sign=YHjtxJMXm9QxW-6P-Z1AxQ2YluZ5h-2w-3fyKD7dD20=)\uff1b\n\n\u7528\u6237\uff1a\u4e0b\u5348\u6211\u60f3\u53bb\u6253\u724c\u3002\uff1b\n\u5c0f\u4f34\uff1a\u5976\u5976\u4e0b\u5348\u60f3\u53bb\u6253\u724c\u5440\uff0c\u90a3\u771f\u597d\uff01\u8bb0\u5f97\u591a\u7a7f\u4ef6\u5916\u5957\uff0c\u522b\u7740\u51c9\u4e86\u5457\u3002\uff1b\u662f\u548c\u4e0a\u6b21\u8bf4\u7684\u5f20\u4e09\u674e\u56db\u4ed6\u4eec\u4e00\u8d77\u73a9\u4e0d\uff1f\uff1b\n\n\u8bb0\u5fc6\uff1a\u4e0b\u5348\u6253\u724c\u4e60\u60ef, \u53ef\u80fd\u5e38\u4e0

In [6]:
# Extract the audio URL and the new context text from the chat response,
# then download the audio if a URL was found.
audio_url = None
new_context = None

# A missing, null or non-string "answer" is treated as "no answer field";
# passing such a value to re.search would raise a TypeError.
answer_text = chat_result.get("answer")
if isinstance(answer_text, str):
    # The audio is expected as a Markdown link: [name](url). Only the first link is used.
    audio_match = re.search(r'\[.*?\]\((.*?)\)', answer_text)
    if audio_match:
        audio_url = audio_match.group(1)
        print(f"从answer字段提取到音频URL: {audio_url}")

        # Strip every Markdown link (plus an optional trailing full-width or
        # ASCII semicolon and whitespace) to keep only the plain text.
        new_context = re.sub(r'\[.*?\]\(.*?\)\s*[；;]?\s*', '', answer_text).strip()
    else:
        # No link found: use the whole answer as the context.
        new_context = answer_text
        print("answer字段中没有找到音频URL格式的链接")
else:
    print("响应中没有answer字段")

# Download the audio file (if a URL was found)
if audio_url:
    print(f"找到音频URL: {audio_url}")
    # Build a timestamped output name. The .wav extension assumes the link
    # points at a .wav file, as it did in the recorded run.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    audio_filename = f"processed_audio_{timestamp}.wav"

    print("开始下载音频文件...")
    try:
        audio_response = requests.get(audio_url, timeout=30)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status code, so the
        # extracted context can still be shown and stored.
        print(f"音频下载失败: {exc}")
    else:
        if audio_response.status_code == 200:
            with open(audio_filename, "wb") as f:
                f.write(audio_response.content)
            print(f"处理后的音频已保存到: {audio_filename}")
        else:
            print(f"音频下载失败，状态码: {audio_response.status_code}")
else:
    print("响应中未找到音频URL")

# Show the new context text
if new_context:
    print("新的上下文内容:")
    print(new_context)
else:
    print("响应中未找到新的上下文内容")

从answer字段提取到音频URL: https://upload.dify.ai/files/tools/47275ccf-7a2f-4494-a9c4-fee8a09d6fb5.wav?timestamp=1756637705&nonce=a06a8c0126fe5424df0c241fc24325c7&sign=YHjtxJMXm9QxW-6P-Z1AxQ2YluZ5h-2w-3fyKD7dD20=
找到音频URL: https://upload.dify.ai/files/tools/47275ccf-7a2f-4494-a9c4-fee8a09d6fb5.wav?timestamp=1756637705&nonce=a06a8c0126fe5424df0c241fc24325c7&sign=YHjtxJMXm9QxW-6P-Z1AxQ2YluZ5h-2w-3fyKD7dD20=
开始下载音频文件...
处理后的音频已保存到: processed_audio_20250831_185508.wav
新的上下文内容:
用户：下午我想去打牌。；
小伴：奶奶下午想去打牌呀，那真好！记得多穿件外套，别着凉了呗。；是和上次说的张三李四他们一起玩不？；

记忆：下午打牌习惯, 可能常与张三李四聚会-心情：期待放松，社交需求明显；


In [7]:
# Split the cleaned answer into its chat and memory sections, append each
# non-empty section to the matching in-memory list, then persist both lists.
if new_context:
    # Sections are separated by a blank line (two consecutive newlines).
    parts = new_context.split('\n\n')

    # Section 0: the chat transcript (str.split always yields at least one element).
    chat_part = parts[0].strip()
    if chat_part:
        chat_history.append({
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "content": chat_part,
        })
        print(f"已存储聊天记录: {chat_part}")

    # Section 1, when present: the memory note. Any further sections are ignored.
    if len(parts) > 1:
        memory_part = parts[1].strip()
        if memory_part:
            memory_records.append({
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "content": memory_part,
            })
            print(f"已存储记忆: {memory_part}")
# Saved unconditionally, even when nothing new was appended.
save_data()

已存储聊天记录: 用户：下午我想去打牌。；
小伴：奶奶下午想去打牌呀，那真好！记得多穿件外套，别着凉了呗。；是和上次说的张三李四他们一起玩不？；
已存储记忆: 记忆：下午打牌习惯, 可能常与张三李四聚会-心情：期待放松，社交需求明显；


In [9]:
# Display the in-memory chat history.
# NOTE(review): the recorded output beneath this cell shows only an older record and not
# the one the storing cell reported appending — possibly a stale or out-of-order run;
# re-run the notebook top to bottom to confirm.
chat_history

[{'timestamp': '2025-08-29 17:47:12',
  'content': '用户：张三和李四。；\n小伴：爷爷您说张三和李四呀，是您的老朋友呗？；他们最近来看您了没？；'}]

In [10]:
# Display the in-memory memory records.
memory_records

[{'timestamp': '2025-08-29 17:47:12', 'content': '记忆：张三,李四-心情：可能想起故人，略带怀念；'}]