In [21]:
import jieba
import json
import gensim
from gensim import corpora
import numpy as np

# Load the conversation history JSON file.
with open('/data1/dxw_data/llm/is_agents/agents/six_hats/task5/conversation_history.json', 'r', encoding='utf-8') as file:
    conversation_data = json.loads(file.read())

# Stop-word set: one entry per line of the stop-word file, with surrounding
# whitespace stripped from each line.
stop_words = set()
with open('/data1/dxw_data/llm/ML/LIWC/datasets/stopwords_cn.txt', 'r', encoding='utf-8') as file:
    stop_words.update(map(str.strip, file))

# Text preprocessing
def preprocess(text):
    """Tokenize ``text`` with jieba and keep only the tokens of interest.

    A token is kept when it is not in the module-level ``stop_words`` set,
    is exactly two characters long, and passes ``str.isalpha``.

    Returns:
        The kept tokens as a list, in the order jieba produced them.
    """
    kept = []
    for token in jieba.lcut(text):
        if token in stop_words:
            continue
        if len(token) != 2 or not token.isalpha():
            continue
        kept.append(token)
    return kept

# Group the messages in conversation_data by the agent that produced them.
agent_messages = {}
for conversation in conversation_data:
    agent, message = conversation['agent'], conversation['message']
    # setdefault creates the agent's list on first sight, then we append to it.
    agent_messages.setdefault(agent, []).append(message)

# Run LDA over one agent's messages, print the topics and optionally log them
def lda_analysis(agent, messages, num_topics=5, file=None, random_state=42):
    """Fit an LDA topic model on one agent's messages and report its topics.

    Args:
        agent: Label of the agent; used only in the report header.
        messages: Iterable of raw message strings for that agent.
        num_topics: Number of topics to fit.
        file: Optional writable text file object; when truthy, the report is
            also written to it.
        random_state: Seed forwarded to ``LdaModel`` so repeated runs produce
            the same topics. Pass ``None`` to leave the model unseeded.

    Returns:
        None. The report is printed and, if ``file`` is given, written to it.
    """
    # Tokenize and filter every message.
    processed_texts = [preprocess(message) for message in messages]

    # Build the vocabulary from the tokenized messages.
    dictionary = corpora.Dictionary(processed_texts)

    if len(dictionary) == 0:
        # LdaModel raises ValueError on an empty vocabulary, which would abort
        # the whole run; report the agent as un-analysable instead, using the
        # same wording as the per-message analysis.
        result = f"\nAgent: {agent}\nTopic: 无法生成主题（可能文本过短或无有效词汇）\n"
    else:
        corpus = [dictionary.doc2bow(text) for text in processed_texts]

        # Seeded so the reported topics are reproducible between runs.
        lda_model = gensim.models.LdaModel(
            corpus,
            num_topics=num_topics,
            id2word=dictionary,
            passes=10,
            random_state=random_state,
        )

        # Header line followed by one line per topic (-1 requests all topics).
        report_lines = [f"\nAgent: {agent}\n"]
        report_lines.extend(
            f"Topic {idx}: {topic}\n" for idx, topic in lda_model.print_topics(-1)
        )
        result = "".join(report_lines)

    # Echo the report to the console.
    print(result)

    # Also persist it when a log file was supplied.
    if file:
        file.write(result)

# Open the log file and report on every agent in turn.
with open('/data1/dxw_data/llm/is_agents/agents/six_hats/task5/log_lda1.txt', 'w', encoding='utf-8') as log_file:
    for agent, messages in agent_messages.items():
        # Agents with at most one message are not analysed; a notice is
        # printed and logged for them instead.
        if len(messages) <= 1:
            no_analysis_message = f"\nAgent: {agent} 没有足够的消息进行LDA分析\n"
            print(no_analysis_message)
            log_file.write(no_analysis_message)
            continue

        lda_analysis(agent, messages, file=log_file)



Agent: 蓝色思考帽(管理)
Topic 0: 0.154*"思考" + 0.032*"客观" + 0.032*"情感" + 0.032*"讨论" + 0.032*"职业" + 0.024*"红色" + 0.024*"黑色" + 0.024*"创新" + 0.024*"应对" + 0.024*"黄色"
Topic 1: 0.010*"思考" + 0.010*"客观" + 0.010*"情感" + 0.010*"红色" + 0.010*"职业" + 0.010*"顺序" + 0.010*"创新" + 0.010*"白色" + 0.010*"表达" + 0.010*"讨论"
Topic 2: 0.156*"思考" + 0.058*"情感" + 0.044*"客观" + 0.030*"顺序" + 0.030*"黑色" + 0.030*"白色" + 0.030*"红色" + 0.030*"黄色" + 0.030*"批判" + 0.030*"创新"
Topic 3: 0.119*"思考" + 0.030*"客观" + 0.026*"职业" + 0.026*"挑战" + 0.026*"创新" + 0.026*"顺序" + 0.025*"情感" + 0.021*"过程" + 0.021*"绿色" + 0.021*"提出"
Topic 4: 0.010*"思考" + 0.010*"挑战" + 0.010*"客观" + 0.010*"绿色" + 0.010*"创新" + 0.010*"情感" + 0.010*"红色" + 0.010*"职业" + 0.010*"顺序" + 0.010*"提出"


Agent: 红色思考帽(情感)
Topic 0: 0.008*"尝试" + 0.008*"正位" + 0.008*"情感" + 0.008*"改变" + 0.008*"思考" + 0.008*"审视" + 0.008*"寻求" + 0.008*"面对" + 0.008*"代表" + 0.008*"未来"
Topic 1: 0.041*"思考" + 0.033*"情感" + 0.033*"审视" + 0.033*"适合" + 0.033*"感受" + 0.025*"面对" + 0.025*"挑战" + 0.025*"改变" + 0.025*"确实" + 0.025*"期望"
Topi

In [19]:
import jieba
import json
import gensim
from gensim import corpora
import numpy as np

# Read conversation_history.json into memory.
with open('/data1/dxw_data/llm/is_agents/agents/six_hats/task5/conversation_history.json', 'r', encoding='utf-8') as file:
    conversation_data = json.load(file)

# Collect the stop words: each line of the file, stripped of surrounding
# whitespace, becomes one entry of the set.
stop_words = set()
with open('/data1/dxw_data/llm/ML/LIWC/datasets/stopwords_cn.txt', 'r', encoding='utf-8') as file:
    stop_words.update(line.strip() for line in file)

# Text preprocessing
def preprocess(text):
    """Split ``text`` into jieba tokens and drop the ones that are not wanted.

    Tokens that appear in the module-level ``stop_words`` set, whose length
    is not exactly two characters, or that fail ``str.isalpha`` are removed.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    def is_wanted(word):
        # De Morgan form of: not a stop word AND length 2 AND alphabetic.
        return not (word in stop_words or len(word) != 2 or not word.isalpha())

    return list(filter(is_wanted, jieba.lcut(text)))

# Run LDA on a single message and return one topic for it
def lda_analysis_for_message(message, num_topics=1, num_words=5, random_state=42):
    """Fit an LDA model on a single message and return its first topic.

    Args:
        message: Raw message text.
        num_topics: Number of topics to fit; only the first one is returned.
        num_words: Number of top words included in the topic description.
        random_state: Seed forwarded to ``LdaModel`` so repeated runs produce
            the same result. Pass ``None`` to leave the model unseeded.

    Returns:
        The first entry of ``lda_model.print_topics(...)`` (a
        ``(topic_id, description)`` tuple), or ``None`` when preprocessing
        leaves no tokens to model.
    """
    processed_text = preprocess(message)

    # No surviving tokens means an empty vocabulary; bail out before building
    # the dictionary and corpus, since no model can be trained on it.
    if not processed_text:
        return None

    # Single-document dictionary and bag-of-words corpus.
    dictionary = corpora.Dictionary([processed_text])
    corpus = [dictionary.doc2bow(processed_text)]

    # NOTE(review): with one document and one topic the result is presumably
    # close to a smoothed word-frequency ranking of the message — confirm that
    # LDA is the intended tool for per-message summaries.
    lda_model = gensim.models.LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        passes=10,
        random_state=random_state,
    )

    # Only the first topic is reported.
    return lda_model.print_topics(num_words=num_words)[0]

# Open the log file, then analyse and report every message in turn.
with open('/data1/dxw_data/llm/is_agents/agents/six_hats/task5/log_lda2.txt', 'w', encoding='utf-8') as log_file:
    for conversation in conversation_data:
        agent, message = conversation['agent'], conversation['message']

        # One LDA run per message.
        topic = lda_analysis_for_message(message)

        # A falsy topic (None) means no topic could be produced for this
        # message; a fallback notice is reported in that case.
        result = (
            f"\nAgent: {agent}\nTopic: {topic}\n"
            if topic
            else f"\nAgent: {agent}\nTopic: 无法生成主题（可能文本过短或无有效词汇）\n"
        )

        # Echo to the console and persist to the log file.
        print(result)
        log_file.write(result)



Agent: 蓝色思考帽(管理)
Topic: (0, '0.135*"思考" + 0.053*"情感" + 0.041*"客观" + 0.029*"白色" + 0.029*"黑色"')


Agent: 红色思考帽(情感)
Topic: (0, '0.036*"尝试" + 0.027*"正位" + 0.027*"寻求" + 0.023*"未来" + 0.023*"审视"')


Agent: 黄色思考帽(积极)
Topic: (0, '0.033*"尝试" + 0.033*"挑战" + 0.027*"情感" + 0.022*"需求" + 0.022*"感到"')


Agent: 绿色思考帽(创新)
Topic: (0, '0.065*"寻找" + 0.059*"方式" + 0.043*"思维" + 0.043*"工作" + 0.043*"建议"')


Agent: 黑色思考帽(批判)
Topic: (0, '0.036*"改变" + 0.029*"挑战" + 0.029*"正位" + 0.029*"占卜" + 0.022*"找到"')


Agent: 白色思考帽(客观)
Topic: (0, '0.032*"生活" + 0.025*"心态" + 0.025*"挑战" + 0.025*"占卜" + 0.025*"情感"')


Agent: 蓝色思考帽(管理)
Topic: (0, '0.094*"思考" + 0.026*"客观" + 0.023*"挑战" + 0.023*"职业" + 0.023*"顺序"')


Agent: 白色思考帽(客观)
Topic: (0, '0.032*"生活" + 0.025*"心态" + 0.025*"挑战" + 0.025*"占卜" + 0.025*"情感"')


Agent: 红色思考帽(情感)
Topic: (0, '0.036*"思考" + 0.030*"审视" + 0.030*"适合" + 0.030*"感受" + 0.030*"情感"')


Agent: 黄色思考帽(积极)
Topic: (0, '0.063*"职业" + 0.031*"建议" + 0.031*"应对" + 0.031*"机会" + 0.031*"寻找"')


Agent: 黑色思考帽(批判)
Topic: (0, '0.038*"占卜"