# 基于五大道德基础理论构建语义轴

In [1]:
import os
import pickle
import numpy as np
from gensim.models import KeyedVectors
import warnings
import logging

In [4]:
# Configure the root logger: INFO level, with each record formatted as
# "<timestamp> : <level> : <message>".
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

def load_word2vec_model(model_path):
    """
    Load a word2vec model stored in the text word2vec format.

    Progress, the resulting vocabulary size and any failure are printed
    to stdout.

    Args:
        model_path (str): Path to the embedding file.

    Returns:
        KeyedVectors: The loaded word vectors.

    Raises:
        Exception: Whatever was raised while loading; it is re-raised
            unchanged after the failure message has been printed.
    """
    print(f"正在加载词向量模型: {model_path}")
    try:
        # binary=False: the file is read as the text variant of the format.
        vectors = KeyedVectors.load_word2vec_format(model_path, binary=False)
        vocab_size = len(vectors.key_to_index)
        print(f"模型加载成功，词汇量: {vocab_size}")
    except Exception as err:
        # Report the failure, then let the original exception propagate.
        print(f"模型加载失败: {str(err)}")
        raise
    return vectors

In [5]:
def create_moral_axis(word_vectors, positive_word, negative_word):
    """
    Build a moral-axis vector as the difference between two pole words.

    Both pole words are checked before anything is computed, and a warning
    is emitted for every distinct word that is missing, so a single call
    reports all vocabulary gaps for the pair rather than only the first.

    Args:
        word_vectors (KeyedVectors): Word-vector model; only membership
            tests (``in``) and item lookup (``[]``) are used.
        positive_word (str): Word marking the positive pole.
        negative_word (str): Word marking the negative pole.

    Returns:
        numpy.ndarray | None: ``vector(positive_word) - vector(negative_word)``,
        or ``None`` when at least one pole word is not in the model.
    """
    # dict.fromkeys de-duplicates while keeping order, so an identical
    # positive/negative word is reported only once.
    missing = [
        word
        for word in dict.fromkeys((positive_word, negative_word))
        if word not in word_vectors
    ]
    for word in missing:
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(f"警告: '{word}' 不在词向量模型中", stacklevel=2)
    if missing:
        return None

    # The axis points from the negative pole towards the positive pole.
    return word_vectors[positive_word] - word_vectors[negative_word]

In [6]:
def build_moral_axes(word_vectors, moral_poles):
    """
    Build one axis vector per entry of ``moral_poles``.

    For every axis the pole pair is printed, the axis is computed with
    ``create_moral_axis``, and, when a vector was produced, its first
    ten components are printed followed by a separator line. Axes for
    which no vector was produced are left out of the result.

    Args:
        word_vectors (KeyedVectors): Word-vector model.
        moral_poles (dict): Maps an axis name to a
            ``(positive_word, negative_word)`` pair.

    Returns:
        dict: Maps each successfully built axis name to its vector.
    """
    separator = "-" * 50
    axes = {}

    for axis_name, pole_pair in moral_poles.items():
        positive_word, negative_word = pole_pair
        print(f"构建 {axis_name} 轴: {positive_word} vs {negative_word}")

        vector = create_moral_axis(word_vectors, positive_word, negative_word)
        if vector is None:
            # No vector for this pair; skip the axis.
            continue

        axes[axis_name] = vector
        # Show the first ten components of the axis.
        print(f"{axis_name} 轴前10维值: {vector[:10]}")
        print(separator)

    return axes

In [7]:
def save_moral_axes(moral_axes, output_path):
    """
    Pickle the moral-axis dictionary to a file and print its location.

    Args:
        moral_axes (dict): Axis vectors keyed by axis name.
        output_path (str): Destination file; opened in binary write mode,
            so an existing file is overwritten.
    """
    with open(output_path, 'wb') as sink:
        pickler = pickle.Pickler(sink)
        pickler.dump(moral_axes)
    print(f"道德轴向量已保存至: {output_path}")

In [8]:
def main():
    """
    Build the five moral-foundation axes and save them to ``moral_axes.pkl``.

    Steps: check that the model file exists (print an error and return if
    it does not), load the word vectors, build one axis per pole pair,
    pickle the result, and finally print which pole words, if any, are
    absent from the model.
    """
    # Pole word pairs (positive, negative) for the five moral foundations.
    moral_poles = {
        "care": ("关爱", "伤害"),
        "fairness": ("公平", "欺骗"),
        "loyalty": ("忠诚", "背叛"),
        "authority": ("服从", "反叛"),
        "purity": ("纯洁", "污秽"),
    }

    # Embedding file to load; stop early when it is not on disk.
    model_path = r"D:\pythonProject\C_MFD2.0_embedding\词嵌入模型文件夹\zhihu\sgns.zhihu.word"
    if not os.path.exists(model_path):
        print(f"错误: 模型文件不存在 - {model_path}")
        return

    # Load the vectors, build the axes, then persist them.
    word_vectors = load_word2vec_model(model_path)
    moral_axes = build_moral_axes(word_vectors, moral_poles)
    output_path = "moral_axes.pkl"
    save_moral_axes(moral_axes, output_path)

    # Collect pole words missing from the model, in axis order
    # (positive word before negative word).
    missing_words = [
        word
        for positive_word, negative_word in moral_poles.values()
        for word in (positive_word, negative_word)
        if word not in word_vectors
    ]

    if not missing_words:
        print("所有极性词都在词向量模型中，构建成功!")
    else:
        print(f"警告: 以下词汇不在词向量模型中: {', '.join(missing_words)}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

2025-04-13 17:22:41,118 : INFO : loading projection weights from D:\pythonProject\C_MFD2.0_embedding\词嵌入模型文件夹\zhihu\sgns.zhihu.word


正在加载词向量模型: D:\pythonProject\C_MFD2.0_embedding\词嵌入模型文件夹\zhihu\sgns.zhihu.word


2025-04-13 17:23:08,798 : INFO : KeyedVectors lifecycle event {'msg': 'loaded (259922, 300) matrix of type float32 from D:\\pythonProject\\C_MFD2.0_embedding\\词嵌入模型文件夹\\zhihu\\sgns.zhihu.word', 'binary': False, 'encoding': 'utf8', 'datetime': '2025-04-13T17:23:08.798791', 'gensim': '4.3.3', 'python': '3.11.11 | packaged by conda-forge | (main, Mar  3 2025, 20:29:43) [MSC v.1943 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'load_word2vec_format'}


模型加载成功，词汇量: 259869
构建 care 轴: 关爱 vs 伤害
care 轴前10维值: [ 0.19804299  0.170026    0.195031    0.24005601  0.219791   -0.61237895
  0.07190299 -0.3345     -0.17207399 -0.16585898]
--------------------------------------------------
构建 fairness 轴: 公平 vs 欺骗
fairness 轴前10维值: [ 0.156525   -0.549209    0.554018    0.181344   -0.214347   -0.163257
  0.20169699  0.15173802 -0.383707   -0.682281  ]
--------------------------------------------------
构建 loyalty 轴: 忠诚 vs 背叛
loyalty 轴前10维值: [-0.34174198  0.408172    0.168324    0.256419   -0.400453    0.13650301
  0.691694   -0.065331    0.408887   -0.438583  ]
--------------------------------------------------
构建 authority 轴: 服从 vs 反叛
authority 轴前10维值: [-0.061417   -0.010115   -0.020205   -0.007423    0.20648101 -0.30812898
 -0.160135    0.73769796  0.509778   -0.3782    ]
--------------------------------------------------
构建 purity 轴: 纯洁 vs 污秽
purity 轴前10维值: [-0.344228    0.185601   -0.63567     0.157436   -0.586316   -0.358316
 -0.144219   -0.2145769