In [1]:
# import modules & set up logging
import logging
import os

import numpy as np

import gensim
from gensim.models import word2vec

import jieba.analyse
import jieba

In [2]:
# set up logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
sentence_file_path = './datas/in_the_name_of_people.txt'
word_file_path = './datas/cut_chars_of_in_the_name_of_people.txt'
model_file_path1 = './datas/gensim_char2vec1.w2v'
model_file_path2 = './datas/gensim_char2vec2.bin'
model_file_path3 = './datas/gensim_char2vec3_{}.npy'

## 一、分词

In [4]:
with open(word_file_path,'w', encoding='utf-8') as writer:
    with open(sentence_file_path, 'r', encoding='utf-8') as reader:
        # 加载所有数据
        content = reader.read()
        
        # 分词 --> 以每个字作为独立的词
        content = list(content)
        
        # 合并结果
        result = ' '.join(content)
        
        # 结果输出
        writer.write(result)
print("Done!!!")

Done!!!


## 二、Gensim Word2Vec构建

#### 训练方式一

In [5]:
# 每行数据加载
print(f"文件路径:{word_file_path}")
sentences = word2vec.LineSentence(word_file_path) 

# 训练Word2Vec模型
"""
classgensim.models.word2vec.Word2Vec(
    sentences=None, corpus_file=None, 
    vector_size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, 
    sample=0.001, seed=1, workers=3, min_alpha=0.0001, 
    sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, 
    hashfxn=<built-in function hash>, epochs=5, 
    null_word=0, trim_rule=None, sorted_vocab=1, 
    batch_words=10000, compute_loss=False, 
    callbacks=(), comment=None, max_final_vocab=None, shrink_windows=True)
sg: 1(Skip-gram) 0(CBOW)
hs: 1(hierarchical softmax) 0(negative)
negative: 当hs为0的时候，给定负样本数目，给定为0表示不采用负采样
"""
model = word2vec.Word2Vec(sentences, hs = 1,min_count = 1,window = 3,vector_size = 100)

2025-07-19 11:32:59,793 : INFO : collecting all words and their counts
2025-07-19 11:32:59,795 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2025-07-19 11:32:59,838 : INFO : collected 3269 word types from a corpus of 258973 raw words and 2311 sentences
2025-07-19 11:32:59,839 : INFO : Creating a fresh vocabulary
2025-07-19 11:32:59,845 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=1 retains 3269 unique words (100.00% of original 3269, drops 0)', 'datetime': '2025-07-19T11:32:59.845298', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'prepare_vocab'}
2025-07-19 11:32:59,847 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=1 leaves 258973 word corpus (100.00% of original 258973, drops 0)', 'datetime': '2025-07-19T11:32:59.847336', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (m

文件路径:./datas/cut_chars_of_in_the_name_of_people.txt


2025-07-19 11:32:59,938 : INFO : built huffman tree with maximum node depth 18
2025-07-19 11:32:59,954 : INFO : estimated required memory for 3269 words and 100 dimensions: 6211100 bytes
2025-07-19 11:32:59,955 : INFO : resetting layer weights
2025-07-19 11:32:59,957 : INFO : Word2Vec lifecycle event {'update': False, 'trim_rule': 'None', 'datetime': '2025-07-19T11:32:59.957347', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'build_vocab'}
2025-07-19 11:32:59,958 : INFO : Word2Vec lifecycle event {'msg': 'training model with 3 workers on 3269 vocabulary and 100 features, using sg=0 hs=1 sample=0.001 negative=5 window=3 shrink_windows=True', 'datetime': '2025-07-19T11:32:59.958347', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event':

#### 训练方式二

In [6]:
# 每行数据加载
sentences = word2vec.LineSentence(word_file_path) 

# 训练Word2Vec模型
model = word2vec.Word2Vec(hs = 1,min_count = 1,window = 9,vector_size = 100)

# 构建词典
model.build_vocab(sentences)

# 模型训练
model.train(sentences, total_examples=model.corpus_count, epochs=5)

2025-07-19 11:33:00,862 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=0, vector_size=100, alpha=0.025>', 'datetime': '2025-07-19T11:33:00.862240', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'created'}
2025-07-19 11:33:00,865 : INFO : collecting all words and their counts
2025-07-19 11:33:00,865 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2025-07-19 11:33:00,905 : INFO : collected 3269 word types from a corpus of 258973 raw words and 2311 sentences
2025-07-19 11:33:00,907 : INFO : Creating a fresh vocabulary
2025-07-19 11:33:00,915 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=1 retains 3269 unique words (100.00% of original 3269, drops 0)', 'datetime': '2025-07-19T11:33:00.915171', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 6

(1019350, 1294865)

## 三、Word2Vec应用

### 0. 获取Word2Vec模型相关属性

In [7]:
print("【词汇数目】: {}".format(len(model.wv.key_to_index)))
print("【转换的稠密的特征向量维度数目,每个单词转换的向量维度大小】: {}".format(model.wv.vector_size))
print("【单词到id的映射关系】: \n{}".format(model.wv.key_to_index))

【词汇数目】: 3269
【转换的稠密的特征向量维度数目,每个单词转换的向量维度大小】: 100
【单词到id的映射关系】: 
{'，': 0, '的': 1, '了': 2, '。': 3, '一': 4, '是': 5, '不': 6, '这': 7, '！': 8, '：': 9, '我': 10, '他': 11, '？': 12, '在': 13, '说': 14, '你': 15, '有': 16, '来': 17, '人': 18, '就': 19, '平': 20, '亮': 21, '高': 22, '个': 23, '侯': 24, '老': 25, '上': 26, '大': 27, '着': 28, '也': 29, '到': 30, '同': 31, '们': 32, '和': 33, '…': 34, '下': 35, '出': 36, '要': 37, '啊': 38, '么': 39, '长': 40, '没': 41, '地': 42, '可': 43, '还': 44, '达': 45, '子': 46, '李': 47, '康': 48, '小': 49, '得': 50, '会': 51, '时': 52, '成': 53, '事': 54, '心': 55, '道': 56, '起': 57, '书': 58, '好': 59, '记': 60, '过': 61, '手': 62, '那': 63, '看': 64, '里': 65, '话': 66, '良': 67, '想': 68, '能': 69, '育': 70, '都': 71, '明': 72, '把': 73, '去': 74, '对': 75, '陈': 76, '祁': 77, '师': 78, '伟': 79, '功': 80, '省': 81, '让': 82, '为': 83, '自': 84, '瑞': 85, '蔡': 86, '赵': 87, '局': 88, '意': 89, '然': 90, '委': 91, '现': 92, '天': 93, '学': 94, '家': 95, '情': 96, '中': 97, '生': 98, '问': 99, '发': 100, '金': 101, '头': 102, '前': 103, '开': 

### 1. 获取相似度最高的K个演员

In [8]:
# 夹角余弦相似度
req_count = 20
for key in model.wv.similar_by_word('沙', topn =100):
    req_count -= 1
    print(key[0], key[1])
    if req_count == 0:
        break;

龙 0.6199053525924683
音 0.5921816229820251
田 0.5023218393325806
椭 0.49028563499450684
咀 0.4796002507209778
兰 0.4777561128139496
赞 0.46922075748443604
褪 0.4648893177509308
梁 0.4596570134162903
晋 0.4540477693080902
充 0.452730655670166
嚼 0.4514681100845337
讯 0.44941744208335876
笛 0.4453631639480591
夸 0.4444979429244995
林 0.4434936046600342
弩 0.44140803813934326
浸 0.43772995471954346
金 0.4328945279121399
居 0.41987520456314087


### 2. 获取单词之间的相似度

In [21]:
# 夹角余弦相似度
print(model.wv.similarity('沙', '瑞'))

-0.13382854


### 3. 获取单词的词向量

In [22]:
v1 = model.wv.get_vector("提")
print(v1.shape)
print(v1)

(100,)
[ 0.76671565  0.84560883 -0.09681381  0.41430163  0.24695243  0.00360911
 -0.25832537  0.31480643  0.03082566 -0.58020425 -0.67254853 -0.28356433
  0.7936835   0.6030169  -0.03726152 -0.26380202 -0.9343648  -1.1684713
  0.11910174  0.00386953  0.01292956 -0.42682323 -0.01979294 -0.01301748
  0.10821428 -0.9582002   0.20908956 -0.21849269  0.515125    0.78893656
  0.21784313 -0.88807523  0.08891565  0.5328214  -0.21702658 -0.6652601
 -0.25907016  0.6560923   0.06465239  0.7685119   1.4880674   0.07639578
  0.02057602  0.80324966 -0.11700244 -0.5587195  -0.5434557   0.6459111
 -0.07868312  0.05601974  0.61400825  0.9393746   0.8716823  -0.30767083
 -0.19373795  0.03271548 -0.08578346  1.0625328   1.3942432   0.04010689
  0.4714245   0.5199269   0.43543723  0.29965922 -0.4249727  -0.3832188
 -0.844049    0.3092336   0.6418849  -0.59532994 -0.11605817 -0.4438996
 -0.349885   -0.2642351  -0.3134886  -0.9398317   1.0188463   0.3356084
 -0.12860098  0.37548965  0.28909954 -0.749697   -

In [11]:
model.wv['提']

array([ 0.76671565,  0.84560883, -0.09681381,  0.41430163,  0.24695243,
        0.00360911, -0.25832537,  0.31480643,  0.03082566, -0.58020425,
       -0.67254853, -0.28356433,  0.7936835 ,  0.6030169 , -0.03726152,
       -0.26380202, -0.9343648 , -1.1684713 ,  0.11910174,  0.00386953,
        0.01292956, -0.42682323, -0.01979294, -0.01301748,  0.10821428,
       -0.9582002 ,  0.20908956, -0.21849269,  0.515125  ,  0.78893656,
        0.21784313, -0.88807523,  0.08891565,  0.5328214 , -0.21702658,
       -0.6652601 , -0.25907016,  0.6560923 ,  0.06465239,  0.7685119 ,
        1.4880674 ,  0.07639578,  0.02057602,  0.80324966, -0.11700244,
       -0.5587195 , -0.5434557 ,  0.6459111 , -0.07868312,  0.05601974,
        0.61400825,  0.9393746 ,  0.8716823 , -0.30767083, -0.19373795,
        0.03271548, -0.08578346,  1.0625328 ,  1.3942432 ,  0.04010689,
        0.4714245 ,  0.5199269 ,  0.43543723,  0.29965922, -0.4249727 ,
       -0.3832188 , -0.844049  ,  0.3092336 ,  0.6418849 , -0.59

In [12]:
# 异常：不存在"明"这个单词
# model.wv.get_vector("明")

In [13]:
# 首先判断是否存在单词，如果存在，就返回，否则单词直接过滤
word = "明"
# word = "康"
if word in model.wv:
    print("【向量】:\n{}".format(model.wv[word]))
else:
    print("【单词不存在】!!!")

【向量】:
[-1.8659567  -1.2683804  -1.5203211   1.3393248   1.5698305   1.5766704
  0.5792709  -0.0944344   0.4600724  -2.0212765   1.3290492   3.0347586
 -1.7484974  -2.5461733  -1.2421962  -2.195707    1.1677952   0.12306257
  2.3536565   0.13785753  0.25596005 -0.979864   -1.6725149   0.20529638
  2.3256555   1.465334    1.5475092  -0.7109102  -2.202951   -0.41991773
 -0.57832915  1.9694822  -0.6066874  -0.90539956 -0.7602718  -0.11119125
 -0.19396071 -2.482109   -1.958807    1.8576888  -0.4388546  -0.1392478
  3.226103    1.5821632   3.9528255  -0.9023426  -1.2217813  -2.1251204
  1.319811   -2.383383    2.024607    0.27819237  0.9661102   0.515211
 -2.5315325  -2.0237613   2.2050009  -0.92303145 -0.717984   -1.5594845
  0.82008034 -0.91016847 -0.33634543 -2.836952   -1.8688903   0.26246652
 -0.5537695   1.6250952   0.82133174 -2.4282622  -1.0710108   0.25866285
 -1.2783579  -1.1877224   0.45052952  0.21467869 -0.5023352  -1.0768069
 -2.6007748  -0.15764014 -1.591806    0.71941495  0.2

## 四、模型持久化&模型恢复加载

### 方式一：
直接使用save API进行模型持久化

#### 持久化

In [14]:
model.save(model_file_path1)

2025-07-19 11:33:02,439 : INFO : Word2Vec lifecycle event {'fname_or_handle': './datas/gensim_char2vec1.w2v', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-07-19T11:33:02.439289', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'saving'}
2025-07-19 11:33:02,441 : INFO : not storing attribute cum_table
2025-07-19 11:33:02,470 : INFO : saved ./datas/gensim_char2vec1.w2v


#### 加载

In [15]:
# 直接基于路径加载
model2 = word2vec.Word2Vec.load(model_file_path1)
print(model2)

v1 = model2.wv.get_vector("提")
print(v1.shape)
print(v1)

2025-07-19 11:33:02,538 : INFO : loading Word2Vec object from ./datas/gensim_char2vec1.w2v
2025-07-19 11:33:02,555 : INFO : loading wv recursively from ./datas/gensim_char2vec1.w2v.wv.* with mmap=None
2025-07-19 11:33:02,558 : INFO : setting ignored attribute cum_table to None
2025-07-19 11:33:02,578 : INFO : Word2Vec lifecycle event {'fname': './datas/gensim_char2vec1.w2v', 'datetime': '2025-07-19T11:33:02.578735', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'loaded'}


Word2Vec<vocab=3269, vector_size=100, alpha=0.025>
(100,)
[ 0.76671565  0.84560883 -0.09681381  0.41430163  0.24695243  0.00360911
 -0.25832537  0.31480643  0.03082566 -0.58020425 -0.67254853 -0.28356433
  0.7936835   0.6030169  -0.03726152 -0.26380202 -0.9343648  -1.1684713
  0.11910174  0.00386953  0.01292956 -0.42682323 -0.01979294 -0.01301748
  0.10821428 -0.9582002   0.20908956 -0.21849269  0.515125    0.78893656
  0.21784313 -0.88807523  0.08891565  0.5328214  -0.21702658 -0.6652601
 -0.25907016  0.6560923   0.06465239  0.7685119   1.4880674   0.07639578
  0.02057602  0.80324966 -0.11700244 -0.5587195  -0.5434557   0.6459111
 -0.07868312  0.05601974  0.61400825  0.9393746   0.8716823  -0.30767083
 -0.19373795  0.03271548 -0.08578346  1.0625328   1.3942432   0.04010689
  0.4714245   0.5199269   0.43543723  0.29965922 -0.4249727  -0.3832188
 -0.844049    0.3092336   0.6418849  -0.59532994 -0.11605817 -0.4438996
 -0.349885   -0.2642351  -0.3134886  -0.9398317   1.0188463   0.3356084

### 方式二：
保存为二进制词向量

#### 持久化

In [16]:
model.wv.save_word2vec_format(model_file_path2,binary=True)

2025-07-19 11:33:02,615 : INFO : storing 3269x100 projection weights into ./datas/gensim_char2vec2.bin


#### 加载

In [17]:
# 加载模型
model2 = gensim.models.KeyedVectors.load_word2vec_format(model_file_path2,binary=True)
print(model2)

# 应用模型
v1 = model2.get_vector("提")
print(v1.shape)
print(v1)

2025-07-19 11:33:02,672 : INFO : loading projection weights from ./datas/gensim_char2vec2.bin
2025-07-19 11:33:02,688 : INFO : KeyedVectors lifecycle event {'msg': 'loaded (3269, 100) matrix of type float32 from ./datas/gensim_char2vec2.bin', 'binary': True, 'encoding': 'utf8', 'datetime': '2025-07-19T11:33:02.688839', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'load_word2vec_format'}


KeyedVectors<vector_size=100, 3269 keys>
(100,)
[ 0.76671565  0.84560883 -0.09681381  0.41430163  0.24695243  0.00360911
 -0.25832537  0.31480643  0.03082566 -0.58020425 -0.67254853 -0.28356433
  0.7936835   0.6030169  -0.03726152 -0.26380202 -0.9343648  -1.1684713
  0.11910174  0.00386953  0.01292956 -0.42682323 -0.01979294 -0.01301748
  0.10821428 -0.9582002   0.20908956 -0.21849269  0.515125    0.78893656
  0.21784313 -0.88807523  0.08891565  0.5328214  -0.21702658 -0.6652601
 -0.25907016  0.6560923   0.06465239  0.7685119   1.4880674   0.07639578
  0.02057602  0.80324966 -0.11700244 -0.5587195  -0.5434557   0.6459111
 -0.07868312  0.05601974  0.61400825  0.9393746   0.8716823  -0.30767083
 -0.19373795  0.03271548 -0.08578346  1.0625328   1.3942432   0.04010689
  0.4714245   0.5199269   0.43543723  0.29965922 -0.4249727  -0.3832188
 -0.844049    0.3092336   0.6418849  -0.59532994 -0.11605817 -0.4438996
 -0.349885   -0.2642351  -0.3134886  -0.9398317   1.0188463   0.3356084
 -0.12860

In [18]:
# 加载模型
model2 = gensim.models.KeyedVectors.load_word2vec_format('./datas/vectors.bin',
                                                         binary=True)
print(model2)

# 应用模型
v1 = model2.get_vector("酒")
print(v1.shape)
print(v1)

2025-07-19 11:33:02,763 : INFO : loading projection weights from ./datas/vectors.bin
2025-07-19 11:33:02,818 : INFO : KeyedVectors lifecycle event {'msg': 'loaded (7942, 128) matrix of type float32 from ./datas/vectors.bin', 'binary': True, 'encoding': 'utf8', 'datetime': '2025-07-19T11:33:02.818279', 'gensim': '4.3.3', 'python': '3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-11-10.0.22631-SP0', 'event': 'load_word2vec_format'}


KeyedVectors<vector_size=128, 7942 keys>
(128,)
[ 3.50566626e-01 -1.04986653e-01 -7.67363831e-02  1.02968253e-01
  1.18330494e-01  5.92405088e-02  1.43829891e-02 -2.13755772e-01
  3.01811416e-02  6.38461784e-02  9.29202810e-02 -9.80767310e-02
  3.37989390e-01  1.62496209e-01 -1.00853711e-01  1.86467111e-01
  1.23709984e-01  4.02765274e-02  1.66892633e-01 -1.33713201e-01
  1.33725271e-01 -7.69479200e-02 -4.04792249e-01  1.34307100e-02
 -4.08627130e-02  1.60763144e-01 -2.02138210e-03 -2.05629498e-01
  1.40110895e-01  1.38469696e-01  5.83793372e-02 -6.10959306e-02
  2.09262501e-02  2.13176370e-01 -6.33254573e-02 -1.41293630e-01
  8.06461945e-02  8.95849839e-02  8.80930126e-02 -9.46233943e-02
  5.21726757e-02 -2.96247043e-02 -4.45053317e-02 -1.41549101e-02
 -1.64876487e-02  5.51286805e-03 -1.92858249e-01  1.42684672e-03
  6.35003224e-02 -1.57157220e-02 -1.80648953e-01  1.67108551e-01
 -9.37882364e-02 -4.28168513e-02  4.31317948e-02  2.22156458e-02
 -2.61552483e-01 -2.04422385e-01 -1.230919

### 方式三：
直接使用NumPy API保存词向量信息

#### 持久化

In [19]:
# 获取词向量
norm_word_embeddings = model.wv.get_normed_vectors()
word_embeddings = model.wv.vectors
# 获取词典(词典到idx的映射)
vocab_2_index = list(map(lambda k: (k, model.wv.key_to_index[k]), model.wv.key_to_index))
print(np.shape(norm_word_embeddings), np.shape(word_embeddings), np.shape(vocab_2_index))
# 数据保存
np.save(model_file_path3.format("norm_embedding"), norm_word_embeddings)
np.save(model_file_path3.format("embedding"), word_embeddings)
np.save(model_file_path3.format("vocab_2_index"), vocab_2_index)

(3269, 100) (3269, 100) (3269, 2)


#### 加载

In [20]:
# 加载数据
norm_word_embeddings = np.load(model_file_path3.format("norm_embedding"))
word_embeddings = np.load(model_file_path3.format("embedding"))
vocab_2_index = np.load(model_file_path3.format("vocab_2_index"))

# 字典转换
vocab_2_index = dict(map(lambda t:(t[0], int(t[1])), vocab_2_index))

# 获取数据
word = "提"
index = vocab_2_index[word]
v1 = word_embeddings[index]
print(v1.shape)
print(v1)

(100,)
[ 0.76671565  0.84560883 -0.09681381  0.41430163  0.24695243  0.00360911
 -0.25832537  0.31480643  0.03082566 -0.58020425 -0.67254853 -0.28356433
  0.7936835   0.6030169  -0.03726152 -0.26380202 -0.9343648  -1.1684713
  0.11910174  0.00386953  0.01292956 -0.42682323 -0.01979294 -0.01301748
  0.10821428 -0.9582002   0.20908956 -0.21849269  0.515125    0.78893656
  0.21784313 -0.88807523  0.08891565  0.5328214  -0.21702658 -0.6652601
 -0.25907016  0.6560923   0.06465239  0.7685119   1.4880674   0.07639578
  0.02057602  0.80324966 -0.11700244 -0.5587195  -0.5434557   0.6459111
 -0.07868312  0.05601974  0.61400825  0.9393746   0.8716823  -0.30767083
 -0.19373795  0.03271548 -0.08578346  1.0625328   1.3942432   0.04010689
  0.4714245   0.5199269   0.43543723  0.29965922 -0.4249727  -0.3832188
 -0.844049    0.3092336   0.6418849  -0.59532994 -0.11605817 -0.4438996
 -0.349885   -0.2642351  -0.3134886  -0.9398317   1.0188463   0.3356084
 -0.12860098  0.37548965  0.28909954 -0.749697   -