In [2]:
pip install pandas jieba pyecharts

Note: you may need to restart the kernel to use updated packages.
Collecting pyecharts
  Downloading pyecharts-2.0.8-py3-none-any.whl.metadata (1.6 kB)
Collecting prettytable (from pyecharts)
  Downloading prettytable-3.14.0-py3-none-any.whl.metadata (30 kB)
Collecting simplejson (from pyecharts)
  Downloading simplejson-3.20.1-cp39-cp39-win_amd64.whl.metadata (3.4 kB)
Downloading pyecharts-2.0.8-py3-none-any.whl (153 kB)
   ---------------------------------------- 0.0/153.7 kB ? eta -:--:--
   ------- -------------------------------- 30.7/153.7 kB ? eta -:--:--
   ------------------------------- -------- 122.9/153.7 kB 1.8 MB/s eta 0:00:01
   ---------------------------------------- 153.7/153.7 kB 1.5 MB/s eta 0:00:00
Downloading prettytable-3.14.0-py3-none-any.whl (31 kB)
Downloading simplejson-3.20.1-cp39-cp39-win_amd64.whl (75 kB)
   ---------------------------------------- 0.0/75.7 kB ? eta -:--:--
   ------------------------------------- -- 71.7/75.7 kB 2.0 MB/s eta 0:00:01
   --


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import pandas as pd
import jieba
from collections import Counter
from pyecharts.charts import WordCloud
from pyecharts import options as opts

# --- 1. Make sure the CSV file exists ---
# The bare file name is resolved against the notebook's working directory.
csv_path = "图书馆主页网站文章.csv"

if not os.path.exists(csv_path):
    raise FileNotFoundError(f"❌ 文件未找到: {csv_path}")

# --- 2. Read the CSV file ---
df = pd.read_csv(csv_path)

# --- 3. Make sure the "标题" (title) column exists ---
# Strip stray whitespace around header names before looking the column up.
df.columns = [col.strip() for col in df.columns]
if "标题" not in df.columns:
    raise ValueError(f"❌ '标题' 列未找到，请检查 CSV 文件的列名: {df.columns}")

# --- 4. Collect every non-missing title as a string ---
titles = df["标题"].dropna().astype(str).tolist()

# --- 5. Segment the titles with jieba ---
# Titles are joined with a space so tokens cannot span two titles.
text = " ".join(titles)

# Function words that carry no topical meaning; extend as needed.
stopwords = {"的", "了", "和", "是", "在", "也", "就", "都", "与", "及"}

# jieba.cut also yields the separators themselves as tokens: the spaces used
# to join the titles as well as punctuation marks. Left in, the space would be
# counted once per title and show up as a top "word" in the cloud. Keep only
# tokens that contain at least one letter or digit (CJK characters count as
# letters for str.isalnum) and that are not stop words. Materialising the
# result as a list keeps `words` reusable after counting.
words = [
    token
    for token in (raw.strip() for raw in jieba.cut(text))
    if token not in stopwords and any(ch.isalnum() for ch in token)
]

# --- 6. Count word frequencies ---
word_freq = Counter(words)

# --- 7. Build the word-cloud data: the 200 most frequent (word, count) pairs ---
word_data = word_freq.most_common(200)

# --- 8. Build the word cloud with pyecharts ---
wordcloud = (
    WordCloud()
    # word_size_range is the [min, max] font size used for the rendered words.
    .add("", word_data, word_size_range=[20, 100], shape="circle")
    .set_global_opts(title_opts=opts.TitleOpts(title="📚 图书馆文章标题词云"))
)

# --- 9. Display the chart inline; as the cell's last expression it is rendered ---
wordcloud.render_notebook()

Building prefix dict from the default dictionary ...
Dumping model to file cache C:\Users\Yvett\AppData\Local\Temp\jieba.cache
Loading model cost 1.649 seconds.
Prefix dict has been built successfully.
