### import packages

In [1]:
import tkinter as tk
import pandas as pd
from wordcloud import WordCloud
import jieba
import jieba.analyse
from collections import Counter
from PIL import ImageTk as itk

### global variables

In [2]:
# 文字雲分辨字元
dictfile = "dict.txt"
# 文字雲去除無用字元
stopfile = "stopwords.txt"
# 文字雲中文視覺化繪圖要使用到的font family
fontpath = "NotoSerifTC-Bold.otf"


# 各家媒體做文字雲的data(取新聞標題)
thenewslens_df = pd.read_csv('thenewslens.csv', index_col=0)
thenewslens = ' '.join(thenewslens_df['title'])
commonwealth_df = pd.read_csv('cw.csv', index_col=0)
commonwealth = ' '.join(commonwealth_df['title'])
businesstoday_df = pd.read_csv('businesstoday.csv', index_col=0)
businesstoday = ' '.join(businesstoday_df['title'])

### tkinter setting and application

In [3]:
# 清除畫布，重讀新的文字雲視覺化產出
def change_img():
    canvas.delete("all")
    canvas.img = itk.PhotoImage(file= 'wordcloud.jpg')
    canvas.create_image(0,0, anchor=tk.NW, image=canvas.img)

# 確認被點擊的按鈕是哪一家媒體，並且執行該家媒體的文字雲視覺化圖表產出
def result():
    choice = radiostr.get()
    if choice == '1':
        target = businesstoday
    elif choice == '2':
        target = commonwealth
    elif choice == '3':
        target = thenewslens
    jieba.set_dictionary(dictfile)
    jieba.analyse.set_stop_words(stopfile)

    tags = jieba.analyse.extract_tags(target, topK=30)
    seg_list = jieba.lcut(target, cut_all=False)
    dictionary = Counter(seg_list)

    freq = {}
    for ele in dictionary:
        if ele in tags:
            freq[ele] = dictionary[ele]

    wordcloud = WordCloud(background_color="white",
                          contour_width=3, 
                          contour_color='steelblue', 
                          font_path= fontpath).generate_from_frequencies(freq)
    # 將文字雲圖表存檔
    wordcloud.to_file('wordcloud.jpg')
    # 將應用擱置3000 milliseconds，並清除畫布
    root.after(3000, change_img)

# 開啟應用
root = tk.Tk()
# 設定頁面大小
root.geometry('380x230')  
# 無法讓使用者自由調整視窗長根寬
root.resizable(False, False)

fm_rad = tk.Frame()
fm_rad.pack(anchor=tk.N)

# 設置按鈕
radiostr = tk.StringVar(None, 1)
B1 = tk.Radiobutton(fm_rad, variable=radiostr, value='1', text='business today', bg='black',fg='white', font=('Arial', 15), command=result)
B1.pack(side=tk.LEFT, anchor=tk.N)
B2 = tk.Radiobutton(fm_rad, variable=radiostr, value='2', text='common wealth', bg='black',fg='white', font=('Arial', 15), command=result)
B2.pack(side=tk.LEFT, anchor=tk.N)
B3 = tk.Radiobutton(fm_rad, variable=radiostr, value='3', text='the news lens', bg='black',fg='white', font=('Arial', 15), command=result)
B3.pack(side=tk.LEFT, anchor=tk.N)

# 設置預設文字雲圖片及位置
photo = itk.PhotoImage(file= 'businesstoday_wordcloud.jpg')
canvas = tk.Canvas(root, width=500, height=500)
canvas.imageList = []
canvas.pack()
canvas.create_image(0, 0, anchor="nw", image=photo)
canvas.imageList.append(photo)

# 執行應用
root.mainloop()

Building prefix dict from /Users/yasmine/Desktop/work/接案/爬蟲/tech_news_crawler/dict.txt ...
Loading model from cache /var/folders/6f/v4g3dkkj0b54w0sc2l43bkqm0000gn/T/jieba.u9202be05f3725e4667d60e7c54b13fdb.cache
Loading model cost 0.544 seconds.
Prefix dict has been built successfully.
Building prefix dict from /Users/yasmine/Desktop/work/接案/爬蟲/tech_news_crawler/dict.txt ...
Loading model from cache /var/folders/6f/v4g3dkkj0b54w0sc2l43bkqm0000gn/T/jieba.u9202be05f3725e4667d60e7c54b13fdb.cache
Loading model cost 0.558 seconds.
Prefix dict has been built successfully.
Building prefix dict from /Users/yasmine/Desktop/work/接案/爬蟲/tech_news_crawler/dict.txt ...
Loading model from cache /var/folders/6f/v4g3dkkj0b54w0sc2l43bkqm0000gn/T/jieba.u9202be05f3725e4667d60e7c54b13fdb.cache
Loading model cost 0.545 seconds.
Prefix dict has been built successfully.
