In [3]:
import tkinter as tk
from tkinter import *
import pickle
from collections import Counter

# Load the trained model from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'model_Baum_Welch.pkl' must come from a trusted source.
with open('model_Baum_Welch.pkl', 'rb') as file:
    model = pickle.load(file)

# Extract model parameters (the loaded object is indexed by string keys).
# How the functions in this notebook use them:
#   probabilitas_awal              - looked up per tag with .get(tag, 0)
#   probabilitas_transisi          - looked up with (previous_tag, tag) keys
#   probabilitas_emisi             - looked up with (word, tag) keys
#   probabilitas_emisi_kata_kosong - per-tag fallback used when a (word, tag)
#                                    pair is missing from probabilitas_emisi
probabilitas_awal = model['probabilitas_awal']
probabilitas_transisi = model['probabilitas_transisi']
probabilitas_emisi = model['probabilitas_emisi']
probabilitas_emisi_kata_kosong = model['probabilitas_emisi_kata_kosong']
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
# presumably per-tag counts and the vocabulary size from training - TODO confirm
pos_freq = model['pos_freq']
vocab_size = model['vocab_size']

# Define the POS tags
# NOTE(review): this repeats the identical assignment a few lines above.
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

In [4]:
# Algoritma Forward
def algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags, probabilitas_emisi_kata_kosong):
    """Run the HMM forward pass over one sentence.

    Args:
        sentence: sequence of word tokens (must be non-empty when pos_tags is).
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (previous_tag, tag) -> transition
            probability; missing pairs count as 0.
        probabilitas_awal: mapping tag -> initial probability; missing tags
            count as 0.
        pos_tags: iterable of tags to consider.
        probabilitas_emisi_kata_kosong: mapping tag -> fallback emission
            probability used when (word, tag) is not in probabilitas_emisi.

    Returns:
        A list with one dict per token, each mapping tag -> forward value.
    """
    def emission(word, tag):
        # The fallback is looked up unconditionally, exactly like the default
        # argument of dict.get would be.
        return probabilitas_emisi.get((word, tag), probabilitas_emisi_kata_kosong[tag])

    # Initialisation: start probability times emission of the first word.
    first_column = {}
    for tag in pos_tags:
        first_column[tag] = probabilitas_awal.get(tag, 0) * emission(sentence[0], tag)
    alpha = [first_column]

    # Recursion: each column only depends on the one right before it.
    for word in sentence[1:]:
        previous = alpha[-1]
        alpha.append({
            tag: sum(
                previous[prior] * probabilitas_transisi.get((prior, tag), 0) * emission(word, tag)
                for prior in pos_tags
            )
            for tag in pos_tags
        })

    return alpha

# Algoritma Backward
def algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong):
    """Run the HMM backward pass over one sentence.

    Args:
        sentence: sequence of word tokens.
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (tag, next_tag) -> transition
            probability; missing pairs count as 0.
        pos_tags: iterable of tags to consider.
        probabilitas_emisi_kata_kosong: mapping tag -> fallback emission
            probability used when (word, tag) is not in probabilitas_emisi.

    Returns:
        A list with one dict per token, each mapping tag -> backward value.
    """
    length = len(sentence)
    beta = [dict() for _ in range(length)]

    # Initialisation: every tag gets 1 at the final position.
    for tag in pos_tags:
        beta[-1][tag] = 1

    # Walk from the second-to-last position down to the first.
    for t in reversed(range(length - 1)):
        following = beta[t + 1]
        next_word = sentence[t + 1]
        for tag in pos_tags:
            beta[t][tag] = sum(
                following[nxt]
                * probabilitas_transisi.get((tag, nxt), 0)
                * probabilitas_emisi.get((next_word, nxt), probabilitas_emisi_kata_kosong[nxt])
                for nxt in pos_tags
            )

    return beta

def expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong):
    """Compute the E-step posteriors (gamma and ksi) for one sentence.

    Args:
        sentence: sequence of word tokens.
        alpha: forward values, one dict (tag -> value) per token.
        beta: backward values, one dict (tag -> value) per token.
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (tag, next_tag) -> transition
            probability; missing pairs count as 0.
        pos_tags: iterable of tags to consider.
        probabilitas_emisi_kata_kosong: mapping tag -> fallback emission
            probability for unseen (word, tag) pairs; missing tags count as 0.

    Returns:
        (gamma, ksi) where gamma[t][tag] is the normalised alpha*beta product
        at position t and ksi[t][tag][next_tag] is the normalised joint value
        for the transition between positions t and t + 1. When a
        normalisation factor is 0 the corresponding entries are set to 0
        instead of raising ZeroDivisionError.
    """
    T = len(sentence)
    gamma = [{} for _ in range(T)]
    ksi = [{} for _ in range(T - 1)]

    # Gamma: per-position state posterior.
    for t in range(T):
        normalization_factor = sum(alpha[t][pos] * beta[t][pos] for pos in pos_tags)
        for pos in pos_tags:
            if normalization_factor != 0:
                gamma[t][pos] = (alpha[t][pos] * beta[t][pos]) / normalization_factor
            else:
                # Same zero-guard that ksi uses below.
                gamma[t][pos] = 0

    # Ksi: per-position transition posterior.
    for t in range(T - 1):
        next_word = sentence[t + 1]
        # Compute every unnormalised joint value once and reuse it for both
        # the normalisation factor and the final ratio.
        joint = {
            pos1: {
                pos2: (
                    alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), 0) *
                    probabilitas_emisi.get((next_word, pos2), probabilitas_emisi_kata_kosong.get(pos2, 0)) *
                    beta[t + 1][pos2]
                )
                for pos2 in pos_tags
            }
            for pos1 in pos_tags
        }
        normalization_factor = sum(joint[pos1][pos2] for pos1 in pos_tags for pos2 in pos_tags)
        for pos1 in pos_tags:
            ksi[t][pos1] = {}
            for pos2 in pos_tags:
                if normalization_factor != 0:
                    ksi[t][pos1][pos2] = joint[pos1][pos2] / normalization_factor
                else:
                    ksi[t][pos1][pos2] = 0

    return gamma, ksi

def maximization_step(gamma, ksi, sentence, pos_tags):
    """Re-estimate the HMM parameters from the E-step posteriors.

    Args:
        gamma: per-position dicts mapping tag -> posterior.
        ksi: per-transition nested dicts mapping tag -> next_tag -> posterior.
        sentence: sequence of word tokens the posteriors were computed for.
        pos_tags: iterable of tags to consider.

    Returns:
        A tuple (initial, transition, emission) where initial maps
        tag -> value, transition maps (tag, next_tag) -> value and emission
        maps (word, tag) -> value. A ratio whose denominator is 0 becomes 0.
    """
    # New initial probabilities: the posterior at the first position.
    probabilitas_awal_baru = {}
    for tag in pos_tags:
        probabilitas_awal_baru[tag] = gamma[0][tag]

    # New transition probabilities.
    gamma_steps = range(len(gamma))
    probabilitas_transisi_baru = {}
    for source in pos_tags:
        for target in pos_tags:
            expected_moves = sum(step[source][target] for step in ksi)
            expected_visits = sum(gamma[t][source] for t in gamma_steps)
            if expected_visits == 0:
                ratio = 0
            else:
                ratio = expected_moves / expected_visits
            probabilitas_transisi_baru[(source, target)] = ratio

    # New emission probabilities, one entry per distinct word in the sentence.
    positions = range(len(sentence))
    probabilitas_emisi_baru = {}
    for tag in pos_tags:
        for word in set(sentence):
            matched = sum(gamma[t][tag] for t in positions if sentence[t] == word)
            total = sum(gamma[t][tag] for t in positions)
            probabilitas_emisi_baru[(word, tag)] = 0 if total == 0 else matched / total

    return probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru

In [5]:
# # Forward Algorithm dengan Maximization Step
# def algoritma_forward_max(sentence, probabilitas_awal, probabilitas_transisi, probabilitas_emisi):
#     # Inisialisasi alpha
#     alpha = [{}]
#     for pos in pos_tags:
#         alpha[0][pos] = probabilitas_awal[pos] * probabilitas_emisi[pos].get(sentence[0], 0)
        
#     # Iterasi untuk menghitung alpha
#     for t in range(1, len(sentence)):
#         alpha.append({})
#         for pos2 in pos_tags:
#             alpha[t][pos2] = sum(alpha[t-1][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t], 0) for pos1 in pos_tags)
            
#     return alpha

# # Backward Algorithm dengan Maximization Step
# def algoritma_backward_max(sentence, probabilitas_transisi, probabilitas_emisi):
#     # Inisialisasi beta
#     beta = [{} for _ in range(len(sentence))]
#     for pos in pos_tags:
#         beta[len(sentence)-1][pos] = 1
        
#     # Iterasi untuk menghitung beta
#     for t in range(len(sentence)-2, -1, -1):
#         for pos1 in pos_tags:
#             beta[t][pos1] = sum(probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * beta[t+1][pos2] for pos2 in pos_tags)
            
#     return beta

# # Expectation Step setelah Maximization: Menghitung gamma dan ksi
# def expectation_step_max(sentence, alpha, beta, probabilitas_transisi, probabilitas_emisi):
#     gamma = [{} for _ in range(len(alpha))]
#     ksi = [{} for _ in range(len(sentence) - 1)]
    
#     for t in range(len(alpha)):
#         normalization_factor = sum(alpha[t][pos] * beta[t][pos] for pos in pos_tags)
#         for pos in pos_tags:
#             gamma[t][pos] = (alpha[t][pos] * beta[t][pos]) / normalization_factor
    
#     for t in range(len(sentence) - 1):
#         normalization_factor = sum(alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * 
#                         beta[t+1][pos2] for pos1 in pos_tags for pos2 in pos_tags)
#         for pos1 in pos_tags:
#             ksi[t][pos1] = {}
#             for pos2 in pos_tags:
#                 ksi[t][pos1][pos2] = (alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * 
#                                         beta[t+1][pos2]) / normalization_factor
    
#     return gamma, ksi

# GUI

In [None]:
# Function to clear the frame
def clear_frame():
    """Destroy every child widget of the module-level ``root`` window."""
    children = root.winfo_children()
    for child in children:
        child.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    """Rebuild the prediction screen inside the module-level ``root`` window.

    Clears the window, then creates the title, input field, note, scrollable
    result box and the two buttons. The "Prediksi" button runs
    ``process_input``; the "Hapus" button rebuilds the whole screen.

    Relies on module-level names: ``root``, ``clear_frame``, the model
    parameters (``probabilitas_awal``, ``probabilitas_transisi``,
    ``probabilitas_emisi``, ``probabilitas_emisi_kata_kosong``), ``pos_tags``
    and the Baum-Welch helper functions.
    """
    clear_frame()

    # Add title
    title_label = tk.Label(root, text="Aplikasi Prediksi Pos Tagging Bahasa Jawa", font=("Helvetica", 16, "bold"), bg="dodger blue", fg="white",padx=10, pady=5)
    title_label.pack(pady=(10, 5), padx=10)

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=(10, 0))

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=(5, 5), padx=10)

    # Add note about the maximum number of words
    # NOTE(review): the limit in this note is only displayed, not enforced.
    note_label = tk.Label(root, text="Catatan : Tidak memberikan kalimat yang panjang (Max Kata = 122)", fg="red")
    note_label.pack(pady=(0, 5))

    # Add result label
    result_label = tk.Label(root, text="Hasil prediksi:")
    result_label.pack(pady=(10, 0))

    # Frame to hold result box and scrollbar
    result_frame = tk.Frame(root)
    result_frame.pack(pady=(5, 10), padx=10, fill="both", expand=True)

    # Scrollbar for the result box
    scrollbar = tk.Scrollbar(result_frame)
    scrollbar.pack(side="right", fill="y")

    # Result box (Text widget)
    result_box = tk.Text(result_frame, wrap="word", height=10, width=50, yscrollcommand=scrollbar.set)
    result_box.pack(side="left", fill="both", expand=True)
    scrollbar.config(command=result_box.yview)

    # Tokens that are tagged by rule instead of by the model.
    punctuation_tokens = {'.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-'}
    symbol_tokens = {'$', '%', '@', '&', '#', '*'}

    def process_input():
        """Tag the sentence in the input field and show the result.

        Runs one Baum-Welch pass with the loaded model, then repeats
        E-step/M-step passes on the re-estimated parameters until the
        likelihood change is no longer above the threshold. Progress is
        printed to stdout; the tags are written to the result box.
        """
        # str.split() without arguments drops all whitespace, so an empty or
        # whitespace-only entry yields no tokens. Return early in that case:
        # the forward pass indexes sentence[0] and would raise IndexError.
        sentence = input_field.get().lower().split()
        if not sentence:
            return

        # First pass with the parameters loaded from the model file.
        alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags, probabilitas_emisi_kata_kosong)
        beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong)
        gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong)
        probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)

        # Likelihood under the re-estimated parameters.
        alpha_max = algoritma_forward(sentence, probabilitas_emisi_baru, probabilitas_transisi_baru, probabilitas_awal_baru, pos_tags, probabilitas_emisi_kata_kosong)
        beta_max = algoritma_backward(sentence, probabilitas_emisi_baru, probabilitas_transisi_baru, pos_tags, probabilitas_emisi_kata_kosong)
        likelihood = sum(alpha_max[-1].get(pos, 0) * beta_max[-1].get(pos, 0) for pos in pos_tags)
        print("Nilai Likelihood (L):", likelihood)

        gamma_max, ksi_max = expectation_step(sentence, alpha_max, beta_max, probabilitas_emisi_baru, probabilitas_transisi_baru, pos_tags, probabilitas_emisi_kata_kosong)

        threshold = 1e-3
        likelihood_diff = float('inf')
        prev_likelihood = 0
        iteration = 0

        # The difference is signed, so the loop also stops as soon as the
        # likelihood fails to grow by more than the threshold.
        while likelihood_diff > threshold:
            iteration += 1

            alpha_max = algoritma_forward(sentence, probabilitas_emisi_baru, probabilitas_transisi_baru, probabilitas_awal_baru, pos_tags, probabilitas_emisi_kata_kosong)
            beta_max = algoritma_backward(sentence, probabilitas_emisi_baru, probabilitas_transisi_baru, pos_tags, probabilitas_emisi_kata_kosong)

            gamma_max, ksi_max = expectation_step(sentence, alpha_max, beta_max, probabilitas_emisi_baru, probabilitas_transisi_baru, pos_tags, probabilitas_emisi_kata_kosong)

            probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma_max, ksi_max, sentence, pos_tags)
            likelihood = sum(alpha_max[-1][pos] * beta_max[-1][pos] for pos in pos_tags)

            # The first iteration has nothing to compare against, so the
            # difference stays at infinity until the second one.
            if iteration >= 2:
                likelihood_diff = likelihood - prev_likelihood
            prev_likelihood = likelihood
            print(f"Iteration {iteration}: Likelihood = {likelihood:.9f}, Change in Likelihood = {likelihood_diff:.9f}")

        print("\nFinal Likelihood:", likelihood)

        # Pick, per token, either the rule-based tag or the tag with the
        # highest gamma value from the last E-step.
        predicted_tags = []
        for t, kata in enumerate(sentence):
            if kata in punctuation_tokens:
                predicted_tags.append('PUNCT')
            elif kata in symbol_tokens:
                predicted_tags.append('SYM')
            else:
                predicted_tags.append(max(gamma_max[t], key=gamma_max[t].get))

        result_text = "\n".join(f"{word}: {tag}" for word, tag in zip(sentence, predicted_tags)) + "\n"

        result_box.delete("1.0", tk.END)  # Clear the box before displaying new results
        result_box.insert(tk.END, f"Hasil Prediksi:\n{result_text}")

    # Frame to hold the buttons
    button_frame = tk.Frame(root)
    button_frame.pack(pady=5)

    # Predict button
    predict_button = tk.Button(button_frame, text="Prediksi", command=process_input, width=15, height=2, bg="dodger blue", fg="white", font=("Helvetica", 12, "bold"), bd=2, relief="solid", highlightbackground="white")
    predict_button.pack(side="left", padx=10)

    # New button to restart the prediction
    def new_prediction():
        predict_pos_tagging()

    new_button = tk.Button(button_frame, text="Hapus", command=new_prediction, width=15, height=2, bg="dodger blue", fg="white", font=("Helvetica", 12, "bold"), bd=2, relief="solid", highlightbackground="white")
    new_button.pack(side="left", padx=10)

# Create the GUI: `root` is the module-level window that clear_frame() and
# predict_pos_tagging() operate on.
root = tk.Tk()
root.title("Aplikasi POS Tagging")

# Build the initial prediction screen inside the window.
predict_pos_tagging()

# Start the tkinter event loop.
root.mainloop()

Nilai Likelihood (L): 9.845216324889176e-12
Iteration 1: Likelihood = 0.000000000, Change in Likelihood = inf
Iteration 2: Likelihood = 0.000000007, Change in Likelihood = 0.000000007

Final Likelihood: 7.426076694279858e-09


# Buat Cari Warna

In [8]:
from tkinter import *


class ScrolledFrame(Frame):
    """A Frame embedded in a canvas that has both scrollbars attached.

    The widget hierarchy is: an outer container frame (child of ``master``)
    holding a vertical scrollbar, a horizontal scrollbar and a canvas; this
    frame itself is created as a child of the canvas and placed in it as a
    canvas window tagged ``'frame'``.
    """

    def __init__(self, master=None, *args, **kwargs):
        # Outer container that holds the scrollbars and the canvas.
        container = Frame(master)

        vertical_bar = Scrollbar(container)
        vertical_bar.pack(side='right', fill='y')
        horizontal_bar = Scrollbar(container, orient='horizontal')
        horizontal_bar.pack(side='bottom', fill='x')

        viewport = Canvas(container, highlightthickness=0)
        viewport.pack(expand=True, fill='both')

        # This frame lives inside the canvas rather than inside `master`.
        super().__init__(master=viewport, *args, **kwargs)
        viewport.create_window(0, 0, window=self, anchor='nw', tags='frame')

        # Recompute the scrollable area whenever the canvas is reconfigured.
        viewport.bind('<Configure>', self.__set_scroll)

        # Wire the scrollbars and the canvas to each other.
        vertical_bar.configure(command=viewport.yview)
        viewport.configure(yscrollcommand=vertical_bar.set)
        horizontal_bar.configure(command=viewport.xview)
        viewport.configure(xscrollcommand=horizontal_bar.set)

        # Keep references to the helper widgets on the container.
        container._vscroll = vertical_bar
        container._hscroll = horizontal_bar
        container._canvas = viewport
        self._top = container

    def pack(self, *args, **kwargs):
        '''
        Pack the outer container instead of this inner frame.
        '''
        outer = self._top
        return outer.pack(*args, **kwargs)

    def __set_scroll(self, event=None):
        # Use the bounding box of the embedded frame as the scroll region.
        viewport = self._top._canvas
        region = viewport.bbox('frame')
        viewport.config(scrollregion=region)

def showcolors(colors):
    """Open a window showing every colour name as a label on its own colour.

    The labels are laid out on a grid whose column count is
    int(sqrt(len(colors) / 3)), but never less than 1. Double-clicking a
    label copies its text to the clipboard; hovering switches the label text
    to white and leaving switches it back to black.

    Args:
        colors: sequence of tkinter colour names. It is copied and not
            modified.

    This call blocks inside the tkinter main loop until the window closes.
    """
    colors = list(colors)
    root = Tk()
    root.title('Tkinter colors showcase')
    Label(root, text='Double click to copy to clipboard').pack(fill='x')
    top = ScrolledFrame(root)

    # With fewer than three colours the formula gives 0 columns. A zero-width
    # chunk never consumes the list, so the layout loop would never finish;
    # clamp to at least one column.
    colnum = max(1, int((len(colors) / 3) ** 0.5))

    for row, start in enumerate(range(0, len(colors), colnum)):
        for col, color in enumerate(colors[start:start + colnum]):
            lab = Label(top, text=color, bg=color)
            lab.grid(row=row, column=col, sticky='wens')
            lab.bind('<Double-1>', _clipboard_copy(lab))
            # `lab=lab` binds the current label to each callback.
            lab.bind('<Enter>', lambda ev, lab=lab: lab.config(fg='white'))
            lab.bind('<Leave>', lambda ev, lab=lab: lab.config(fg='black'))

    top.pack(expand=True, fill='both')
    root.mainloop()

def _clipboard_copy(inst):
    def wrapper(event):
        inst.clipboard_clear()
        inst.clipboard_append(inst['text'])
    return wrapper



COLORS = ['snow', 'ghost white', 'white smoke', 'gainsboro', 'floral white', 'old lace',
          'linen', 'antique white', 'papaya whip', 'blanched almond', 'bisque', 'peach puff',
          'navajo white', 'lemon chiffon', 'mint cream', 'azure', 'alice blue', 'lavender',
          'lavender blush', 'misty rose', 'dark slate gray', 'dim gray', 'slate gray',
          'light slate gray', 'gray', 'light grey', 'midnight blue', 'navy', 'cornflower blue', 'dark slate blue',
          'slate blue', 'medium slate blue', 'light slate blue', 'medium blue', 'royal blue',  'blue',
          'dodger blue', 'deep sky blue', 'sky blue', 'light sky blue', 'steel blue', 'light steel blue',
          'light blue', 'powder blue', 'pale turquoise', 'dark turquoise', 'medium turquoise', 'turquoise',
          'cyan', 'light cyan', 'cadet blue', 'medium aquamarine', 'aquamarine', 'dark green', 'dark olive green',
          'dark sea green', 'sea green', 'medium sea green', 'light sea green', 'pale green', 'spring green',
          'lawn green', 'medium spring green', 'green yellow', 'lime green', 'yellow green',
          'forest green', 'olive drab', 'dark khaki', 'khaki', 'pale goldenrod', 'light goldenrod yellow',
          'light yellow', 'yellow', 'gold', 'light goldenrod', 'goldenrod', 'dark goldenrod', 'rosy brown',
          'indian red', 'saddle brown', 'sandy brown',
          'dark salmon', 'salmon', 'light salmon', 'orange', 'dark orange',
          'coral', 'light coral', 'tomato', 'orange red', 'red', 'hot pink', 'deep pink', 'pink', 'light pink',
          'pale violet red', 'maroon', 'medium violet red', 'violet red',
          'medium orchid', 'dark orchid', 'dark violet', 'blue violet', 'purple', 'medium purple',
          'thistle', 'snow2', 'snow3',
          'snow4', 'seashell2', 'seashell3', 'seashell4', 'AntiqueWhite1', 'AntiqueWhite2',
          'AntiqueWhite3', 'AntiqueWhite4', 'bisque2', 'bisque3', 'bisque4', 'PeachPuff2',
          'PeachPuff3', 'PeachPuff4', 'NavajoWhite2', 'NavajoWhite3', 'NavajoWhite4',
          'LemonChiffon2', 'LemonChiffon3', 'LemonChiffon4', 'cornsilk2', 'cornsilk3',
          'cornsilk4', 'ivory2', 'ivory3', 'ivory4', 'honeydew2', 'honeydew3', 'honeydew4',
          'LavenderBlush2', 'LavenderBlush3', 'LavenderBlush4', 'MistyRose2', 'MistyRose3',
          'MistyRose4', 'azure2', 'azure3', 'azure4', 'SlateBlue1', 'SlateBlue2', 'SlateBlue3',
          'SlateBlue4', 'RoyalBlue1', 'RoyalBlue2', 'RoyalBlue3', 'RoyalBlue4', 'blue2', 'blue4',
          'DodgerBlue2', 'DodgerBlue3', 'DodgerBlue4', 'SteelBlue1', 'SteelBlue2',
          'SteelBlue3', 'SteelBlue4', 'DeepSkyBlue2', 'DeepSkyBlue3', 'DeepSkyBlue4',
          'SkyBlue1', 'SkyBlue2', 'SkyBlue3', 'SkyBlue4', 'LightSkyBlue1', 'LightSkyBlue2',
          'LightSkyBlue3', 'LightSkyBlue4', 'SlateGray1', 'SlateGray2', 'SlateGray3',
          'SlateGray4', 'LightSteelBlue1', 'LightSteelBlue2', 'LightSteelBlue3',
          'LightSteelBlue4', 'LightBlue1', 'LightBlue2', 'LightBlue3', 'LightBlue4',
          'LightCyan2', 'LightCyan3', 'LightCyan4', 'PaleTurquoise1', 'PaleTurquoise2',
          'PaleTurquoise3', 'PaleTurquoise4', 'CadetBlue1', 'CadetBlue2', 'CadetBlue3',
          'CadetBlue4', 'turquoise1', 'turquoise2', 'turquoise3', 'turquoise4', 'cyan2', 'cyan3',
          'cyan4', 'DarkSlateGray1', 'DarkSlateGray2', 'DarkSlateGray3', 'DarkSlateGray4',
          'aquamarine2', 'aquamarine4', 'DarkSeaGreen1', 'DarkSeaGreen2', 'DarkSeaGreen3',
          'DarkSeaGreen4', 'SeaGreen1', 'SeaGreen2', 'SeaGreen3', 'PaleGreen1', 'PaleGreen2',
          'PaleGreen3', 'PaleGreen4', 'SpringGreen2', 'SpringGreen3', 'SpringGreen4',
          'green2', 'green3', 'green4', 'chartreuse2', 'chartreuse3', 'chartreuse4',
          'OliveDrab1', 'OliveDrab2', 'OliveDrab4', 'DarkOliveGreen1', 'DarkOliveGreen2',
          'DarkOliveGreen3', 'DarkOliveGreen4', 'khaki1', 'khaki2', 'khaki3', 'khaki4',
          'LightGoldenrod1', 'LightGoldenrod2', 'LightGoldenrod3', 'LightGoldenrod4',
          'LightYellow2', 'LightYellow3', 'LightYellow4', 'yellow2', 'yellow3', 'yellow4',
          'gold2', 'gold3', 'gold4', 'goldenrod1', 'goldenrod2', 'goldenrod3', 'goldenrod4',
          'DarkGoldenrod1', 'DarkGoldenrod2', 'DarkGoldenrod3', 'DarkGoldenrod4',
          'RosyBrown1', 'RosyBrown2', 'RosyBrown3', 'RosyBrown4', 'IndianRed1', 'IndianRed2',
          'IndianRed3', 'IndianRed4', 'sienna1', 'sienna2', 'sienna3', 'sienna4', 'burlywood1',
          'burlywood2', 'burlywood3', 'burlywood4', 'wheat1', 'wheat2', 'wheat3', 'wheat4', 'tan1',
          'tan2', 'tan4', 'chocolate1', 'chocolate2', 'chocolate3', 'firebrick1', 'firebrick2',
          'firebrick3', 'firebrick4', 'brown1', 'brown2', 'brown3', 'brown4', 'salmon1', 'salmon2',
          'salmon3', 'salmon4', 'LightSalmon2', 'LightSalmon3', 'LightSalmon4', 'orange2',
          'orange3', 'orange4', 'DarkOrange1', 'DarkOrange2', 'DarkOrange3', 'DarkOrange4',
          'coral1', 'coral2', 'coral3', 'coral4', 'tomato2', 'tomato3', 'tomato4', 'OrangeRed2',
          'OrangeRed3', 'OrangeRed4', 'red2', 'red3', 'red4', 'DeepPink2', 'DeepPink3', 'DeepPink4',
          'HotPink1', 'HotPink2', 'HotPink3', 'HotPink4', 'pink1', 'pink2', 'pink3', 'pink4',
          'LightPink1', 'LightPink2', 'LightPink3', 'LightPink4', 'PaleVioletRed1',
          'PaleVioletRed2', 'PaleVioletRed3', 'PaleVioletRed4', 'maroon1', 'maroon2',
          'maroon3', 'maroon4', 'VioletRed1', 'VioletRed2', 'VioletRed3', 'VioletRed4',
          'magenta2', 'magenta3', 'magenta4', 'orchid1', 'orchid2', 'orchid3', 'orchid4', 'plum1',
          'plum2', 'plum3', 'plum4', 'MediumOrchid1', 'MediumOrchid2', 'MediumOrchid3',
          'MediumOrchid4', 'DarkOrchid1', 'DarkOrchid2', 'DarkOrchid3', 'DarkOrchid4',
          'purple1', 'purple2', 'purple3', 'purple4', 'MediumPurple1', 'MediumPurple2',
          'MediumPurple3', 'MediumPurple4', 'thistle1', 'thistle2', 'thistle3', 'thistle4',
          'gray1', 'gray2', 'gray3', 'gray4', 'gray5', 'gray6', 'gray7', 'gray8', 'gray9', 'gray10',
          'gray11', 'gray12', 'gray13', 'gray14', 'gray15', 'gray16', 'gray17', 'gray18', 'gray19',
          'gray20', 'gray21', 'gray22', 'gray23', 'gray24', 'gray25', 'gray26', 'gray27', 'gray28',
          'gray29', 'gray30', 'gray31', 'gray32', 'gray33', 'gray34', 'gray35', 'gray36', 'gray37',
          'gray38', 'gray39', 'gray40', 'gray42', 'gray43', 'gray44', 'gray45', 'gray46', 'gray47',
          'gray48', 'gray49', 'gray50', 'gray51', 'gray52', 'gray53', 'gray54', 'gray55', 'gray56',
          'gray57', 'gray58', 'gray59', 'gray60', 'gray61', 'gray62', 'gray63', 'gray64', 'gray65',
          'gray66', 'gray67', 'gray68', 'gray69', 'gray70', 'gray71', 'gray72', 'gray73', 'gray74',
          'gray75', 'gray76', 'gray77', 'gray78', 'gray79', 'gray80', 'gray81', 'gray82', 'gray83',
          'gray84', 'gray85', 'gray86', 'gray87', 'gray88', 'gray89', 'gray90', 'gray91', 'gray92',
          'gray93', 'gray94', 'gray95', 'gray97', 'gray98', 'gray99']

# Open the colour showcase window for the COLORS list when this code runs as
# the main module.
if __name__ == '__main__':
    showcolors(COLORS)

# Buat Cari Font

In [7]:
from tkinter import *
from tkinter import font

# Create the window first, then collect the font family names and sort them.
# NOTE(review): font.families() appears to need an existing Tk root when no
# root is passed explicitly - confirm against the tkinter.font documentation.
root = Tk()
root.title('Font Families')
fonts=list(font.families())
fonts.sort()

def populate(frame):
    '''Add one label per font family to `frame`.

    Each label shows the family name rendered in that family at size 16 and
    is placed on its own grid row. Clicking a label with the left mouse
    button passes the family name to copy_to_clipboard().

    Reads the module-level `fonts` list.
    '''
    for row, family in enumerate(fonts):
        label = Label(frame, text=family, font=(family, 16))
        label.grid(row=row)
        # `item=family` binds the current family name to this callback.
        label.bind("<Button-1>", lambda e, item=family: copy_to_clipboard(item))

def copy_to_clipboard(item):
    '''Replace the clipboard content with a font=(...) snippet for `item`.

    Leading '@' characters are removed from the family name before it is
    placed in the snippet. Uses the module-level `root` window.
    '''
    root.clipboard_clear()
    family = item.lstrip('@')
    snippet = "font=('" + family + "', 12)"
    root.clipboard_append(snippet)

def onFrameConfigure(canvas):
    '''Set the canvas scroll region to the bounding box of all its items.'''
    everything = canvas.bbox("all")
    canvas.configure(scrollregion=everything)

# Scrollable layout: a canvas with a vertical scrollbar; the inner frame is
# a child of the canvas and is embedded in it as a canvas window.
canvas = Canvas(root, borderwidth=0, background="#ffffff")
frame = Frame(canvas, background="#ffffff")
vsb = Scrollbar(root, orient="vertical", command=canvas.yview)
canvas.configure(yscrollcommand=vsb.set)

vsb.pack(side="right", fill="y")
canvas.pack(side="left", fill="both", expand=True)
canvas.create_window((4,4), window=frame, anchor="nw")

# Update the scroll region whenever the inner frame is reconfigured.
frame.bind("<Configure>", lambda event, canvas=canvas: onFrameConfigure(canvas))

# Fill the frame with one label per font family.
populate(frame)

# Start the tkinter event loop.
root.mainloop()

### Kodingan di bawah enggak fix

In [None]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pickle

# Load the model results from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'model_results.pkl' must come from a trusted source.
# process_input() below iterates model_results as a collection of sequences
# of (word, tag) pairs.
with open('model_results.pkl', 'rb') as file:
    model_results = pickle.load(file)

# Define the POS tags
# NOTE(review): the lookup-based GUI in this cell does not read this list.
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    """Destroy every child widget of the module-level ``root`` window."""
    children = root.winfo_children()
    for child in children:
        child.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    """Rebuild the lookup-based prediction screen inside ``root``.

    Clears the window and creates a prompt, an input field, a result label
    and two buttons. "Predict" looks each input word up in the module-level
    ``model_results``; "New" rebuilds the screen.
    """
    clear_frame()

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=10)

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=10)

    # Result label
    result_label = tk.Label(root, text="", justify="left")
    result_label.pack(pady=10)

    def process_input():
        """Show the first tag found in ``model_results`` for each input word."""
        raw_text = input_field.get()
        if not raw_text:
            return
        test_sentence = raw_text.lower().split()

        # Walk the stored results in order and remember the first tag seen
        # for each word of the input sentence.
        first_tag = {}
        for result in model_results:
            for word, tag in result:
                if word in test_sentence and word not in first_tag:
                    first_tag[word] = tag

        if not first_tag:
            result_label.config(text="Tidak ada kata yang ditemukan di kalimat input.")
            return

        result_text = "\n".join(f"{word}: {tag}" for word, tag in first_tag.items())
        message = f"Hasil Prediksi: {result_text}"

        # Words of the input that never occurred in the stored results.
        kata_baru = [word for word in test_sentence if word not in first_tag]
        if kata_baru:
            missing_text = ", ".join(kata_baru)
            message = f"Hasil Prediksi: {result_text}\n\nKata yang tidak bisa diprediksi:\n{missing_text}"

        result_label.config(text=message)

    # Predict button
    predict_button = tk.Button(root, text="Predict", command=process_input)
    predict_button.pack(pady=5)

    # Button that rebuilds the screen for a new prediction; the function name
    # is resolved at click time.
    new_button = tk.Button(root, text="New", command=lambda: predict_pos_tagging())
    new_button.pack(pady=5)

# Create the GUI
root = tk.Tk()
root.title("POS Tagging Predictor")

predict_pos_tagging()

root.mainloop()


In [None]:


# Algoritma Forward
def algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags, probabilitas_emisi_kata_kosong):
    """Run the HMM forward pass over one sentence.

    Args:
        sentence: sequence of word tokens (must be non-empty when pos_tags is).
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (previous_tag, tag) -> transition
            probability; missing pairs count as 0.
        probabilitas_awal: mapping tag -> initial probability; missing tags
            count as 0.
        pos_tags: iterable of tags to consider.
        probabilitas_emisi_kata_kosong: mapping tag -> fallback emission
            probability used when (word, tag) is not in probabilitas_emisi.

    Returns:
        A list with exactly one dict per token, each mapping tag -> forward
        value.
    """
    # The list is sized up front, so nothing may be appended later: doing
    # both left empty dicts at the end and made alpha[-1] and len(alpha)
    # wrong for sentences longer than one word.
    alpha = [{} for _ in range(len(sentence))]

    # Initialisation: start probability times emission of the first word.
    for pos in pos_tags:
        alpha[0][pos] = probabilitas_awal.get(pos, 0) * probabilitas_emisi.get((sentence[0], pos), probabilitas_emisi_kata_kosong[pos])

    # Recursion over the remaining positions.
    for t in range(1, len(sentence)):
        for pos in pos_tags:
            alpha[t][pos] = sum(
                alpha[t - 1][prev_pos]
                * probabilitas_transisi.get((prev_pos, pos), 0)
                * probabilitas_emisi.get((sentence[t], pos), probabilitas_emisi_kata_kosong[pos])
                for prev_pos in pos_tags
            )

    return alpha

# Algoritma Backward
def algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong):
    """Run the HMM backward pass over one sentence.

    Args:
        sentence: sequence of word tokens.
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (tag, next_tag) -> transition
            probability; missing pairs count as 0.
        pos_tags: iterable of tags to consider.
        probabilitas_emisi_kata_kosong: mapping tag -> fallback emission
            probability used when (word, tag) is not in probabilitas_emisi.

    Returns:
        A list with one dict per token, each mapping tag -> backward value.
    """
    length = len(sentence)
    beta = [dict() for _ in range(length)]

    # Initialisation: every tag gets 1 at the final position.
    for tag in pos_tags:
        beta[-1][tag] = 1

    # Walk from the second-to-last position down to the first.
    for t in reversed(range(length - 1)):
        following = beta[t + 1]
        next_word = sentence[t + 1]
        for tag in pos_tags:
            beta[t][tag] = sum(
                following[nxt]
                * probabilitas_transisi.get((tag, nxt), 0)
                * probabilitas_emisi.get((next_word, nxt), probabilitas_emisi_kata_kosong[nxt])
                for nxt in pos_tags
            )

    return beta

def expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size):
    """Compute the E-step posteriors (gamma and ksi) for one sentence.

    Missing transition or emission entries fall back to
    1 / (pos_freq[tag] + vocab_size), using the source tag for transitions
    and the target tag for emissions.

    Args:
        sentence: sequence of word tokens.
        alpha: forward values, one dict (tag -> value) per token.
        beta: backward values, one dict (tag -> value) per token.
        probabilitas_emisi: mapping (word, tag) -> emission probability.
        probabilitas_transisi: mapping (tag, next_tag) -> transition
            probability.
        pos_tags: iterable of tags to consider.
        pos_freq: mapping tag -> number used in the fallback denominator.
        vocab_size: number added to pos_freq[tag] in the fallback denominator.

    Returns:
        (gamma, ksi) where gamma[t][tag] is the normalised alpha*beta product
        at position t and ksi[t][tag][next_tag] is the normalised joint value
        for the transition between positions t and t + 1. When a
        normalisation factor is 0 the corresponding entries are set to 0
        instead of raising ZeroDivisionError.
    """
    T = len(sentence)
    gamma = [{} for _ in range(T)]
    ksi = [{} for _ in range(T - 1)]

    # Gamma: per-position state posterior.
    for t in range(T):
        normalization_factor = sum(alpha[t][pos] * beta[t][pos] for pos in pos_tags)
        for pos in pos_tags:
            if normalization_factor != 0:
                gamma[t][pos] = (alpha[t][pos] * beta[t][pos]) / normalization_factor
            else:
                gamma[t][pos] = 0

    # Ksi: per-position transition posterior.
    for t in range(T - 1):
        next_word = sentence[t + 1]
        # Compute every unnormalised joint value once and reuse it for both
        # the normalisation factor and the final ratio.
        joint = {
            pos1: {
                pos2: (
                    alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), (1 / (pos_freq[pos1] + vocab_size))) *
                    probabilitas_emisi.get((next_word, pos2), (1 / (pos_freq[pos2] + vocab_size))) * beta[t + 1][pos2]
                )
                for pos2 in pos_tags
            }
            for pos1 in pos_tags
        }
        normalization_factor = sum(joint[pos1][pos2] for pos1 in pos_tags for pos2 in pos_tags)
        for pos1 in pos_tags:
            ksi[t][pos1] = {}
            for pos2 in pos_tags:
                if normalization_factor != 0:
                    ksi[t][pos1][pos2] = joint[pos1][pos2] / normalization_factor
                else:
                    ksi[t][pos1][pos2] = 0

    return gamma, ksi

def maximization_step(gamma, ksi, sentence, pos_tags):
    """Re-estimate the HMM parameters from the E-step posteriors.

    Args:
        gamma: per-position dicts mapping tag -> posterior.
        ksi: per-transition nested dicts mapping tag -> next_tag -> posterior.
        sentence: sequence of word tokens the posteriors were computed for.
        pos_tags: iterable of tags to consider.

    Returns:
        A tuple (initial, transition, emission) where initial maps
        tag -> value, transition maps (tag, next_tag) -> value and emission
        is a nested dict tag -> word -> value. A ratio whose denominator is
        0 becomes 0 instead of raising ZeroDivisionError.
    """
    probabilitas_awal_baru = {pos: gamma[0][pos] for pos in pos_tags}

    # Total posterior mass per tag; it is the denominator of both the
    # transition and the emission update, so compute it once per tag.
    total_per_tag = {pos: sum(gamma[t][pos] for t in range(len(gamma))) for pos in pos_tags}

    probabilitas_transisi_baru = {}
    for pos1 in pos_tags:
        b = total_per_tag[pos1]
        for pos2 in pos_tags:
            a = sum(ksi[t][pos1][pos2] for t in range(len(ksi)))
            probabilitas_transisi_baru[(pos1, pos2)] = a / b if b != 0 else 0

    # Repeated words give the same value, so visit each distinct word once
    # (dict.fromkeys keeps first-occurrence order).
    unique_words = list(dict.fromkeys(sentence))

    probabilitas_emisi_baru = {}
    for pos in pos_tags:
        b = total_per_tag[pos]
        probabilitas_emisi_baru[pos] = {}
        for word in unique_words:
            a = sum(gamma[t][pos] for t in range(len(gamma)) if sentence[t] == word)
            probabilitas_emisi_baru[pos][word] = a / b if b != 0 else 0

    return probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru

# Forward Algorithm dengan Maximization Step
def algoritma_forward_max(sentence, probabilitas_awal, probabilitas_transisi, probabilitas_emisi):
    """Forward pass using the nested emission table tag -> word -> value.

    Iterates over the module-level ``pos_tags``. Words missing from a tag's
    emission table and missing transition pairs count as 0.

    Returns:
        A list with one dict per token, each mapping tag -> forward value.
    """
    # Initialise alpha with the column for the first word.
    alpha = [{
        tag: probabilitas_awal[tag] * probabilitas_emisi[tag].get(sentence[0], 0)
        for tag in pos_tags
    }]

    # Build every later column from the one right before it.
    for word in sentence[1:]:
        previous = alpha[-1]
        alpha.append({
            tag: sum(
                previous[prior] * probabilitas_transisi.get((prior, tag), 0) * probabilitas_emisi[tag].get(word, 0)
                for prior in pos_tags
            )
            for tag in pos_tags
        })

    return alpha

# Backward Algorithm dengan Maximization Step
def algoritma_backward_max(sentence, probabilitas_transisi, probabilitas_emisi):
    """Backward pass using the nested emission table tag -> word -> value.

    Iterates over the module-level ``pos_tags``. Words missing from a tag's
    emission table and missing transition pairs count as 0.

    Returns:
        A list with one dict per token, each mapping tag -> backward value.
    """
    last = len(sentence) - 1
    beta = [{} for _ in sentence]

    # Initialise beta: every tag gets 1 at the final position.
    for tag in pos_tags:
        beta[last][tag] = 1

    # Walk from the second-to-last position down to the first.
    for t in reversed(range(last)):
        next_word = sentence[t + 1]
        following = beta[t + 1]
        for tag in pos_tags:
            beta[t][tag] = sum(
                probabilitas_transisi.get((tag, nxt), 0) * probabilitas_emisi[nxt].get(next_word, 0) * following[nxt]
                for nxt in pos_tags
            )

    return beta

# Expectation Step setelah Maximization: Menghitung gamma dan ksi
def expectation_step_max(sentence, alpha, beta, probabilitas_transisi, probabilitas_emisi):
    """E-step for the re-estimated parameters: compute gamma and ksi.

    The tag set is read from the module-level ``pos_tags``.

    Args:
        sentence: Sequence of tokens.
        alpha: Forward values, one ``{tag: value}`` dict per position.
        beta: Backward values, indexed the same way as ``alpha``.
        probabilitas_transisi: Mapping ``(tag, next_tag) -> probability``;
            missing pairs count as 0.
        probabilitas_emisi: Mapping ``tag -> {word: probability}``; words
            missing from a tag's table count as 0.

    Returns:
        ``(gamma, ksi)`` where ``gamma[t][tag]`` is ``alpha[t][tag] *
        beta[t][tag]`` normalised over all tags at position ``t``, and
        ``ksi[t][tag][next_tag]`` is the analogous normalised quantity for the
        tag pair at positions ``t`` and ``t + 1``.  ``ksi`` has one entry
        fewer than the sentence has tokens.

    Raises:
        ZeroDivisionError: if a normaliser is zero at some position.
    """
    gamma = []
    for t, alpha_t in enumerate(alpha):
        beta_t = beta[t]
        joint = {tag: alpha_t[tag] * beta_t[tag] for tag in pos_tags}
        total = sum(joint[tag] for tag in pos_tags)
        gamma.append({tag: joint[tag] / total for tag in pos_tags})

    ksi = []
    for t in range(len(sentence) - 1):
        next_word = sentence[t + 1]
        alpha_t = alpha[t]
        beta_next = beta[t + 1]
        # Unnormalised pair scores, computed once and reused for the
        # normaliser and for the final division.
        joint = {
            tag: {
                next_tag: (
                    alpha_t[tag]
                    * probabilitas_transisi.get((tag, next_tag), 0)
                    * probabilitas_emisi[next_tag].get(next_word, 0)
                    * beta_next[next_tag]
                )
                for next_tag in pos_tags
            }
            for tag in pos_tags
        }
        total = sum(joint[tag][next_tag] for tag in pos_tags for next_tag in pos_tags)
        ksi.append({
            tag: {next_tag: joint[tag][next_tag] / total for next_tag in pos_tags}
            for tag in pos_tags
        })

    return gamma, ksi

import tkinter as tk
from tkinter import *
import pickle
from collections import Counter

# Load the trained Baum-Welch model from the pickle file.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# 'model_Baum_Welch.pkl' must only ever come from a trusted source.
with open('model_Baum_Welch.pkl', 'rb') as file:
    model = pickle.load(file)

# Extract model parameters
probabilitas_awal = model['probabilitas_awal']          # looked up by tag
probabilitas_transisi = model['probabilitas_transisi']  # looked up by (prev_tag, tag)
probabilitas_emisi = model['probabilitas_emisi']        # looked up by (word, tag)
# Per-tag fallback emission used when a (word, tag) pair is not in probabilitas_emisi.
probabilitas_emisi_kata_kosong = model['probabilitas_emisi_kata_kosong']
pos_freq = model['pos_freq']
vocab_size = model['vocab_size']

# Define the POS tags (single definition; the list was previously assigned
# twice with the same contents).
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    """Destroy every child widget currently attached to the global ``root``."""
    for child in root.winfo_children():
        child.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    """Build the POS-tagging screen inside the global ``root`` window.

    Clears whatever ``root`` currently shows, then lays out the title, the
    sentence entry, a scrollable result box and the "Prediksi" / "Hapus"
    buttons.  "Prediksi" runs ``process_input`` on the typed sentence;
    "Hapus" rebuilds this screen from scratch, which also empties the entry
    and the result box.

    Relies on module-level names defined elsewhere in the notebook: ``root``,
    ``clear_frame``, the loaded model parameters (``probabilitas_*``,
    ``pos_tags``, ``pos_freq``, ``vocab_size``) and the Baum-Welch helpers
    (``algoritma_forward``, ``algoritma_backward``, ``expectation_step``,
    ``maximization_step`` and their ``*_max`` counterparts).
    """
    clear_frame()

    # Tokens that get a fixed tag instead of the HMM's most probable tag.
    punctuation_tokens = ('.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-')
    symbol_tokens = ('$', '%', '@', '&', '#', '*')
    # Re-estimation stops once one iteration improves the likelihood by at
    # most this much.
    threshold = 1e-4

    # Add title
    title_label = tk.Label(root, text="Aplikasi Prediksi Pos Tagging Bahasa Jawa", font=("Helvetica", 16, "bold"), bg="dodger blue", fg="white", padx=10, pady=5)
    title_label.pack(pady=(10, 5), padx=10)

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=(10, 0))

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=(5, 20), padx=10)

    # Add result label
    result_label = tk.Label(root, text="Hasil prediksi:")
    result_label.pack(pady=(10, 0))

    # Frame to hold result box and scrollbar
    result_frame = tk.Frame(root)
    result_frame.pack(pady=(5, 10), padx=10, fill="both", expand=True)

    # Scrollbar for the result box
    scrollbar = tk.Scrollbar(result_frame)
    scrollbar.pack(side="right", fill="y")

    # Result box (Text widget)
    result_box = tk.Text(result_frame, wrap="word", height=10, width=50, yscrollcommand=scrollbar.set)
    result_box.pack(side="left", fill="both", expand=True)
    scrollbar.config(command=result_box.yview)

    def process_input():
        """Tag the sentence in the entry field and show one ``word: tag`` line per token."""
        # split() with no arguments discards all whitespace, so empty or
        # whitespace-only input yields no tokens.  There is nothing to tag,
        # and the forward pass would otherwise fail on sentence[0].
        sentence = input_field.get().lower().split()
        if not sentence:
            return

        # First E-step with the trained model (with the per-tag fallback
        # emission for unseen words), then an M-step that re-estimates the
        # parameters on this sentence alone.
        alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags, probabilitas_emisi_kata_kosong)
        beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags, probabilitas_emisi_kata_kosong)
        gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size)
        probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)

        # Repeat E-step / M-step until the likelihood gain drops to the
        # threshold.  The first iteration has no previous likelihood to
        # compare against, so the difference stays at infinity and at least
        # two iterations always run.
        likelihood_diff = float('inf')
        prev_likelihood = 0
        iteration = 0
        while likelihood_diff > threshold:
            iteration += 1
            alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru)
            beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru)
            gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru)
            probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma_max, ksi_max, sentence, pos_tags)
            # Likelihood under the parameters used for this iteration's
            # forward/backward pass (before the M-step just above).
            likelihood = sum(alpha_max[-1][pos] * beta_max[-1][pos] for pos in pos_tags)
            if iteration >= 2:
                likelihood_diff = likelihood - prev_likelihood
            prev_likelihood = likelihood
            print(f"Iteration {iteration}: Likelihood = {likelihood:.9f}, Change in Likelihood = {likelihood_diff:.9f}")

        print("\nFinal Likelihood:", likelihood)

        # Punctuation and symbols get fixed tags; every other token gets the
        # tag with the highest gamma from the last E-step.
        predicted_tags = []
        for kata, posterior in zip(sentence, gamma_max):
            if kata in punctuation_tokens:
                predicted_tags.append('PUNCT')
            elif kata in symbol_tokens:
                predicted_tags.append('SYM')
            else:
                predicted_tags.append(max(posterior, key=posterior.get))

        result_text = "\n".join(f"{word}: {tag}" for word, tag in zip(sentence, predicted_tags)) + "\n"

        result_box.delete("1.0", tk.END)  # Clear the box before displaying new results
        result_box.insert(tk.END, f"Hasil Prediksi:\n{result_text}")

    # Frame to hold the buttons
    button_frame = tk.Frame(root)
    button_frame.pack(pady=5)

    # Both buttons share the same look.
    button_style = dict(width=15, height=2, bg="dodger blue", fg="white", font=("Helvetica", 12, "bold"), bd=2, relief="solid", highlightbackground="white")

    # Predict button
    predict_button = tk.Button(button_frame, text="Prediksi", command=process_input, **button_style)
    predict_button.pack(side="left", padx=10)

    # "Hapus" restarts the prediction by rebuilding the whole screen.
    new_button = tk.Button(button_frame, text="Hapus", command=predict_pos_tagging, **button_style)
    new_button.pack(side="left", padx=10)

# Create the GUI: the main Tk window is stored in the module-level name `root`,
# which clear_frame() and predict_pos_tagging() read, so it must exist before
# either of them is called.
root = tk.Tk()
root.title("POS Tagging Predictor")

# Build the prediction screen (title, input field, result box, buttons).
predict_pos_tagging()

# Start the Tk event loop so the widgets respond to user input.
root.mainloop()



Iteration 1: Likelihood = 0.000912043, Change in Likelihood = inf
Iteration 2: Likelihood = 0.021741489, Change in Likelihood = 0.020829446
Iteration 3: Likelihood = 0.288917827, Change in Likelihood = 0.267176338
Iteration 4: Likelihood = 0.680785446, Change in Likelihood = 0.391867619
Iteration 5: Likelihood = 0.870912357, Change in Likelihood = 0.190126911
Iteration 6: Likelihood = 0.960590191, Change in Likelihood = 0.089677834
Iteration 7: Likelihood = 0.993148174, Change in Likelihood = 0.032557982
Iteration 8: Likelihood = 0.999492888, Change in Likelihood = 0.006344714
Iteration 9: Likelihood = 0.999974916, Change in Likelihood = 0.000482028
Iteration 10: Likelihood = 0.999999851, Change in Likelihood = 0.000024935

Final Likelihood: 0.9999998505927273
