In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from collections import Counter

In [2]:
data = pd.read_csv('output_with_pos.txt', delimiter='\t', header=None, names=['kalimat'])
data

Unnamed: 0,kalimat
0,Bocah/NOUN kuwi/DET seneng/VERB nggambar/VERB ...
1,Ing/ADP wayah/NOUN sore/NOUN biasane/ADV Siti/...
2,Mbakyune/_ Mbakyu/NOUN e/PRON seneng/VERB ngla...
3,Ibu/NOUN kuwi/DET ngendikakake/VERB yen/SCONJ ...
4,Nonton/VERB tivi/NOUN bisa/AUX njembarake/VERB...
...,...
834,/PUNCT Sesuk/ADV aku/PRON pengen/VERB ngerti/V...
835,"/PUNCT Dhik/PROPN ,/PUNCT kok/ADV sajak/SCONJ ..."
836,Bocah/NOUN enom/ADJ saiki/ADV racake/ADV akeh/...
837,Lha/INTJ ning/PART nek/SCONJ bapak/NOUN biyen/...


In [3]:
# Fungsi untuk memisahkan setiap kalimat menjadi kata dan tag
def split_words_tags(sentence):
    words_tags = sentence.split()
    return [tuple(word_tag.rsplit('/', 1)) for word_tag in words_tags]

data['kata_tag'] = data['kalimat'].apply(split_words_tags)
data

Unnamed: 0,kalimat,kata_tag
0,Bocah/NOUN kuwi/DET seneng/VERB nggambar/VERB ...,"[(Bocah, NOUN), (kuwi, DET), (seneng, VERB), (..."
1,Ing/ADP wayah/NOUN sore/NOUN biasane/ADV Siti/...,"[(Ing, ADP), (wayah, NOUN), (sore, NOUN), (bia..."
2,Mbakyune/_ Mbakyu/NOUN e/PRON seneng/VERB ngla...,"[(Mbakyune, _), (Mbakyu, NOUN), (e, PRON), (se..."
3,Ibu/NOUN kuwi/DET ngendikakake/VERB yen/SCONJ ...,"[(Ibu, NOUN), (kuwi, DET), (ngendikakake, VERB..."
4,Nonton/VERB tivi/NOUN bisa/AUX njembarake/VERB...,"[(Nonton, VERB), (tivi, NOUN), (bisa, AUX), (n..."
...,...,...
834,/PUNCT Sesuk/ADV aku/PRON pengen/VERB ngerti/V...,"[(, PUNCT), (Sesuk, ADV), (aku, PRON), (pengen..."
835,"/PUNCT Dhik/PROPN ,/PUNCT kok/ADV sajak/SCONJ ...","[(, PUNCT), (Dhik, PROPN), (,, PUNCT), (kok, A..."
836,Bocah/NOUN enom/ADJ saiki/ADV racake/ADV akeh/...,"[(Bocah, NOUN), (enom, ADJ), (saiki, ADV), (ra..."
837,Lha/INTJ ning/PART nek/SCONJ bapak/NOUN biyen/...,"[(Lha, INTJ), (ning, PART), (nek, SCONJ), (bap..."


In [4]:
# Case Folding (Lowercase)
def case_folding(word_tag_list):
    return [(word.lower(), tag) for word, tag in word_tag_list]

data['kata_tag'] = data['kata_tag'].apply(case_folding)
data

Unnamed: 0,kalimat,kata_tag
0,Bocah/NOUN kuwi/DET seneng/VERB nggambar/VERB ...,"[(bocah, NOUN), (kuwi, DET), (seneng, VERB), (..."
1,Ing/ADP wayah/NOUN sore/NOUN biasane/ADV Siti/...,"[(ing, ADP), (wayah, NOUN), (sore, NOUN), (bia..."
2,Mbakyune/_ Mbakyu/NOUN e/PRON seneng/VERB ngla...,"[(mbakyune, _), (mbakyu, NOUN), (e, PRON), (se..."
3,Ibu/NOUN kuwi/DET ngendikakake/VERB yen/SCONJ ...,"[(ibu, NOUN), (kuwi, DET), (ngendikakake, VERB..."
4,Nonton/VERB tivi/NOUN bisa/AUX njembarake/VERB...,"[(nonton, VERB), (tivi, NOUN), (bisa, AUX), (n..."
...,...,...
834,/PUNCT Sesuk/ADV aku/PRON pengen/VERB ngerti/V...,"[(, PUNCT), (sesuk, ADV), (aku, PRON), (pengen..."
835,"/PUNCT Dhik/PROPN ,/PUNCT kok/ADV sajak/SCONJ ...","[(, PUNCT), (dhik, PROPN), (,, PUNCT), (kok, A..."
836,Bocah/NOUN enom/ADJ saiki/ADV racake/ADV akeh/...,"[(bocah, NOUN), (enom, ADJ), (saiki, ADV), (ra..."
837,Lha/INTJ ning/PART nek/SCONJ bapak/NOUN biyen/...,"[(lha, INTJ), (ning, PART), (nek, SCONJ), (bap..."


In [5]:
underscore = data['kata_tag'].apply(lambda kalimat: sum(1 for kata, tag in kalimat if kata == '_' or tag == '_')).sum()
print(f"Jumlah kata dan tag yang berupa '_': {underscore}")

Jumlah kata dan tag yang berupa '_': 595


In [6]:
# Menghapus kata dan tag yang berupa '_'
data['kata_tag'] = data['kata_tag'].apply(lambda kalimat: [(kata, tag) for kata, tag in kalimat if kata != '_' and tag != '_'])
underscore = data['kata_tag'].apply(lambda kalimat: sum(1 for kata, tag in kalimat if kata == '_' or tag == '_')).sum()
print(f"Jumlah kata dan tag yang berupa '_': {underscore}")

Jumlah kata dan tag yang berupa '_': 0


In [7]:
# Fungsi untuk memisahkan kalimat berdasarkan kata PUNCT
def split_after_punct(kalimat):
    result = []
    temp = []
    for kata, tag in kalimat:
        temp.append((kata, tag))
        if tag == 'PUNCT' and kata == '.':
            result.append(temp)
            temp = []
    if temp:  # Menambahkan kalimat terakhir jika ada
        result.append(temp)
    return result

# Menerapkan fungsi split_after_punct
data['kalimat_split'] = data['kata_tag'].apply(split_after_punct)

# Membuat DataFrame baru dengan setiap kalimat terpisah
split_rows = []

for index, row in data.iterrows():
    for kalimat in row['kalimat_split']:
        split_rows.append({'kalimat': ' '.join(kata for kata, tag in kalimat), 'kata_tag': kalimat})

# Membuat DataFrame baru dari hasil split
split_df = pd.DataFrame(split_rows)

split_df

Unnamed: 0,kalimat,kata_tag
0,bocah kuwi seneng nggambar sesawangan sing asri .,"[(bocah, NOUN), (kuwi, DET), (seneng, VERB), (..."
1,"ing wayah sore biasane siti sinau , kangmas e ...","[(ing, ADP), (wayah, NOUN), (sore, NOUN), (bia..."
2,"mbakyu e seneng nglangi , nanging adhi e ora s...","[(mbakyu, NOUN), (e, PRON), (seneng, VERB), (n..."
3,ibu kuwi ngendikakake yen putra e mbarep wis n...,"[(ibu, NOUN), (kuwi, DET), (ngendikakake, VERB..."
4,"nonton tivi bisa njembarake kawruh , nanging u...","[(nonton, VERB), (tivi, NOUN), (bisa, AUX), (n..."
...,...,...
1015,"kanthi rasa bungah , dak rangkul wong tuwa ku ...","[(kanthi, ADP), (rasa, NOUN), (bungah, ADJ), (..."
1016,"kados dereng saged nampi kasunyatan niki , ” ...","[(, PUNCT), (kados, ADV), (dereng, ADV), (sage..."
1017,bocah enom saiki racake akeh sing lali karo ka...,"[(bocah, NOUN), (enom, ADJ), (saiki, ADV), (ra..."
1018,"lha ning nek bapak biyen sida lunga , ora ana ...","[(lha, INTJ), (ning, PART), (nek, SCONJ), (bap..."


In [8]:
# # Data yang akan digunakan untuk training, tanpa 'PUNCT' dan 'SYM'
# def remove_punct_sym(kalimat):
#     return [(kata, tag) for kata, tag in kalimat if tag not in ['PUNCT', 'SYM']]

# # Menerapkan fungsi remove_punct_sym
# split_df['train_data'] = split_df['kata_tag'].apply(remove_punct_sym)
# print(split_df['train_data'])

split_df['train_data'] = split_df['kata_tag']
print(split_df['train_data'])

0       [(bocah, NOUN), (kuwi, DET), (seneng, VERB), (...
1       [(ing, ADP), (wayah, NOUN), (sore, NOUN), (bia...
2       [(mbakyu, NOUN), (e, PRON), (seneng, VERB), (n...
3       [(ibu, NOUN), (kuwi, DET), (ngendikakake, VERB...
4       [(nonton, VERB), (tivi, NOUN), (bisa, AUX), (n...
                              ...                        
1015    [(kanthi, ADP), (rasa, NOUN), (bungah, ADJ), (...
1016    [(, PUNCT), (kados, ADV), (dereng, ADV), (sage...
1017    [(bocah, NOUN), (enom, ADJ), (saiki, ADV), (ra...
1018    [(lha, INTJ), (ning, PART), (nek, SCONJ), (bap...
1019    [(sampun, AUX), (damel, VERB), (ibu, NOUN), (k...
Name: train_data, Length: 1020, dtype: object


In [9]:
# Menghitung jumlah tag 'PUNCT' dan 'SYM' dari train data
punct_count = split_df['train_data'].apply(lambda kalimat: sum(1 for kata, tag in kalimat if tag == 'PUNCT')).sum()
sym_count = split_df['train_data'].apply(lambda kalimat: sum(1 for kata, tag in kalimat if tag == 'SYM')).sum()

print(f"Jumlah tag 'PUNCT' dari train data: {punct_count}")
print(f"Jumlah tag 'SYM' dari train data: {sym_count}")



Jumlah tag 'PUNCT' dari train data: 2233
Jumlah tag 'SYM' dari train data: 12


In [10]:
# Testing data tetap mempertahankan semua kata dan tag untuk prediksi otomatis
split_df['test_data'] = split_df['kata_tag']
print(split_df['test_data'])

0       [(bocah, NOUN), (kuwi, DET), (seneng, VERB), (...
1       [(ing, ADP), (wayah, NOUN), (sore, NOUN), (bia...
2       [(mbakyu, NOUN), (e, PRON), (seneng, VERB), (n...
3       [(ibu, NOUN), (kuwi, DET), (ngendikakake, VERB...
4       [(nonton, VERB), (tivi, NOUN), (bisa, AUX), (n...
                              ...                        
1015    [(kanthi, ADP), (rasa, NOUN), (bungah, ADJ), (...
1016    [(, PUNCT), (kados, ADV), (dereng, ADV), (sage...
1017    [(bocah, NOUN), (enom, ADJ), (saiki, ADV), (ra...
1018    [(lha, INTJ), (ning, PART), (nek, SCONJ), (bap...
1019    [(sampun, AUX), (damel, VERB), (ibu, NOUN), (k...
Name: test_data, Length: 1020, dtype: object


In [None]:
# Fungsi untuk menghitung probabilitas transisi awal
def hitung_probabilitas_awal(sentences):
    total_probabilitas_awal = Counter([kalimat[0][1] for kalimat in sentences if kalimat])
    total_sentences = len(sentences)
    transisi_awal = {pos: freq / total_sentences for pos, freq in total_probabilitas_awal.items()}
    return transisi_awal

# Fungsi untuk menghitung probabilitas emisi dengan Laplace smoothing
def hitung_probabilitas_emisi(word, pos, kata_pos_freq, pos_freq, vocab_size):
    kata_pos_count = kata_pos_freq.get((word, pos), 0)
    # if kata_pos_count == 0:
    #     return 1 / (pos_freq[pos] + vocab_size)
    return (kata_pos_count + 1) / (pos_freq[pos] + vocab_size)

def hitung_probabilitas_emisi_kata_kosong(pos, pos_freq, vocab_size):
    return 1 / (pos_freq[pos] + vocab_size)

def hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size):
    pos_bigram_freq = Counter()
    pos_bigram_freq.update(zip(pos_sequence, pos_sequence[1:]))
    return (pos_bigram_freq[(pos1, pos2)] + 1) / (pos_freq[pos1] + vocab_size)

# # Algoritma Forward
# def algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags):
#     alpha = [{}]
#     for pos in pos_tags:
#         alpha[0][pos] = probabilitas_awal.get(pos, 0) * probabilitas_emisi.get((sentence[0], pos), 0)
    
#     for t in range(1, len(sentence)):
#         alpha.append({})
#         for pos in pos_tags:
#             alpha[t][pos] = sum(alpha[t-1][prev_pos] * probabilitas_transisi.get((prev_pos, pos), 0) * probabilitas_emisi.get((sentence[t], pos), 0) for prev_pos in pos_tags)
    
#     return alpha

#     # Algoritma Backward
# def algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags):
#     beta = [{} for _ in range(len(sentence))]
    
#     # Inisialisasi beta pada waktu t = T
#     for pos in pos_tags:
#         beta[-1][pos] = 1
    
#     # Iterasi mundur dari t = T-1 ke t = 0
#     for t in range(len(sentence) - 2, -1, -1):
#         for pos in pos_tags:
#             beta[t][pos] = sum(beta[t + 1][next_pos] * probabilitas_transisi.get((pos, next_pos), 0) * probabilitas_emisi.get((sentence[t + 1], next_pos), 0) for next_pos in pos_tags)
    
#     return beta

# def expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size):
#         T = len(sentence)
#         gamma = [{} for _ in range(T)]
#         ksi = [{} for _ in range(T - 1)]
        
#         # menghitung gamma
#         for t in range(T):
#             normalization_factor = sum(alpha[t][pos] * beta[t][pos] for pos in pos_tags)
#             for pos in pos_tags:
#                 gamma[t][pos] = (alpha[t][pos] * beta[t][pos]) / normalization_factor
        
#         # menghitung ksi
#         for t in range(T - 1):
#             normalization_factor = sum(
#                 alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), (1 / (pos_freq[pos1] + vocab_size))) *
#                 probabilitas_emisi.get((sentence[t + 1], pos2), (1 / (pos_freq[pos2] + vocab_size))) * beta[t + 1][pos2]
#                 for pos1 in pos_tags for pos2 in pos_tags
#             )
#             for pos1 in pos_tags:
#                 ksi[t][pos1] = {}
#                 for pos2 in pos_tags:
#                     ksi[t][pos1][pos2] = (
#                         alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), (1 / (pos_freq[pos1] + vocab_size))) *
#                         probabilitas_emisi.get((sentence[t + 1], pos2), (1 / (pos_freq[pos2] + vocab_size))) * beta[t + 1][pos2]
#                     ) / normalization_factor
        
#         return gamma, ksi

# def maximization_step(gamma, ksi, sentence, pos_tags):
#     probabilitas_awal_baru = {pos: gamma[0][pos] for pos in pos_tags}
    
#     probabilitas_transisi_baru = {}
#     for pos1 in pos_tags:
#         for pos2 in pos_tags:
#             a = sum(ksi[t][pos1][pos2] for t in range(len(ksi)))
#             b = sum(gamma[t][pos1] for t in range(len(gamma)))
#             probabilitas_transisi_baru[(pos1, pos2)] = a / b
    
#     probabilitas_emisi_baru = {}
#     for pos in pos_tags:
#         probabilitas_emisi_baru[pos] = {}
#         for word in sentence:
#             a = sum(gamma[t][pos] for t in range(len(gamma)) if sentence[t] == word)
#             b = sum(gamma[t][pos] for t in range(len(gamma)))
#             probabilitas_emisi_baru[pos][word] = a / b
    
#     return probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru

# def predict_tags(gamma):
#         return [max(gamma[t], key=gamma[t].get) for t in range(len(gamma))]



In [None]:
# # Forward Algorithm dengan Maximization Step
# def algoritma_forward_max(sentence, probabilitas_awal, probabilitas_transisi, probabilitas_emisi, pos_tags):
#     # Inisialisasi alpha
#     alpha = [{}]
#     for pos in pos_tags:
#         alpha[0][pos] = probabilitas_awal[pos] * probabilitas_emisi[pos].get(sentence[0], 0)
        
#     # Iterasi untuk menghitung alpha
#     for t in range(1, len(sentence)):
#         alpha.append({})
#         for pos2 in pos_tags:
#             alpha[t][pos2] = sum(alpha[t-1][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t], 0) for pos1 in pos_tags)
            
#     return alpha

# # Backward Algorithm dengan Maximization Step
# def algoritma_backward_max(sentence, probabilitas_transisi, probabilitas_emisi, pos_tags):
#     # Inisialisasi beta
#     beta = [{} for _ in range(len(sentence))]
#     for pos in pos_tags:
#         beta[len(sentence)-1][pos] = 1
        
#     # Iterasi untuk menghitung beta
#     for t in range(len(sentence)-2, -1, -1):
#         for pos1 in pos_tags:
#             beta[t][pos1] = sum(probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * beta[t+1][pos2] for pos2 in pos_tags)
            
#     return beta

# # Expectation Step setelah Maximization: Menghitung gamma dan ksi
# def expectation_step_max(sentence, alpha, beta, probabilitas_transisi, probabilitas_emisi, pos_tags):
#     gamma = [{} for _ in range(len(alpha))]
#     ksi = [{} for _ in range(len(sentence) - 1)]
    
#     for t in range(len(alpha)):
#         normalization_factor = sum(alpha[t][pos] * beta[t][pos] for pos in pos_tags)
#         for pos in pos_tags:
#             gamma[t][pos] = (alpha[t][pos] * beta[t][pos]) / normalization_factor
    
#     for t in range(len(sentence) - 1):
#         normalization_factor = sum(alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * 
#                         beta[t+1][pos2] for pos1 in pos_tags for pos2 in pos_tags)
#         for pos1 in pos_tags:
#             ksi[t][pos1] = {}
#             for pos2 in pos_tags:
#                 ksi[t][pos1][pos2] = (alpha[t][pos1] * probabilitas_transisi.get((pos1, pos2), 0) * probabilitas_emisi[pos2].get(sentence[t+1], 0) * 
#                                         beta[t+1][pos2]) / normalization_factor
    
#     return gamma, ksi

In [None]:
from collections import Counter
import pickle

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Menghitung probabilitas emisi untuk seluruh kata di training
probabilitas_emisi = {}
for kata, pos in kata_pos_freq:
    probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

probabilitas_emisi_kata_kosong = {}
for pos in pos_tags:
    probabilitas_emisi_kata_kosong[pos] = hitung_probabilitas_emisi_kata_kosong(pos, pos_freq, vocab_size)

# Menyimpan probabilitas ke dalam model
model = {
    'probabilitas_awal': probabilitas_awal,
    'probabilitas_transisi': probabilitas_transisi,
    'probabilitas_emisi': probabilitas_emisi,
    'vocab_size': vocab_size,
    'pos_freq': pos_freq,
    'probabilitas_emisi_kata_kosong': probabilitas_emisi_kata_kosong
}

# Menyimpan model ke file pickle
with open('model_Baum_Welch.pkl', 'wb') as file:
    pickle.dump(model, file)

# Kode di bawah enggak fix

In [13]:
import tkinter as tk
from tkinter import simpledialog, messagebox
from collections import Counter

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Membuat GUI untuk input kalimat testing
def predict_pos_tagging():
    kalimat_testing = simpledialog.askstring("Input", "Masukkan kalimat untuk prediksi POS tagging:")
    if kalimat_testing:
        kalimat_testing = kalimat_testing.lower()
        test_sentences = [[(kata, '') for kata in kalimat_testing.split()]]
        probabilitas_emisi = {}
        for kalimat in test_sentences:
            for kata, _ in kalimat:
                for pos in pos_tags:
                    probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

        for kalimat in test_sentences:
            sentence = [kata for kata, tag in kalimat]
            alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags)
            beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags)
            gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size)
            probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)
            alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
            beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
            likelihood = sum(alpha[-1][pos] * beta_max[-1][pos] for pos in pos_tags)
            gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

            # Menampilkan hasil prediksi POS tagging
            hasil_prediksi = []
            for t, kata in enumerate(sentence):
                if kata in ['.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-']:
                    pos_prediksi = 'PUNCT'
                elif kata in ['$', '%', '@', '&', '#', '*']:
                    pos_prediksi = 'SYM'
                else:
                    pos_prediksi = max(gamma_max[t], key=gamma_max[t].get)
                hasil_prediksi.append((kata, pos_prediksi))
            messagebox.showinfo("Hasil Prediksi POS Tagging", hasil_prediksi)

# Membuat GUI
root = tk.Tk()
root.withdraw()  # Menyembunyikan jendela utama
predict_pos_tagging()
root.mainloop()


KeyboardInterrupt: 

# Simpan Model Menggunakan Pickle

In [14]:
from collections import Counter
import pickle

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Menggunakan seluruh dataset sebagai kalimat testing
test_sentences = split_df['test_data']

# Menyimpan hasil model menggunakan pickle
model_results = []

for kalimat in test_sentences:
    # sentence = list(dict.fromkeys([kata for kata, tag in kalimat]))  # Menghapus kata duplikat
    sentence = [kata for kata, tag in kalimat]  # Menggunakan semua kata dalam kalimat
    probabilitas_emisi = {}
    for kata, _ in kalimat:
        for pos in pos_tags:
            probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

    alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags)
    beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags)
    gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size)
    probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)
    alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
    beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
    likelihood = sum(alpha[-1][pos] * beta_max[-1][pos] for pos in pos_tags)
    gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

    # Menyimpan hasil prediksi POS tagging
    hasil_prediksi = []
    for t, kata in enumerate(sentence):
        if kata in ['.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-']:
            pos_prediksi = 'PUNCT'
        elif kata in ['$', '%', '@', '&', '#', '*']:
            pos_prediksi = 'SYM'
        else:
            pos_prediksi = max(gamma_max[t], key=gamma_max[t].get)
        hasil_prediksi.append((kata, pos_prediksi))
    
    model_results.append(hasil_prediksi)

# Menyimpan hasil model ke file pickle
with open('model_results.pkl', 'wb') as file:
    pickle.dump(model_results, file)

# Simpan Model Menggunakan Pickle kedua

In [None]:
from collections import Counter
import pickle

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Menghitung probabilitas emisi untuk seluruh kata di training
probabilitas_emisi = {}
for kata, pos in kata_pos_freq:
    probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

probabilitas_emisi_kata_kosong = {}
for pos in pos_tags:
    probabilitas_emisi_kata_kosong[pos] = hitung_probabilitas_emisi_kata_kosong(pos, pos_freq, vocab_size)

# Menyimpan probabilitas ke dalam model
model = {
    'probabilitas_awal': probabilitas_awal,
    'probabilitas_transisi': probabilitas_transisi,
    'probabilitas_emisi': probabilitas_emisi,
    'vocab_size': vocab_size,
    'pos_freq': pos_freq,
    'probabilitas_emisi_kata_kosong': probabilitas_emisi_kata_kosong
}

# Menyimpan model ke file pickle
with open('model_Baum_Welch.pkl', 'wb') as file:
    pickle.dump(model, file)

# Testing Model Menggunakan GUI

In [None]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pickle

# Load the model results from the pickle file
with open('model_results.pkl', 'rb') as file:
    model_results = pickle.load(file)

# Define the POS tags
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    for widget in root.winfo_children():
        widget.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    clear_frame()

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=10)

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=10)

    # Result label
    result_label = tk.Label(root, text="", justify="left")
    result_label.pack(pady=10)

    # Function to process the input
    def process_input():
        kalimat_testing = input_field.get()
        if kalimat_testing:
            kalimat_testing = kalimat_testing.lower()
            test_sentence = kalimat_testing.split()

            # Aggregate results from all sentences in the model results
            hasil_Keseluruhan = []
            for result in model_results:
                for word, tag in result:
                    if word in test_sentence:
                        hasil_Keseluruhan.append((word, tag))

            # Remove duplicates while preserving order
            dilihat = set()
            hasil_prediksi = [(word, tag) for word, tag in hasil_Keseluruhan if not (word in dilihat or dilihat.add(word))]

            # Display the predicted POS tags
            if hasil_prediksi:
                result_text = "\n".join(f"{word}: {tag}" for word, tag in hasil_prediksi)
                result_label.config(text=f"Hasil Prediksi: {result_text}")
                
                # Check for words that could not be predicted
                kata_prediksi = {word for word, tag in hasil_prediksi}
                kata_baru = [word for word in test_sentence if word not in kata_prediksi]
                if kata_baru:
                    missing_text = ", ".join(word for word in kata_baru)
                    result_label.config(text=f"Hasil Prediksi: {result_text}\n\nKata yang tidak bisa diprediksi:\n{missing_text}")
            else:
                result_label.config(text="Tidak ada kata yang ditemukan di kalimat input.")

    # Predict button
    predict_button = tk.Button(root, text="Predict", command=process_input)
    predict_button.pack(pady=5)

    # New button to restart the prediction
    def new_prediction():
        predict_pos_tagging()

    new_button = tk.Button(root, text="New", command=new_prediction)
    new_button.pack(pady=5)

# Create the GUI
root = tk.Tk()
root.title("POS Tagging Predictor")

predict_pos_tagging()

root.mainloop()


# Kode Dibawah Masih Salah

In [18]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pickle

# Load the model results from the pickle file
with open('model_results.pkl', 'rb') as file:
    model_results = pickle.load(file)

# Define the POS tags
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    for widget in root.winfo_children():
        widget.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    clear_frame()

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=10)

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=10)

    # Result label
    result_label = tk.Label(root, text="", justify="left")
    result_label.pack(pady=10)

    # Function to process the input
    def process_input():
        kalimat_testing = input_field.get()
        if kalimat_testing:
            kalimat_testing = kalimat_testing.lower()
            test_sentence = kalimat_testing.split()

            # Find the most similar sentence in the model results
            best_match = None
            best_score = float('-inf')  # Use -inf for maximizing score
            for result in model_results:
                score = sum(1 for word, _ in result if word in test_sentence)
                if score > best_score:
                    best_score = score
                    best_match = result

            # Display the predicted POS tags
            if best_match:
                hasil_prediksi = [(word, tag) for word, tag in best_match if word in test_sentence]
                if hasil_prediksi:
                    result_text = "\n".join(f"{word}: {tag}" for word, tag in hasil_prediksi)
                    result_label.config(text=f"Hasil Prediksi: {result_text}")
                else:
                    result_label.config(text="Tidak ada kata yang ditemukan di kalimat input.")
            else:
                result_label.config(text="Tidak ada prediksi yang cocok ditemukan.")

    # Predict button
    predict_button = tk.Button(root, text="Predict", command=process_input)
    predict_button.pack(pady=5)

    # New button to restart the prediction
    def new_prediction():
        predict_pos_tagging()

    new_button = tk.Button(root, text="New", command=new_prediction)
    new_button.pack(pady=5)

# Create the GUI
root = tk.Tk()
root.title("POS Tagging Predictor")

predict_pos_tagging()

root.mainloop()


: 

# Kode Dibawah Masih Salah

In [None]:
from collections import Counter
import pickle

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Menggunakan seluruh dataset sebagai kalimat testing
test_sentences = split_df['test_data']

# Menyimpan hasil model menggunakan pickle
model_results = []

for kalimat in test_sentences:
    sentence = [kata for kata, tag in kalimat]
    kalimat_testing = ' '.join(sentence)
    if kalimat_testing:
        kalimat_testing = kalimat_testing.lower()
        test_sentences = [[(kata, '') for kata in kalimat_testing.split()]]
        probabilitas_emisi = {}
        for kalimat in test_sentences:
            for kata, _ in kalimat:
                for pos in pos_tags:
                    probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

        for kalimat in test_sentences:
            sentence = [kata for kata, tag in kalimat]
            alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags)
            beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags)
            gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size)
            probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)
            alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
            beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
            likelihood = sum(alpha[-1][pos] * beta_max[-1][pos] for pos in pos_tags)
            gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

            # Menyimpan hasil prediksi POS tagging
            hasil_prediksi = []
            for t, kata in enumerate(sentence):
                if kata in ['.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-']:
                    pos_prediksi = 'PUNCT'
                elif kata in ['$', '%', '@', '&', '#', '*']:
                    pos_prediksi = 'SYM'
                else:
                    pos_prediksi = max(gamma_max[t], key=gamma_max[t].get)
                hasil_prediksi.append((kata, pos_prediksi))
    
            model_results.append(hasil_prediksi)

# Menyimpan hasil model ke file pickle
with open('model_results_Baru.pkl', 'wb') as file:
    pickle.dump(model_results, file)

In [22]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pickle

# Load the model results from the pickle file
with open('model_results_Baru.pkl', 'rb') as file:
    model_results = pickle.load(file)

# Define the POS tags
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    for widget in root.winfo_children():
        widget.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    clear_frame()

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=10)

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=10)

    # Result label
    result_label = tk.Label(root, text="", justify="left")
    result_label.pack(pady=10)

    # Function to process the input
    def process_input():
        kalimat_testing = input_field.get()
        if kalimat_testing:
            kalimat_testing = kalimat_testing.lower()
            test_sentence = kalimat_testing.split()

            # Find the most similar sentence in the model results
            best_match = None
            best_score = float('-inf')  # Use -inf for maximizing score
            for result in model_results:
                score = sum(1 for word, _ in result if word in test_sentence)
                if score > best_score:
                    best_score = score
                    best_match = result

            # Display the predicted POS tags
            if best_match:
                hasil_prediksi = [(word, tag) for word, tag in best_match if word in test_sentence]
                if hasil_prediksi:
                    result_text = "\n".join(f"{word}: {tag}" for word, tag in hasil_prediksi)
                    result_label.config(text=f"Hasil Prediksi: {result_text}")
                else:
                    result_label.config(text="Tidak ada kata yang ditemukan di kalimat input.")
            else:
                result_label.config(text="Tidak ada prediksi yang cocok ditemukan.")

    # Predict button
    predict_button = tk.Button(root, text="Predict", command=process_input)
    predict_button.pack(pady=5)

    # New button to restart the prediction
    def new_prediction():
        predict_pos_tagging()

    new_button = tk.Button(root, text="New", command=new_prediction)
    new_button.pack(pady=5)

# Create the GUI
root = tk.Tk()
root.title("POS Tagging Predictor")

predict_pos_tagging()

root.mainloop()


In [19]:
from collections import Counter
import pickle

# Menggunakan seluruh dataset sebagai data training
train_sentences = split_df['train_data']

# Menghitung frekuensi tag POS
pos_freq = Counter([tag for kalimat in train_sentences for kata, tag in kalimat])

# Menghitung frekuensi pasangan kata-tag POS
kata_pos_freq = Counter([(kata, tag) for kalimat in train_sentences for kata, tag in kalimat])

pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']
vocab_size = len(pos_tags)  # Jumlah jenis kata

# Menghitung probabilitas transisi awal
probabilitas_awal = hitung_probabilitas_awal(train_sentences)

# Menghitung urutan POS dari data
pos_sequence = [tag for sentence in train_sentences for word, tag in sentence]

# Menghitung probabilitas transisi untuk setiap pasangan POS
probabilitas_transisi = {(pos1, pos2): hitung_probabilitas_transisi(pos1, pos2, pos_sequence, pos_freq, vocab_size) for pos1 in pos_tags for pos2 in pos_tags}

# Menghitung probabilitas emisi untuk seluruh kata di training
probabilitas_emisi = {}
for kata, pos in kata_pos_freq:
    probabilitas_emisi[(kata, pos)] = hitung_probabilitas_emisi(kata, pos, kata_pos_freq, pos_freq, vocab_size)

probabilitas_emisi_kata_kosong = {}
for pos in pos_tags:
    probabilitas_emisi_kata_kosong[pos] = hitung_probabilitas_emisi_kata_kosong(pos, pos_freq, vocab_size)

# Menyimpan probabilitas ke dalam model
model = {
    'probabilitas_awal': probabilitas_awal,
    'probabilitas_transisi': probabilitas_transisi,
    'probabilitas_emisi': probabilitas_emisi,
    'vocab_size': vocab_size,
    'pos_freq': pos_freq,
    'probabilitas_emisi_kata_kosong': probabilitas_emisi_kata_kosong
}

# Menyimpan model ke file pickle
with open('model_Baum_Welch.pkl', 'wb') as file:
    pickle.dump(model, file)

In [20]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pickle

# Load the model results from the pickle file
with open('model_Baum_Welch.pkl', 'rb') as file:
    model_results = pickle.load(file)

# Define the POS tags
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'SCONJ', 'VERB', 'X']

# Function to clear the frame
def clear_frame():
    for widget in root.winfo_children():
        widget.destroy()

# Function to predict POS tags for a new sentence
def predict_pos_tagging():
    clear_frame()

    # Create input prompt
    prompt_label = tk.Label(root, text="Masukkan kalimat untuk prediksi POS tagging:")
    prompt_label.pack(pady=10)

    # Input field
    input_field = tk.Entry(root, width=50)
    input_field.pack(pady=10)

    # Result label
    result_label = tk.Label(root, text="", justify="left")
    result_label.pack(pady=10)

    # Function to process the input
    def process_input():
        kalimat_testing = input_field.get()
        if kalimat_testing:
            kalimat_testing = kalimat_testing.lower()
            test_sentence = kalimat_testing.split()

            # Extract probabilities from the model
            probabilitas_awal = model_results['probabilitas_awal']
            probabilitas_transisi = model_results['probabilitas_transisi']
            probabilitas_emisi = model_results['probabilitas_emisi']
            vocab_size = model_results['vocab_size']
            pos_freq = model_results['pos_freq']
            probabilitas_emisi_kata_kosong = model_results['probabilitas_emisi_kata_kosong']

            # Process the input sentence
            sentence = test_sentence

            # Initialize alpha and beta
            alpha = algoritma_forward(sentence, probabilitas_emisi, probabilitas_transisi, probabilitas_awal, pos_tags)
            beta = algoritma_backward(sentence, probabilitas_emisi, probabilitas_transisi, pos_tags)

            # Perform expectation step
            gamma, ksi = expectation_step(sentence, alpha, beta, probabilitas_emisi, probabilitas_transisi, pos_tags, pos_freq, vocab_size)

            # Perform maximization step
            probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma, ksi, sentence, pos_tags)

            # Initialize alpha_max and beta_max
            alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
            beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

            # Calculate likelihood
            likelihood = sum(alpha[-1][pos] * beta_max[-1][pos] for pos in pos_tags)

            # Perform expectation step with maximization
            gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

            # Set threshold and initialize variables for iteration
            threshold = 1e-10
            likelihood_diff = float('inf')
            prev_likelihood = 0
            iteration = 0

            # Iterate until convergence
            while likelihood_diff > threshold:
                iteration += 1

                alpha_max = algoritma_forward_max(sentence, probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)
                beta_max = algoritma_backward_max(sentence, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

                gamma_max, ksi_max = expectation_step_max(sentence, alpha_max, beta_max, probabilitas_transisi_baru, probabilitas_emisi_baru, pos_tags)

                probabilitas_awal_baru, probabilitas_transisi_baru, probabilitas_emisi_baru = maximization_step(gamma_max, ksi_max, sentence, pos_tags)

                likelihood = sum(alpha_max[-1][pos] * beta_max[-1][pos] for pos in pos_tags)

                current_likelihood = likelihood
                if iteration >= 2:
                    likelihood_diff = current_likelihood - prev_likelihood
                    prev_likelihood = current_likelihood
                else:
                    prev_likelihood = current_likelihood

                print(f"Iteration {iteration}: Likelihood = {current_likelihood:.9f}, Change in Likelihood = {likelihood_diff:.9f}")

            print("\nFinal Likelihood:", likelihood)

            # Predict POS tags
            predicted_tags = []
            for t in range(len(sentence)):
                kata = sentence[t]
                if kata in ['.', ',', '!', '?', ':', ';', '(', ')', '[', ']', '{', '}', '"', "'", '-']:
                    predicted_tags.append('PUNCT')
                elif kata in ['$', '%', '@', '&', '#', '*']:
                    predicted_tags.append('SYM')
                else:
                    predicted_tags.append(max(gamma_max[t], key=gamma_max[t].get))

            # Display the predicted POS tags
            result_text = "\n".join(f"{word}: {tag}" for word, tag in zip(sentence, predicted_tags))
            result_label.config(text=f"Hasil Prediksi: {result_text}")

            

    # Predict button
    predict_button = tk.Button(root, text="Predict", command=process_input)
    predict_button.pack(pady=5)

    # New button to restart the prediction
    def new_prediction():
        predict_pos_tagging()

    new_button = tk.Button(root, text="New", command=new_prediction)
    new_button.pack(pady=5)

# Create the GUI
root = tk.Tk()
root.title("POS Tagging Predictor")

predict_pos_tagging()

root.mainloop()
