In [1]:
import gradio as gr
import tensorflow as tf
import pickle
import unicodedata
import contractions
import re
import nltk
import pandas as pd
import numpy as np
from nltk.corpus import stopwords, words
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model #type:ignore
from tensorflow.keras.utils import pad_sequences # type: ignore

nltk.download('words')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords') 

  from .autonotebook import tqdm as notebook_tqdm
2025-04-12 16:14:06.410469: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-12 16:14:06.423302: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744449246.437801  116764 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744449246.442018  116764 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744449246.452843  116764 computation_placer.cc:177] computation placer already r

True

In [2]:
# Shared module-level NLP resources reused by the cleaning helpers below.
# WordNet lemmatizer; applied to each surviving word in removeNonEnglish.
lemmatizer = WordNetLemmatizer()
# NLTK English stop-word list, held as a set for membership filtering.
stop_words = set(stopwords.words('english'))
# NLTK "words" corpus, held as a set; removeNonEnglish keeps only words found here.
english_words = set(words.words())

def loadCustomDict(path, encoding='utf-8'):
    """Load a custom vocabulary file into a set of lowercase entries.

    Each line of the file is treated as one entry. Surrounding whitespace is
    stripped, blank lines are skipped, and entries are lowercased.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A set of the distinct, lowercased, non-empty lines.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(path, 'r', encoding=encoding) as file:
        return {entry.lower() for entry in map(str.strip, file) if entry}

def normalizeWhitespace(text):
    """Normalize a raw poem string into lowercase, punctuation-free text.

    The text is NFKC-normalized and passed through ``contractions.fix``, then
    a fixed sequence of regex substitutions is applied in order. Finally all
    whitespace runs are collapsed to a single space and the result is
    stripped and lowercased.

    Args:
        text: The input string.

    Returns:
        The cleaned string.
    """
    text = contractions.fix(unicodedata.normalize('NFKC', text))

    # (pattern, replacement) pairs; the order is significant.
    substitutions = (
        (r'[\t\r]+', ' '),  # tabs / carriage returns become a space
        (r'\b\d+\b', ''),  # drop standalone numbers
        # Dash-like characters are deleted without inserting a space, so the
        # pieces of a hyphenated word end up joined.
        (r'[-‐‑‒–—―]+', ''),
        (r'[_﹍﹎＿]', ''),  # underscores and underscore-like characters
        (r'[^\w\s]', ''),  # remaining symbols / punctuation
        # Collapse a word repeated consecutively into one occurrence
        # (case-sensitive here, since lowercasing happens afterwards).
        (r'\b(\w+)(?:\s+\1\b)+', r'\1'),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return re.sub(r'\s+', ' ', text).strip().lower()

def removeOtherLanguage(text):
    """Cut trailing translation text and drop words with non-ASCII characters.

    Everything from the first occurrence of ``' translated'`` onward is
    discarded. Any word containing at least one non-ASCII character is then
    removed, whitespace runs are collapsed, and the result is stripped and
    lowercased.

    Args:
        text: The input string.

    Returns:
        The filtered, lowercased string.
    """
    head, marker, _ = text.partition(' translated')
    if marker:
        # Marker found: keep only what precedes it.
        text = head.rstrip()

    ascii_only = re.sub(r'\b\w*[^\x00-\x7F]\w*\b', '', text)
    return re.sub(r'\s+', ' ', ascii_only).strip().lower()

def removeNonEnglish(text_series, custom_dict):
    """Keep only lemmatized English, non-stop-word tokens in each row of a Series.

    Relies on the module-level ``english_words``, ``stop_words`` and
    ``lemmatizer`` objects.

    Args:
        text_series: pandas Series of strings (accessed through ``.str``).
        custom_dict: Iterable of words to delete from the text before any
            other filtering; matched as whole words, case-insensitively.

    Returns:
        A Series indexed like ``text_series``. Rows in which no word survives
        the filtering become the empty string.
    """
    # One alternation of all custom words, escaped and anchored on word boundaries.
    pattern = r'\b(?:' + '|'.join(re.escape(word) for word in custom_dict) + r')\b'
    temp_series = text_series.str.replace(pattern, '', case=False, regex=True)
    # One row per word; explode repeats the original index label for each word,
    # which is what lets groupby(level=0) reassemble the rows below.
    split_words = temp_series.str.split()
    exploded = split_words.explode()
    # Keep words whose lowercase form is in the English vocabulary ...
    exploded = exploded[exploded.str.lower().isin(english_words)]
    # ... and drop stop words.
    filtered = exploded[~exploded.str.lower().isin(stop_words)]
    lemmatized = filtered.apply(lambda word: lemmatizer.lemmatize(word.lower()))
    # Re-join the surviving words of each original row with single spaces.
    cleaned_text_series = lemmatized.groupby(level=0).agg(' '.join)
    # Matches a word followed by one or more repetitions of itself.
    pattern2 = r'\b(\w+)(?:\s+\1\b)+'
    # Rows that lost every word are absent after the groupby; restore them as ''.
    ser = cleaned_text_series.reindex(text_series.index, fill_value='')
    # Collapse consecutive repeated words into a single occurrence.
    text = ser.str.replace(pattern2, r'\1', case=False, regex=True)
    return text

def cleanInference(df):
    """Run the full text-cleaning pipeline on the ``'poem'`` column of ``df``.

    The vocabulary in ``custom_vocab.txt`` is loaded, then each poem goes
    through ``normalizeWhitespace`` and ``removeOtherLanguage`` before the
    whole column is filtered by ``removeNonEnglish``. The ``'poem'`` column of
    the passed DataFrame is overwritten in place.

    Args:
        df: DataFrame with a ``'poem'`` column of strings.

    Returns:
        The same DataFrame object, with ``'poem'`` replaced by cleaned text.
    """
    vocab = loadCustomDict('custom_vocab.txt')
    normalized = df['poem'].apply(normalizeWhitespace).apply(removeOtherLanguage)
    df['poem'] = removeNonEnglish(normalized, vocab)
    return df

def kerasTokenizer(text, tokenizer, maxlen=128):
    """Convert text(s) to padded integer sequences with a fitted tokenizer.

    Args:
        text: An iterable of strings (for example a pandas Series), or a
            single string, which is treated as one document.
        tokenizer: Object exposing ``texts_to_sequences`` (here a Keras
            tokenizer loaded from a pickle file).
        maxlen: Length every sequence is padded or truncated to. Defaults to
            128, the value previously hard-coded here; presumably the length
            the models were trained with (TODO confirm).

    Returns:
        The result of ``pad_sequences`` for the tokenized texts.
    """
    # texts_to_sequences iterates over its argument, so a bare string would be
    # consumed one character at a time; wrap it so it counts as one document.
    if isinstance(text, str):
        text = [text]
    sequences = tokenizer.texts_to_sequences(text)
    return pad_sequences(sequences, maxlen=maxlen)

def getLabelEncoder(name):
    """Return the index-to-label mapping for a labelling technique.

    Labels are numbered by their position in alphabetical order.

    Args:
        name: One of ``'hartmann'``, ``'savani'`` or ``'deepseek'``.

    Returns:
        A dict mapping each integer class index to its label string.

    Raises:
        ValueError: If ``name`` is not a known labelling technique. Previously
            the function fell through and returned ``None``, which only
            surfaced later as an unrelated error at the call site.
    """
    label_sets = {
        'hartmann': ['sadness', 'fear', 'anger', 'joy', 'neutral', 'surprise', 'disgust'],
        'savani': ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise'],
        'deepseek': ['other', 'sadness', 'joy', 'hope', 'love'],
    }
    if name not in label_sets:
        known = ', '.join(sorted(label_sets))
        raise ValueError(f"Unknown label encoder {name!r}; expected one of: {known}")
    return dict(enumerate(sorted(label_sets[name])))

In [3]:
# Sample poem text. It is not referenced by any other visible cell; presumably
# kept for manually trying out predict_poem (TODO confirm or remove).
poem1 = '''
Deliverance is not for me in renunciation.
I feel the embrace of freedom in a thousand bonds of delight.

Thou ever pourest for me the fresh draught of thy wine of various
colours and fragrance, filling this earthen vessel to the brim.

My world will light its hundred different lamps with thy flame
and place them before the altar of thy temple.

No, I will never shut the doors of my senses.
The delights of sight and hearing and touch will bear thy delight.

Yes, all my illusions will burn into illumination of joy,
and all my desires ripen into fruits of love.
'''

In [4]:
# Load the fitted tokenizers, one per labelling technique.
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer pickles that come from a trusted source.
with open("./tokenizer/tokenizer_savani_0.1_lstm.pkl", "rb") as f:
    tokenizer_savani = pickle.load(f)
with open("./tokenizer/tokenizer_hartmann_0.1_lstm.pkl", "rb") as g:
    tokenizer_hartman = pickle.load(g)
with open("./tokenizer/tokenizer_deepseek_0.1_lstm.pkl", "rb") as h:
    tokenizer_deepseek = pickle.load(h)

# Load the trained models. The Keras loader is reached through
# `tf.keras.models` instead of the bare `load_model` name because a later cell
# defines its own `load_model(model_name)`; once that cell has run, re-running
# this one with the bare name would call the wrong function.
model_savani = tf.keras.models.load_model("./model/best_model_savani_0.1_lstm.keras")
model_hartman = tf.keras.models.load_model("./model/best_model_hartmann_0.1_lstm.keras")
model_deepseek = tf.keras.models.load_model("./model/best_model_deepseek_0.1_lstm.keras")

# Registry keyed by labelling-technique name; each entry pairs a model with the
# tokenizer loaded for the same technique.
MODELS = {
    "savani": {
        "model": model_savani,
        "tokenizer": tokenizer_savani
    },
    "hartmann": {
        "model": model_hartman,
        "tokenizer": tokenizer_hartman
    },
    "deepseek": {
        "model": model_deepseek,
        "tokenizer": tokenizer_deepseek
    },
}



I0000 00:00:1744449252.416426  116764 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [7]:
# Cache of (tokenizer, model) pairs, keyed by model name.
loaded_models = {}


def load_model(model_name):
    """Return the ``(tokenizer, model)`` pair registered under ``model_name``.

    The pair is read from ``MODELS`` on first request and stored in
    ``loaded_models`` so later requests return the same tuple.

    NOTE(review): this definition reuses the name of the Keras ``load_model``
    imported at the top of the notebook, so after this cell runs the bare name
    no longer refers to the Keras loader.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    pair = loaded_models.get(model_name)
    if pair is None:
        entry = MODELS[model_name]
        pair = (entry['tokenizer'], entry['model'])
        loaded_models[model_name] = pair
    return pair
    

def predict_poem(poem, model_name):
    """Classify a single poem and return the predicted label.

    The poem is wrapped in a one-row DataFrame, cleaned with
    ``cleanInference``, tokenized and padded with ``kerasTokenizer``, and
    passed to the selected model. The index of the largest model output for
    that row is translated to a label via ``getLabelEncoder``.

    Args:
        poem: The poem text.
        model_name: Name of the model/tokenizer pair to use (a key accepted by
            ``load_model`` and ``getLabelEncoder``).

    Returns:
        The label string for the highest-scoring class.
    """
    tokenizer, model = load_model(model_name)
    cleaned = cleanInference(pd.DataFrame({'poem': [poem]}))
    padded = kerasTokenizer(cleaned['poem'], tokenizer)
    scores = model.predict(padded, verbose=0)
    # Single input row, so take the argmax of the first (only) row.
    best_index = np.argmax(scores, axis=1)[0]
    return getLabelEncoder(model_name)[best_index]

In [None]:
# Build the Gradio interface: inputs in the first column, results and
# reference notes in the second.
with gr.Blocks(title="NLP Model Text Classifier") as demo:
    gr.Markdown("## 📜 Poem Emotion Classification")
    gr.Markdown("""
    ### - **Step 1:** Select a labeling technique (model - each has different emotion labels)  
    ### - **Step 2:** Enter your poem text  
    ### - **Output:** Predicted emotion
    """)
    with gr.Row():
        # First column: model choice, poem text and the submit button.
        with gr.Column():
            # Choices come from the MODELS registry keys.
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="savani",
                interactive=True,
                label="Select Labelling Technique Model"
            )
            text_input = gr.Textbox(
                lines=5,
                placeholder="Enter text here...",
                label="Input Text",
                interactive=True
            )
            submit_btn = gr.Button("Classify", variant="primary")

        # Second column: prediction output plus static reference notes.
        with gr.Column():
            output_label = gr.Label(label="Classification Results")
            gr.Markdown("""
            **Poem References**            
            - [Poem Hunter](https://www.poemhunter.com)
            - [Poem Generator](https://www.poem-generator.org.uk)
            - [HelloPoetry](https://hellopoetry.com)
            """)
            gr.Markdown("""
            **Class Available for Each Labelling Model Technique**
            - **Hartmann**: ['sadness', 'fear', 'anger', 'joy', 'neutral', 'surprise', 'disgust']
            - **Savani**: ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise']
            - **Deepseek**: ['other', 'sadness', 'joy', 'hope', 'love']
            """)


    # Clicking the button calls predict_poem(text_input, model_selector) and
    # shows the returned label in output_label.
    submit_btn.click(
        fn=predict_poem,
        inputs=[text_input, model_selector],
        outputs=[output_label]
    )

# Start the local server with debug=True.
demo.launch(debug=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


