# Import Libraries & Model

In [1]:
import pandas as pd
import numpy as np 
from string import punctuation
import re
from nltk.tokenize import word_tokenize
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import joblib
import gradio as gr
import os
from os.path import dirname, join, realpath

In [2]:
# Load the fitted topic model (the file name indicates a BERTopic model).
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
with open("Bertopic_model_cpu.pkl", "rb") as f:
    model = joblib.load(f)

# Load the topic list
topics_name = pd.read_excel("topic_list_cpu.xlsx")

# Lookup table: value of the "Topic" column -> value of the "Representation"
# column, used to show the words associated with a predicted topic.
topic_dict = (
    topics_name
    .set_index("Topic")["Representation"]
    .to_dict()
)

# Predefined Functions

In [3]:
# Text cleaning function
def text_cleaning(text, remove_stop_words=True, lemmatize_words=True):
    """Normalize a raw review string before it is passed to the topic model.

    Steps, in order:
      1. Replace every URL (a run of non-space characters starting with
         ``http``) with the token ``link``.
      2. Drop possessive ``'s`` suffixes.
      3. Replace every remaining non-alphanumeric character with a space.
      4. Remove digit runs that start at a word boundary and are followed by
         whitespace.
      5. Optionally lemmatize each whitespace-separated token.

    Steps 1 and 2 must run before step 3: once ``:``, ``/``, ``.`` and ``'``
    have been turned into spaces, URLs and possessives can no longer be
    recognised.

    Args:
        text: The raw input string.
        remove_stop_words: Accepted for interface compatibility but not used;
            no stop-word filtering is performed by this function.
        lemmatize_words: When true, tokens are lemmatized with
            ``WordNetLemmatizer`` and re-joined with single spaces.

    Returns:
        The cleaned string. When ``lemmatize_words`` is false, the whitespace
        runs left behind by the substitutions are kept as-is.
    """
    # URLs and possessives first, while their punctuation is still present.
    text = re.sub(r"http\S+", " link ", text)
    text = re.sub(r"\'s", " ", text)

    # Everything that is not an ASCII letter or digit becomes a space. This
    # also removes all punctuation, so no separate punctuation filter is needed.
    text = re.sub(r"[^A-Za-z0-9]", " ", text)

    # Decimal points were replaced above, so only plain digit runs can remain.
    text = re.sub(r"\b\d+\s+", "", text)

    if lemmatize_words:
        lemmatizer = WordNetLemmatizer()
        text = " ".join(lemmatizer.lemmatize(word) for word in text.split())
    return text

def format_dictionary(dictionary):
    """Render a mapping as a multi-line, brace-delimited string.

    Each entry is written on its own line as ``    '<key>': <value>,`` using
    the default string formatting of the key and value; the entries are
    wrapped between an opening ``{`` line and a closing ``}``.

    Args:
        dictionary: Mapping whose items are rendered in iteration order.

    Returns:
        The formatted string (``"{\\n}"`` for an empty mapping).
    """
    entry_lines = [f"    '{name}': {item},\n" for name, item in dictionary.items()]
    return "{\n" + "".join(entry_lines) + "}"

# Prediction function
def predict_topics(review, num_of_topics=3):
    """Find the topics most similar to a review and format them for display.

    The review is cleaned with ``text_cleaning`` and passed to
    ``model.find_topics``, which is unpacked as a pair of
    (topic ids, similarity scores). Each returned topic id is looked up in
    ``topic_dict`` to obtain its associated words; ids missing from the
    lookup table get an empty string.

    Args:
        review: Raw review text.
        num_of_topics: How many similar topics to request from the model.
            Defaults to 3.

    Returns:
        A string produced by ``format_dictionary`` containing the number of
        topics reported, one ``Topic <rank>`` entry per topic, and the
        similarity scores rounded to two decimals under ``Probability``.
    """
    cleaned_review = text_cleaning(review)
    similar_topics, similarity = model.find_topics(cleaned_review, top_n=num_of_topics)

    # Report what the model actually returned instead of indexing a fixed
    # number of positions, so a shorter result cannot raise IndexError.
    matched_topics = list(similar_topics)[:num_of_topics]

    predictTopicOutput = {'Number of Topics': len(matched_topics)}
    for rank, topic_id in enumerate(matched_topics, start=1):
        associated_words = topic_dict.get(topic_id, "")
        predictTopicOutput[f'Topic {rank}'] = (
            f'Topic Number: {topic_id}, Associated Words: {associated_words}'
        )
    predictTopicOutput['Probability'] = np.round(similarity, 2)
    return format_dictionary(predictTopicOutput)


# Launch Gradio Interface

In [4]:

# Interface
# One text input wired to predict_topics, with its string result shown as text.
iface = gr.Interface(
    fn=predict_topics,
    inputs="text",
    outputs="text",
)

# Launching the interface
iface.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


