# Model Inference

Nama: Elia Oktaviani

Pada bagian ini akan dijalankan uji model dengan data yang belum pernah ditemui sebelumnya. Data yang diuji berupa teks dengan kata-kata positif dan bertujuan untuk menghasilkan suatu sentimen positif.

# Import Library

In [1]:
# General use
import numpy as np
import pandas as pd

# Text-related
import re
import string
import nltk
nltk.download('stopwords') 
nltk.download('punkt')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# NN-related
from tensorflow.keras.saving import load_model


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


# Model Loading

In [None]:
# Load the trained model
# The path is relative to the notebook's working directory.
# NOTE(review): 'model_improved' has no file extension, so it is presumably a
# SavedModel directory — confirm that the installed Keras version can still
# load that format through `load_model`.
model = load_model('model_improved')

# Model Inference

Pada bagian ini telah dibuat rangkaian fungsi yang disusun sedemikian rupa sehingga raw data yang dimasukkan dapat diproses menjadi hasil prediksi jenis sentimen.

In [3]:
# Patterns replaced by a single space before the text is stripped.
# NOTE(review): the regexes below are kept equivalent to the original ones on
# purpose; presumably they mirror the cleaning used when the vectorizer/model
# were fitted — confirm before tightening any of them (e.g. escaping the dots).
_PRE_STRIP_PATTERNS = (
    r"@[A-Za-z0-9_]+",   # mentions
    r"@ [A-Za-z0-9_]+",  # mentions written with a space after "@"
    r"#[A-Za-z0-9_]+",   # hashtags
    r"\\n",              # a literal backslash followed by "n" (not a real newline)
)

# URL-like patterns replaced by a single space after stripping.
_URL_PATTERNS = (
    r"http\S+",
    r"www.\S+",
    r"twitch.tv\S+",
    r"twitch tv\S+",
    r"pic.twitter.com\S+",
    r"dlvr.it\S+",
    # NOTE(review): this pattern contains upper-case letters but is applied
    # after case folding, so it cannot match; kept only for parity.
    r"dfr.it / RMTrgF",
    r"dlvr.it \S+",
)

# Anything that is not an ASCII letter, whitespace or an apostrophe.
# Raw string: the original non-raw "\s" is an invalid string escape.
_NON_LETTER_PATTERN = r"[^A-Za-z\s']"


def _preprocess_text(text):
    """Clean, tokenize, filter and lemmatize *text*; return the joined tokens."""
    # Expand contractions
    text = expand_contractions(text)

    # Case folding
    text = text.lower()

    # Mention, hashtag and literal "\n" removal
    for pattern in _PRE_STRIP_PATTERNS:
        text = re.sub(pattern, " ", text)

    # Leading/trailing whitespace removal
    text = text.strip()

    # URL removal
    for pattern in _URL_PATTERNS:
        text = re.sub(pattern, " ", text)

    # Non-letter removal (emoticons, symbols, digits, ...)
    text = re.sub(_NON_LETTER_PATTERN, " ", text)

    # Tokenization, stopword removal (both stopword collections) and lemmatizing
    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stpwds_en and word not in additional_stopwords
    ]

    # Combining tokens
    return ' '.join(tokens)


def predict_tfidf(text):
    """Print the predicted sentiment class for a single raw text.

    The text is cleaned with ``_preprocess_text``, transformed with the
    module-level ``vectorizer`` and passed to the module-level ``model``.
    The class with the highest predicted score is printed as
    ``Sentiment: <label>``; an index outside 0-3 prints ``Unknown sentiment``.

    Relies on names that must already exist at module level when called:
    ``expand_contractions``, ``stpwds_en``, ``additional_stopwords``,
    ``lemmatizer``, ``vectorizer`` and ``model``.

    Args:
        text: Raw input text to classify.

    Returns:
        None. The result is only printed.
    """
    # Preprocess the input text
    preprocessed_text = _preprocess_text(text)

    # Vectorize the preprocessed text (a one-element batch)
    x = vectorizer.transform([preprocessed_text])

    # Predict the per-class scores; the model is fed a dense array
    predictions_proba = model.predict(x.toarray())

    # Class index -> label mapping
    emotions = {0: 'Irrelevant', 1: 'Negative', 2: 'Neutral', 3: 'Positive'}

    # Index of the highest-scoring class for the single input sample
    class_index = int(np.ravel(np.argmax(predictions_proba, axis=-1))[0])

    label = emotions.get(class_index)
    if label is None:
        print("Unknown sentiment")
    else:
        print(f"Sentiment: {label}")


In [None]:

# Sample input with positive wording, used to exercise the inference pipeline.
txt = 'This game is very interesting, and i do super love it'
# Prints the predicted sentiment for the sample text.
predict_tfidf(txt)