In [11]:
!pip install -q contractions scikit-learn Sastrawi googletrans==4.0.0-rc1 langdetect

import joblib
from bs4 import BeautifulSoup
import nltk
import re
import unicodedata
from googletrans import Translator
import contractions
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

# Fetch the NLTK resources used by this notebook. Recent NLTK releases make
# nltk.word_tokenize look up 'punkt_tab' instead of the pickled 'punkt'
# models, so both are requested; 'punkt' keeps older releases working.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

# File names of the persisted TF-IDF vectorizer and random-forest classifier;
# they are given without a directory, so they resolve against the current
# working directory.
output_tfidf = 'tfidf_vectorizer.joblib'
output_rf = 'random_forest_model.joblib'


[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


**Input Data**

In [12]:
# Sample text to classify; it is later passed to pre_process_text and to the translator.
new_text = "Former FTX executive (yes, FTX, Mas SBF), launched a new crypto currency exchange called Backpack Exchange.This exchange aims to avoid mistakes that cause the fall of FTX by using an independent custody wallet that gives users full control of their funds.Backpack Exchange is looking for an investment of $ 100 million with 10% shares"

In [13]:
# Text preprocessing helper functions
def strip_html_tags(text):
    """Return the text content of an HTML fragment.

    ``<iframe>`` and ``<script>`` elements are removed from the parsed tree
    before the text is extracted, and every run of line-break characters in
    the result is collapsed into a single newline.

    NOTE(review): the character class in the pattern below also contains a
    literal ``|``, so runs of pipe characters are collapsed into a newline
    too. It is reproduced unchanged here; confirm whether that is intended.
    """
    document = BeautifulSoup(text, "html.parser")
    for unwanted in document(['iframe', 'script']):
        unwanted.extract()
    plain = document.get_text()
    return re.sub(r'[\r|\n|\r\n]+', '\n', plain)

def remove_accented_chars(text):
    """Fold ``text`` down to plain ASCII.

    The string is NFKD-normalised so accented letters split into a base
    letter plus combining marks; encoding to ASCII with ``'ignore'`` then
    drops the marks along with every other non-ASCII character.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return ascii_only.decode('utf-8', 'ignore')

def pre_process_text(text, language):
    """Normalise raw text before it is vectorised.

    Steps, in order: lowercase; strip HTML via ``strip_html_tags``; turn
    newlines, tabs and carriage returns into spaces; fold accented characters
    to ASCII via ``remove_accented_chars``; expand contractions; delete every
    character that is not an ASCII letter, digit or whitespace; collapse runs
    of spaces into one.

    Args:
        text: The string to clean.
        language: When equal to ``'indonesian'`` the cleaned text is
            additionally passed through ``preprocess_text_sastrawi``; any
            other value skips that step.

    Returns:
        The cleaned string.
    """
    text = text.lower()
    text = strip_html_tags(text)
    text = text.translate(str.maketrans("\n\t\r", "   "))
    text = remove_accented_chars(text)
    text = contractions.fix(text)
    # The flags must be passed by keyword: the fourth positional parameter of
    # re.sub is `count`, so passing them positionally capped the number of
    # substitutions at the integer value of the flags instead of applying them.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text, flags=re.I | re.A)
    text = re.sub(' +', ' ', text)
    if language == 'indonesian':
        text = preprocess_text_sastrawi(text)
    return text

# Text preprocessing specific to Indonesian (Bahasa Indonesia)
def preprocess_text_sastrawi(text):
    """Apply Sastrawi stop-word removal and stemming to ``text``.

    The text is tokenised with ``nltk.word_tokenize``. Each token is passed
    through the Sastrawi stop-word remover, tokens that come back as an empty
    string are discarded, the survivors are stemmed, and the stems are joined
    with single spaces.
    """
    remover = StopWordRemoverFactory().create_stop_word_remover()
    stemmer = StemmerFactory().create_stemmer()

    stems = []
    for word in nltk.word_tokenize(text):
        kept = remover.remove(word)
        if kept != '':
            stems.append(stemmer.stem(kept))
    return " ".join(stems)

# Load the models
# NOTE(review): joblib.load unpickles the files, which can execute arbitrary
# code; only load artefacts that come from a trusted source.
tfidf_vectorizer = joblib.load(output_tfidf)
rf_classifier = joblib.load(output_rf)

# Preprocess the new text
# NOTE(review): the text is processed as 'indonesian' although the sample
# assigned to new_text looks like it is already English — confirm.
preprocessed_text = pre_process_text(new_text, 'indonesian')

# Convert the preprocessed text to TF-IDF features using the loaded tfidf_vectorizer
new_text_tfidf = tfidf_vectorizer.transform([preprocessed_text])

# Predict the label for the new text using the loaded rf_classifier
# NOTE(review): predicted_label is not read again in this cell; the result
# printed below is computed from the translated text instead.
predicted_label = rf_classifier.predict(new_text_tfidf)

# Translate the original, unprocessed text to English with googletrans
# (presumably a network request — verify it is reachable where this runs).
translator = Translator()
translated_text = translator.translate(new_text, dest='en').text

# Convert the translated text to TF-IDF features using the loaded tfidf_vectorizer
# NOTE(review): translated_text is vectorised without going through
# pre_process_text, unlike new_text above — confirm this matches how the
# vectorizer was fitted.
translated_text_tfidf = tfidf_vectorizer.transform([translated_text])

# Display the sentiment prediction for the translated text
# [0, 1] picks the second column of the first (only) row of predict_proba;
# presumably that column is the positive class — verify against
# rf_classifier.classes_. predicted_sentiment is not read again in this cell.
predicted_sentiment = rf_classifier.predict(translated_text_tfidf)
sentiment_probability = rf_classifier.predict_proba(translated_text_tfidf)[0, 1]

threshold = 0.5  # The threshold can be adjusted as needed
sentiment = "Positive" if sentiment_probability > threshold else "Negative"

# Print the prediction results
print("\nText:", translated_text)
print("Sentiment Probability:", sentiment_probability)
print("Sentiment:", sentiment)


Text: Former FTX executive (yes, FTX, Mas SBF), launched a new crypto currency exchange called Backpack Exchange.This exchange aims to avoid mistakes that cause the fall of FTX by using an independent custody wallet that gives users full control of their funds.Backpack Exchange is looking for an investment of $ 100 million with 10% shares
Sentiment Probability: 0.7
Sentiment: Positive
