# **Import Libraries**

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import re
import unicodedata
import tensorflow as tf
import spacy
import pickle
from tensorflow.keras.models import load_model

# **Load Model and LabelEncoder**

In [2]:
# Restore the LabelEncoder that maps class indices back to label names.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted source.
with open('label_encoder.pkl', 'rb') as file:
    le = pickle.load(file)

# Load the best ANN sentiment model
model = load_model('model_sentiment_ann.h5')



# **Preprocessing Data**

In [None]:
# Load the spaCy English model. The code visible in this notebook only reads
# token.lemma_, so the dependency parser and named-entity recognizer are
# disabled: they are not needed for lemmas and would otherwise run on every
# call to nlp(...).
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])

# English stopword set from NLTK.
# NOTE: stopwords.words raises LookupError if the 'stopwords' corpus has not
# been downloaded (nltk.download('stopwords')).
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

# Text cleaning
def clean_text(text):
    """Return *text* as lowercase ASCII words separated by single spaces.

    Removes, in order: digits, e-mail addresses, URLs (``http...``/``www...``),
    the standalone token ``rt``, HTML-like tags, and punctuation.

    Args:
        text: The raw sentence (a ``str``).

    Returns:
        The cleaned string with no leading/trailing or repeated whitespace.
    """
    # Turn every Unicode whitespace run into a plain space *before* ASCII
    # folding, so separators such as U+2028 are not dropped (which would glue
    # neighbouring words together).
    text = re.sub(r'\s+', ' ', text)
    # Fold to ASCII up front. Doing this as the last step left double/trailing
    # spaces where non-ASCII words were dropped, and let compatibility
    # characters (e.g. "½", "²") reappear as digits after digit removal.
    text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ascii')
    text = text.lower()
    text = re.sub(r'\d+', '', text)                 # digits
    text = re.sub(r'\S+@\S+', '', text)             # e-mail addresses
    text = re.sub(r'http\S+|www\S+', '', text)      # URLs
    text = re.sub(r'\brt\b', '', text)              # retweet marker
    text = re.sub(r'<.*?>', '', text)               # HTML-like tags
    text = re.sub(r'[^\w\s]', '', text)             # punctuation
    # Collapse whitespace last so every removal above is accounted for.
    return re.sub(r'\s+', ' ', text).strip()

# Stopword removal
def remove_stopwords(text):
    """Drop every whitespace-separated token found in ``stop_words``.

    The surviving tokens are re-joined with single spaces, in their original
    order.
    """
    tokens = text.split()
    kept = (token for token in tokens if token not in stop_words)
    return ' '.join(kept)

# Lemmatization
def lemmatize_text(text):
    """Replace each token of *text* with its spaCy lemma, joined by spaces."""
    lemmas = [tok.lemma_ for tok in nlp(text)]
    return ' '.join(lemmas)

# Financial-term normalization
def normalize_financial_terms(text):
    """Rewrite financial abbreviations to one canonical spelling.

    Whole-word ``mln``/``mn`` become ``million`` and whole-word ``eur``
    becomes ``euro``; words that merely contain these letters are untouched.
    """
    # (pattern, canonical form) pairs, applied in order.
    rewrites = (
        (r'\bmln\b|\bmn\b|\bmillion\b', 'million'),
        (r'\beur\b|\beuro\b', 'euro'),
    )
    for pattern, canonical in rewrites:
        text = re.sub(pattern, canonical, text)
    return text

# Preprocessing pipeline
def preprocess_pipeline(text):
    """Run *text* through every preprocessing stage and return the result.

    Stages, in order: cleaning, stopword removal, lemmatization, and
    financial-term normalization.
    """
    stages = (
        clean_text,
        remove_stopwords,
        lemmatize_text,
        normalize_financial_terms,
    )
    for stage in stages:
        text = stage(text)
    return text

# **Load and Preprocess New Data**

In [4]:
# Example sentences to classify
example_sentences = [
    "The company's earnings grew by 15% this quarter, exceeding analyst expectations.",
    "Market conditions remain uncertain amid economic slowdown.",
    "Investors show strong optimism following the announcement.",
]
new_data = pd.DataFrame({'Sentence': example_sentences})

# Run every sentence through the preprocessing pipeline
new_data['clean_text'] = new_data['Sentence'].map(preprocess_pipeline)

# **Vectorize New Data**

In [5]:
# Reload the saved text-vectorizer model
loaded_vectorizer_model = load_model('text_vectorizer_model.keras')

# Cleaned sentences as an array of strings
texts = new_data['clean_text'].astype(str).values

# Run the vectorizer over the cleaned sentences
X_new_seq = loaded_vectorizer_model.predict(texts)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 368ms/step


# **Prediction**

In [None]:
# Predict with the loaded model
predictions = model.predict(X_new_seq)

# argmax over axis 1 picks the highest-scoring class index for each row
predicted_labels = np.argmax(predictions, axis=1)

# Map class indices back to the original label names
new_data['Predicted Label'] = le.inverse_transform(predicted_labels)

# Show each sentence next to its predicted label
new_data.loc[:, ['Sentence', 'Predicted Label']]

1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 558ms/step


Unnamed: 0,Sentence,Predicted Label
0,The company's earnings grew by 15% this quarte...,positive
1,Market conditions remain uncertain amid econom...,neutral
2,Investors show strong optimism following the a...,neutral
