In [5]:
# Sentiment Analysis Dependencies
!pip install -q contractions scikit-learn Sastrawi googletrans==4.0.0-rc1 langdetect
import joblib
from bs4 import BeautifulSoup
import nltk
import re
import unicodedata
from googletrans import Translator
import contractions
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

nltk.download('stopwords')
nltk.download('punkt')

# Time Series Dependencies
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import yfinance as yf

output_tfidf = 'tfidf_vectorizer.joblib'
output_rf = 'random_forest_model.joblib'
output_time_series = 'time_series_model.h5'

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llama-cloud 0.1.13 requires httpx>=0.20.0, but you have httpx 0.13.3 which is incompatible.
llama-index-llms-lmstudio 0.2.1 requires llama-index-core<0.12.0,>=0.11.0, but you have llama-index-core 0.12.22 which is incompatible.
openai 1.65.4 requires httpx<1,>=0.23.0, but you have httpx 0.13.3 which is incompatible.

[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


**Input Data**

In [6]:
# sentiment
new_text = "Revision of Subsidized Fertilizer Policy, Now Farmers Can Redeem Using KTP"

# time series
stock_symbol = 'FTT-USD' # tambahkan .JK untuk bursa efek indonesia (BBCA.JK) | -USD untuk global
start_date = '2022-11-14'
end_date = '2023-11-14'

In [7]:
# Fungsi-fungsi pra-pemrosesan teks
def strip_html_tags(text):
    # Fungsi ini menghapus tag HTML dari teks menggunakan BeautifulSoup
    soup = BeautifulSoup(text, "html.parser")
    [s.extract() for s in soup(['iframe', 'script'])]
    stripped_text = soup.get_text()
    stripped_text = re.sub(r'[\r|\n|\r\n]+', '\n', stripped_text)
    return stripped_text

def remove_accented_chars(text):
    # Fungsi ini menghapus karakter aksen dari teks menggunakan normalisasi Unicode
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return text

def pre_process_text(text, language):
    # Fungsi ini melakukan pra-pemrosesan teks seperti mengonversi teks ke huruf kecil,
    # menghapus tag HTML, karakter aksen, kontraksi, dan karakter khusus
    text = text.lower()
    text = strip_html_tags(text)
    text = text.translate(text.maketrans("\n\t\r", "   "))
    text = remove_accented_chars(text)
    text = contractions.fix(text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text, re.I | re.A)
    text = re.sub(' +', ' ', text)
    if language == 'indonesian':
        text = preprocess_text_sastrawi(text)
    return text

# Fungsi pra-pemrosesan teks khusus Bahasa Indonesia
def preprocess_text_sastrawi(text):
    # Fungsi ini menggunakan Sastrawi untuk menghapus stop word dan melakukan stemming pada teks Bahasa Indonesia
    factory1 = StopWordRemoverFactory()
    stopword_sastrawi = factory1.create_stop_word_remover()

    factory2 = StemmerFactory()
    stemmer_sastrawi = factory2.create_stemmer()

    tokens = nltk.word_tokenize(text)
    tokens = [stopword_sastrawi.remove(token) for token in tokens]
    tokens = [stemmer_sastrawi.stem(token) for token in tokens if token != '']
    return " ".join(tokens)

# Load the models
# Memuat model TF-IDF Vectorizer dan Random Forest dari file yang telah diunduh
tfidf_vectorizer = joblib.load(output_tfidf)
rf_classifier = joblib.load(output_rf)

# Pra-pemrosesan teks baru untuk analisis sentimen
preprocessed_text = pre_process_text(new_text, 'indonesian')

# Mengonversi teks yang telah di-preprocess menjadi fitur TF-IDF
new_text_tfidf = tfidf_vectorizer.transform([preprocessed_text])

# Melakukan prediksi sentimen menggunakan model Random Forest
predicted_label = rf_classifier.predict(new_text_tfidf)

# Menerjemahkan teks ke bahasa Inggris
translator = Translator()
translated_text = translator.translate(new_text, dest='en').text

# Mengonversi teks yang telah diterjemahkan menjadi fitur TF-IDF
translated_text_tfidf = tfidf_vectorizer.transform([translated_text])

# Menampilkan prediksi sentimen
predicted_sentiment = rf_classifier.predict(translated_text_tfidf)
sentiment_probability = rf_classifier.predict_proba(translated_text_tfidf)[0, 1]

threshold = 0.5  # Threshold bisa diatur sesuai kebutuhan
sentiment = "Positif" if sentiment_probability > threshold else "Negatif"

# Analisis Time Series

# Mendapatkan data historis saham
new_df = yf.download(stock_symbol, start=start_date, end=end_date)

# Memilih kolom 'Open' (butuhnya opening price)
new_ts = new_df['Open'].values

# Normalisasi data
scaler = StandardScaler()
new_data_normalized = scaler.fit_transform(np.array(new_ts).reshape(-1, 1))

# Memastikan data baru dalam format urutan yang serupa dengan data latihan
seq_length = 30

# Menyiapkan X_new_data
X_new_data = []

for i in range(len(new_data_normalized) - seq_length):
    X_new_data.append(new_data_normalized[i:i + seq_length])

# Mengonversi X_new_data menjadi array numpy
X_new_data = np.array(X_new_data)

# Memuat model analisis time series yang telah dilatih sebelumnya
model = load_model(output_time_series)

# Melakukan prediksi menggunakan model time series
predictions = model.predict(X_new_data)

# Peramalan
forecast_days = 5
X_forecast = np.copy(new_data_normalized[-seq_length:])

forecasted_values = []
for _ in range(forecast_days):
    forecasted_value = model.predict(X_forecast.reshape(1, seq_length, 1))
    forecasted_values.append(forecasted_value[0, 0])

    X_forecast = np.roll(X_forecast, -1)
    X_forecast[-1] = forecasted_value

last_actual_day = new_df.index[-1]  # Hari terakhir data aktual
forecast_dates = pd.date_range(last_actual_day, periods=forecast_days + 1)[1:]

last_actual_opening_price = new_df['Open'].iloc[-1]  # Harga pembukaan hari terakhir pada data aktual
first_forecast_opening_price = forecasted_values[0]  # Harga pembukaan hari pertama dalam peramalan

price_difference = first_forecast_opening_price - last_actual_opening_price
percentage_change = price_difference / last_actual_opening_price

print(f"Harga pembukaan aktual terakhir: {last_actual_day} = {last_actual_opening_price}")
print(f"Harga pembukaan peramalan hari pertama: {forecast_dates[0]} = {first_forecast_opening_price}")
print(f"Selisih harga pembukaan saham antara hari aktual terakhir dan hari pertama peramalan: {price_difference}")
print(f"Persentase Perubahan: {percentage_change*100}%")

# Persentase perubahan disesuaikan menjadi metrik terbobot
weighted_metric = (percentage_change + 1) / 2
print(f"\nMetrik terbobot: {weighted_metric}")

time_series_weight = weighted_metric


[*********************100%***********************]  1 of 1 completed


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Harga pembukaan aktual terakhir: 2023-11-13 00:00:00 = Ticker
FTT-USD    3.425437
Name: 2023-11-13 00:00:00, dtype: float64
Harga pembukaan peramalan hari pertama: 2023-11-14 00:00:00 = 1.9761346578598022
Selisih harga pembukaan saham antara hari aktual terakhir dan hari pertama peramalan: Ticker
FTT-USD   -1.449302
Name: 2023-11-13 00:00:00, dtype: float64
Persentase Perubahan: Ticker
FTT-USD   -42.309998
Name: 2023-11-13 00:00:00, dtype: float64%

Metrik terbobot: Ticker
FTT-USD    0.28845
Name: 2023-11-13 00:00:00, dtype: float64


In [8]:
# Fungsi untuk mengkombinasikan bobot
def combine_weights(sentiment_probability, time_series_weight, sentiment_ratio=0.65):
    time_series_ratio = 1 - sentiment_ratio

    combined_weight = (sentiment_ratio * sentiment_probability + time_series_ratio * time_series_weight.iloc[-1])
    return combined_weight

final_weight = combine_weights(sentiment_probability, time_series_weight)
final_sentiment = "Positive📈" if final_weight > 0.5 else "Negative📉"

print("Bobot:", final_weight)
print("Sentiment:", final_sentiment)

Bobot: 0.5299575034641106
Sentiment: Positive📈
