### **1. Mengimpor Library**

In [33]:
#---MULAI---
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import time
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from xgboost import XGBRegressor
#---SELESAI---

### **2. Mengambil Data dari Yahoo Finance**

In [42]:
#---MULAI---
# Stock symbol and the date window requested from Yahoo Finance
ticker = "KLBF.JK"
start_date, end_date = "2019-01-01", "2024-12-31"

# Download the price history and write it straight to disk as a raw export
yf.download(ticker, start=start_date, end=end_date).to_csv("KLBF_JK_Historical.csv")

# The rest of the notebook works from the "_fix" CSV, not from the export
# written just above.
# NOTE(review): presumably "KLBF_JK_Historical_fix.csv" is a hand-cleaned copy
# of that export; nothing in this notebook creates it, so it must already
# exist on disk — confirm and document how it is produced.
data = pd.read_csv("KLBF_JK_Historical_fix.csv")

# Preview the first five rows
data.head()
#---SELESAI---

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2019-01-01,1353.74585,1353.74585,1353.74585,1353.74585,0
1,2019-01-02,1358.199097,1362.652208,1335.933538,1358.199097,5035800
2,2019-01-03,1371.55835,1376.011461,1340.386569,1367.105238,15603900
3,2019-01-04,1398.2771,1402.730211,1353.745982,1367.105317,19765600
4,2019-01-07,1420.54248,1433.901814,1407.183147,1407.183147,28281300


### **3. Persiapan Dataset**

In [43]:
#---MULAI---
# Feature columns fed to the model and the name of the prediction target
features = ['Open', 'High', 'Low', 'Close']
target = 'Next_Day_Close'

# Build the modelling table as a NEW frame instead of mutating `data` in place.
# The previous in-place `shift` + `dropna(inplace=True)` removed one more
# trailing row every time this cell was re-run; deriving `dataset` from the
# untouched `data` makes the cell idempotent.
dataset = (
    data
    # target = the Close value of the following row
    .assign(**{target: data['Close'].shift(-1)})
    # drops the last row (it has no following row) and any row with missing values
    .dropna()
)

# Split into feature matrix and target vector
X = dataset[features]
y = dataset[target]
#---SELESAI---

### **4. Normalisasi Data**

In [54]:
#---MULAI---
# Rescale every feature column with min-max normalisation.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before the
# train/test split performed later in the notebook, so the test rows
# contribute to the fitted min/max — confirm this is acceptable.
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)
#---SELESAI---

In [56]:
# Export the fitted scaler so the same scaling can be reapplied outside this notebook
import pickle

# Use a context manager so the file is flushed and closed deterministically;
# the previous bare open(...) left the handle open until garbage collection.
with open("scaler_xgb.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

### **5. Membagi Data Latih dan Uji**

In [46]:
#---MULAI---
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): train_test_split shuffles rows by default. The rows look like
# consecutive trading days, so a shuffled split mixes later days into the
# training set — confirm this is intended for a next-day forecast.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Independent copies of the same split for scenario 2 (the tuned model)
x_train2, y_train2 = X_train.copy(), y_train.copy()
x_test2, y_test2 = X_test.copy(), y_test.copy()
#---SELESAI---

### **6. Melatih Model XGBoost**




In [47]:
#---MULAI---
# Fit the baseline XGBoost regressor (library defaults, fixed seed) on the training split
model = xgb.XGBRegressor(random_state=42).fit(X_train, y_train)

# Persist the fitted baseline model; the call's return value is the cell output
joblib.dump(model, "xgboost_model_klbf.pkl")
#---SELESAI---

['xgboost_model_klbf.pkl']

### **7. Eksperimen Parameter XGBoost**

#### **7.1 XGBoost tanpa optimasi GridSearch**



In [48]:
# --- MULAI ---
# XGBoost trained with the hyperparameters of this experiment spelled out
# explicitly, as the reference point for the grid search that follows.
default_params = {
    "max_depth": 6,
    "gamma": 0,
    "reg_lambda": 1,
    "learning_rate": 0.3,
    "min_child_weight": 1,
    "subsample": 1,
    "colsample_bytree": 1,
}

model_default = XGBRegressor(**default_params)
model_default.fit(X_train, y_train)

# Persist the fitted model; the call's return value is the cell output
joblib.dump(model_default, "xgboost_model_default.pkl")
# --- SELESAI ---

['xgboost_model_default.pkl']

#### **7.2 XGBoost dengan optimasi parameter GridSearch**

In [49]:
# --- MULAI ---
# Tune the XGBoost hyperparameters with an exhaustive grid search
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter (3*3*4*3*3*3*3 = 2916 combinations)
param_grid = dict(
    max_depth=[3, 6, 9],
    gamma=[0.01, 0.1, 0.5],
    reg_lambda=[0.01, 0.1, 1, 10],
    learning_rate=[0.01, 0.1, 0.3],
    min_child_weight=[1, 5, 10],
    subsample=[0.5, 0.7, 0.9],
    colsample_bytree=[0.6, 0.8, 1],
)

# Estimator whose hyperparameters are searched
model2 = XGBRegressor(random_state=42)

# 5-fold cross-validated search over the grid, using all available cores
grid_search = GridSearchCV(model2, param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Persist the best estimator found by the search
joblib.dump(grid_search.best_estimator_, "xgboost_model_gridsearch.pkl")

# Report the winning value of every tuned hyperparameter, in grid order
for param_name in param_grid:
    print(f'Best {param_name} :', getattr(grid_search.best_estimator_, param_name))

# Confirm where the tuned model was written
print("Model hasil optimasi Grid Search telah disimpan ke file 'xgboost_model_gridsearch.pkl'")
# --- SELESAI ---

Fitting 5 folds for each of 2916 candidates, totalling 14580 fits
Best max_depth : 3
Best gamma : 0.01
Best reg_lambda : 10
Best learning_rate : 0.1
Best min_child_weight : 1
Best subsample : 0.9
Best colsample_bytree : 0.8
Model hasil optimasi Grid Search telah disimpan ke file 'xgboost_model_gridsearch.pkl'


In [50]:
# --- MULAI ---
# Retrain the final model with the hyperparameters reported by the grid search.
# The values are written out explicitly so this cell can run without repeating
# the 14,580-fit search.
best_model = XGBRegressor(
    max_depth=3,
    gamma=0.01,
    reg_lambda=10,
    learning_rate=0.1,
    min_child_weight=1,
    subsample=0.9,
    colsample_bytree=0.8,
    random_state=42,  # fixed seed for reproducibility
)

# Fit on the scenario-2 copy of the training split
best_model.fit(x_train2, y_train2)

# Persist the model. The path lives in one variable so the confirmation
# message can no longer drift from the file actually written (the old message
# misspelled it as 'gridseaerch').
best_model_path = "best_xgboost_model_gridsearch.pkl"
joblib.dump(best_model, best_model_path)

print(f"Best model berhasil dilatih dan disimpan sebagai '{best_model_path}'")
# --- SELESAI ---

Best model berhasil dilatih dan disimpan sebagai 'best_xgboost_model_gridseaerch.pkl'


### **8. Prediksi dan Evaluasi Hasil**

In [51]:
#---MULAI---
# Step 8: predict on the held-out split and report regression metrics


def _report_metrics(title, y_true, y_hat):
    """Print and return (MSE, RMSE, MAE, MAPE in percent, R^2) for one model.

    `title` is printed verbatim as the section header before the metrics.
    """
    mse = mean_squared_error(y_true, y_hat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_hat)
    # MAPE is computed by hand and expressed as a percentage
    mape = np.mean(np.abs((y_true - y_hat) / y_true)) * 100
    r2 = r2_score(y_true, y_hat)

    print(title)
    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
    print("Mean Absolute Error (MAE):", mae)
    print("Mean Absolute Percentage Error (MAPE):", mape)
    print("R-squared:", r2)
    return mse, rmse, mae, mape, r2


# Model 1: baseline XGBoost (no hyperparameter tuning)
y_pred = model.predict(X_test)

# Model 2: XGBoost retrained with the grid-search hyperparameters
y_pred2 = best_model.predict(x_test2)

# Make sure both target vectors are numeric before computing metrics
if y_test.dtype == 'object':
    y_test = y_test.astype(float)
if y_test2.dtype == 'object':
    y_test2 = y_test2.astype(float)

mse1, rmse1, mae1, mape1, r2_1 = _report_metrics(
    "=== Model 1: XGBoost tanpa optimasi Gridsearch ===", y_test, y_pred
)

mse2, rmse2, mae2, mape2, r2_2 = _report_metrics(
    "\n=== Model 2: XGBoost dengan optimasi Gridsearch ===", y_test2, y_pred2
)
#---SELESAI---

=== Model 1: XGBoost tanpa optimasi Gridsearch ===
Mean Squared Error (MSE): 1406.367046204681
Root Mean Squared Error (RMSE): 37.501560583590134
Mean Absolute Error (MAE): 27.51816771630527
Mean Absolute Percentage Error (MAPE): 1.8396499395483343
R-squared: 0.976194509693605

=== Model 2: XGBoost dengan optimasi Gridsearch ===
Mean Squared Error (MSE): 919.3785589276656
Root Mean Squared Error (RMSE): 30.321255892981505
Mean Absolute Error (MAE): 22.575748158149977
Mean Absolute Percentage Error (MAPE): 1.5079700599801582
R-squared: 0.9844377344936205


### **9. GUI**

#### **9.1 Backend Flask**

In [84]:
from flask import Flask, render_template, request
import pickle
import joblib
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import numpy as np

# Column order the scaler and the model were fitted with in this notebook
FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Close']

# Load model. It was written with joblib.dump, so it is read back with
# joblib.load (this also avoids leaving an open file handle behind).
model = joblib.load("best_xgboost_model_gridsearch.pkl")

# Load the MinMaxScaler exported with pickle.dump earlier in the notebook.
# The model was trained on scaled features, so requests must be scaled the
# same way before prediction.
with open("scaler_xgb.pkl", "rb") as scaler_file:
    feature_scaler = pickle.load(scaler_file)

app = Flask(__name__)


@app.route('/')
def index():
    """Render the input form."""
    return render_template('index.html')


@app.route('/predict', methods=["POST"])
def predict():
    """Predict the next-day close from the submitted prices and render the result.

    Form fields: `close` (required) and `open`, `high`, `low` (optional).
    Returns the rendered `index.html` with `prediction` and `metrics`, or a
    400 response when a submitted price is not a number.
    """
    form = request.form
    try:
        close_price = float(form['close'])
        # The model expects all four features; the previous code passed only
        # `close`, which does not match the 4-column training matrix.
        # NOTE(review): index.html is not visible here. If the form only posts
        # `close`, the other three prices fall back to it — confirm and add
        # the missing inputs to the form.
        open_price = float(form.get('open') or close_price)
        high_price = float(form.get('high') or close_price)
        low_price = float(form.get('low') or close_price)
    except ValueError:
        # Non-numeric input is a client error, not a server failure
        return "Input tidak valid: harga harus berupa angka.", 400

    # Build a single-row frame with the training column names, scale it with
    # the fitted scaler, then predict next day close price
    input_data = pd.DataFrame(
        [[open_price, high_price, low_price, close_price]],
        columns=FEATURE_COLUMNS,
    )
    predicted_close = model.predict(feature_scaler.transform(input_data))[0]

    # Example evaluation metrics (static for demo)
    y_test = np.array([1500, 1520, 1535, 1550])
    y_pred = np.array([1495, 1525, 1530, 1545])
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    # NOTE(review): scikit-learn returns MAPE as a fraction, whereas the
    # evaluation cell of this notebook reports it in percent — confirm which
    # unit the template expects.
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    metrics = {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'mape': mape,
        'r2': r2
    }

    return render_template(
        'index.html',
        prediction=f"Harga prediksi untuk hari berikutnya: {predicted_close:.2f}",
        metrics=metrics
    )


if __name__ == "__main__":
    # The auto-reloader restarts the process ("Restarting with stat"), which
    # does not work inside a notebook kernel, so it is switched off.
    app.run(debug=True, use_reloader=False, port=5000)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
