In [31]:
# Install Prophet jika belum terpasang
# !pip install prophet

import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Read the pipe-delimited best-price dataset.
df = pd.read_csv("data/tiketcom_bestprice.csv", sep='|')

# Keep only the rows whose origin code is 'JKTC' and parse the departure date
# column into datetimes. `assign` returns a new frame, so `df` is left untouched.
df_jkt = df.loc[df['origin'].eq('JKTC')].assign(
    depart_date=lambda frame: pd.to_datetime(frame['depart_date'])
)

# Every distinct destination code reachable from that origin.
destinations = df_jkt['destination'].unique()

# Accumulators filled by the per-destination loop: forecast frames and
# error-metric dicts.
all_forecasts, eval_results = [], []

# For every destination: build a daily minimum-price series, hold out the last
# 7 observed dates, fit Prophet on the rest, and record both the hold-out
# forecast (appended to `all_forecasts`) and its MAE/RMSE (appended to
# `eval_results`).
for dest in destinations:
    df_dest = df_jkt[df_jkt['destination'] == dest]

    # One row per departure date holding the cheapest price seen for that date.
    # groupby sorts its keys by default, so the rows come out in date order and
    # the positional split below is chronological.
    df_daily = df_dest.groupby('depart_date')['best_price'].min().reset_index()
    df_daily.columns = ['ds', 'y']  # column names Prophet requires

    # Skip destinations with fewer than 30 distinct dates.
    if len(df_daily) < 30:
        continue

    # Chronological split: the last 7 observed dates form the test window.
    df_train = df_daily.iloc[:-7]
    df_test = df_daily.iloc[-7:]

    # NOTE(review): with one observation per date, a daily (intra-day)
    # seasonal component looks unidentifiable — confirm this flag is intended.
    model = Prophet(daily_seasonality=True)
    model.fit(df_train)

    # Predict on the held-out dates themselves. make_future_dataframe(periods=7)
    # yields the 7 consecutive calendar days after the last training date; if
    # the series has gaps those are not the dates in df_test, and the metrics
    # would compare prices for different days. When the series has no gaps the
    # two sets of dates coincide.
    forecast = model.predict(df_test[['ds']])

    # Both arrays are in the same date order (df_test order).
    y_true = df_test['y'].to_numpy()
    y_pred = forecast['yhat'].to_numpy()
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    eval_results.append({'destination': dest, 'MAE': mae, 'RMSE': rmse})

    # `assign` builds a new frame, avoiding a write into a slice of `forecast`
    # (the source of pandas' SettingWithCopyWarning).
    forecast_7 = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].assign(destination=dest)
    all_forecasts.append(forecast_7)

# Combine the per-destination forecasts and metrics into single frames.
if not all_forecasts:
    # pd.concat([]) would raise a bare "No objects to concatenate"; say why.
    raise ValueError(
        "No destination had enough data to forecast; nothing to combine."
    )
result_df = pd.concat(all_forecasts, ignore_index=True)
eval_df = pd.DataFrame(eval_results)

# Pick the 10 most popular destinations by number of rows in df_jkt, but only
# among destinations that actually have a forecast. A destination with many
# rows but too few distinct dates is skipped by the forecasting loop and would
# otherwise show up as an empty plot and an empty table.
dest_counts = df_jkt['destination'].value_counts()
has_forecast = dest_counts.index.isin(result_df['destination'].unique())
top_dest = dest_counts[has_forecast].head(10).index.tolist()

import plotly.graph_objs as go
from plotly.subplots import make_subplots

# 5x2 grid of subplots, one panel per entry of `top_dest`.
panel_titles = [f'Tujuan {d}' for d in top_dest]
fig = make_subplots(rows=5, cols=2, subplot_titles=panel_titles)

for i, dest in enumerate(top_dest):
    subset = result_df.loc[result_df['destination'] == dest]
    dates = subset['ds']

    # Fill the grid left-to-right, top-to-bottom; plotly positions are 1-based.
    grid_row, grid_col = divmod(i, 2)
    row, col = grid_row + 1, grid_col + 1

    # Order matters: the lower bound uses fill='tonexty', which shades up to
    # the trace added just before it (the upper bound).
    traces = (
        go.Scatter(x=dates, y=subset['yhat'], name=f'Forecast {dest}', mode='lines'),
        go.Scatter(
            x=dates,
            y=subset['yhat_upper'],
            name='Upper',
            mode='lines',
            line=dict(width=0),
            showlegend=False,
        ),
        go.Scatter(
            x=dates,
            y=subset['yhat_lower'],
            name='Lower',
            mode='lines',
            fill='tonexty',
            line=dict(width=0),
            fillcolor='rgba(173,216,230,0.3)',
            showlegend=False,
        ),
    )
    for trace in traces:
        fig.add_trace(trace, row=row, col=col)

fig.update_layout(
    height=1200,
    width=1000,
    title_text="Forecast Harga Tiket 7 Hari ke 10 Tujuan Terpopuler dari Jakarta",
)
fig.show()

# Show the forecast table (rounded to 2 decimals) for each top destination.
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
for dest in top_dest:
    print(f"\n=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan {dest} ===")
    rows_for_dest = result_df.loc[result_df['destination'] == dest, forecast_columns]
    display(rows_for_dest.round(2))

import os
import pickle

# Fit one Prophet model per destination and save it under models/.
#
# This loop only trains and exports. It no longer forecasts, evaluates, or
# appends to `eval_results` / `all_forecasts`: the earlier loop already did
# that, and appending again stored every destination twice in both lists.
os.makedirs("models", exist_ok=True)

for dest in destinations:
    df_dest = df_jkt[df_jkt['destination'] == dest]

    # One row per departure date holding the cheapest price seen for that date,
    # with the column names Prophet requires.
    df_daily = df_dest.groupby('depart_date')['best_price'].min().reset_index()
    df_daily.columns = ['ds', 'y']

    # Skip destinations with fewer than 30 distinct dates.
    if len(df_daily) < 30:
        continue

    # NOTE(review): the exported model is fit without the last 7 observed
    # dates, matching the evaluated model. Confirm whether the saved model
    # should instead be fit on the full series.
    df_train = df_daily.iloc[:-7]

    model = Prophet(daily_seasonality=True)
    model.fit(df_train)

    # NOTE(review): Prophet's documentation advises against pickle for saving
    # models and recommends prophet.serialize.model_to_json instead. The .pkl
    # format is kept here because existing loaders may expect these files.
    # Only unpickle files from a trusted source.
    with open(f"models/prophet_model_{dest}.pkl", "wb") as f:
        pickle.dump(model, f)


# Simpan ke file jika perlu
# result_df.to_csv("forecast_7days_all_destinations.csv", index=False)
# eval_df.to_csv("evaluation_metrics_all_destinations.csv", index=False)


19:10:55 - cmdstanpy - INFO - Chain [1] start processing
19:10:55 - cmdstanpy - INFO - Chain [1] done processing
19:10:56 - cmdstanpy - INFO - Chain [1] start processing
19:10:56 - cmdstanpy - INFO - Chain [1] done processing
19:10:57 - cmdstanpy - INFO - Chain [1] start processing
19:10:57 - cmdstanpy - INFO - Chain [1] done processing
19:10:58 - cmdstanpy - INFO - Chain [1] start processing
19:10:58 - cmdstanpy - INFO - Chain [1] done processing
19:10:59 - cmdstanpy - INFO - Chain [1] start processing
19:10:59 - cmdstanpy - INFO - Chain [1] done processing
19:11:00 - cmdstanpy - INFO - Chain [1] start processing
19:11:00 - cmdstanpy - INFO - Chain [1] done processing
19:11:01 - cmdstanpy - INFO - Chain [1] start processing
19:11:01 - cmdstanpy - INFO - Chain [1] done processing
19:11:01 - cmdstanpy - INFO - Chain [1] start processing
19:11:02 - cmdstanpy - INFO - Chain [1] done processing
19:11:02 - cmdstanpy - INFO - Chain [1] start processing
19:11:02 - cmdstanpy - INFO - Chain [1]


=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan JOG ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
35,2024-02-28,1179271.74,1118539.6,1238079.93
36,2024-02-29,1159670.18,1103113.89,1220507.75
37,2024-03-01,1168128.86,1108173.49,1228710.2
38,2024-03-02,1157739.38,1094830.4,1212627.42
39,2024-03-03,1173640.67,1114295.25,1233812.66
40,2024-03-04,1168617.95,1110432.0,1229040.25
41,2024-03-05,1157828.56,1097501.72,1215740.01



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan YIA ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
42,2024-03-14,351708.67,277639.74,425197.85
43,2024-03-15,390192.01,313971.72,462513.42
44,2024-03-16,346384.65,273017.69,423782.39
45,2024-03-17,353544.88,279397.89,431262.32
46,2024-03-18,346536.35,272033.99,418544.1
47,2024-03-19,346310.46,268516.78,419654.14
48,2024-03-20,348531.61,272582.4,419873.76



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan DPS ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
168,2024-03-14,997107.59,927178.36,1068641.93
169,2024-03-15,1043647.12,968718.08,1112432.97
170,2024-03-16,996527.13,918903.7,1068074.13
171,2024-03-17,990236.13,921167.8,1062693.99
172,2024-03-18,991200.09,922994.05,1063041.69
173,2024-03-19,991835.44,919373.55,1059918.61
174,2024-03-20,1011227.01,944775.47,1084234.81



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan BPN ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
126,2023-07-27,1321373.5,1055268.14,1580222.97
127,2023-07-28,1302354.81,1054546.32,1539998.9
128,2023-07-29,1359291.64,1107757.47,1622775.09
129,2023-07-30,1350986.72,1089477.55,1625862.09
130,2023-07-31,1338801.1,1070206.97,1585406.54
131,2023-08-01,1343531.05,1100338.19,1610106.88
132,2023-08-02,1347090.59,1100660.44,1578414.01



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan LOP ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
175,2023-07-27,1051443.32,958332.95,1151141.1
176,2023-07-28,1079552.61,981055.99,1168129.41
177,2023-07-29,1052344.02,955249.55,1147863.83
178,2023-07-30,1091221.28,992504.06,1187712.78
179,2023-07-31,1095435.23,1001810.43,1192486.8
180,2023-08-01,1059018.91,959298.53,1150561.31
181,2023-08-02,1095612.28,999193.04,1188014.9



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan SUB ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
28,2023-07-26,738906.53,684222.9,792265.24
29,2023-07-27,739645.16,686972.59,795006.52
30,2023-07-28,742183.67,690587.48,794789.49
31,2023-07-29,738858.47,689456.96,794949.73
32,2023-07-30,737643.34,686947.86,790826.48
33,2023-07-31,740522.56,689250.4,789067.98
34,2023-08-01,738623.89,687370.37,791606.29



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan UPG ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
154,2023-07-26,1623868.3,1444161.62,1804777.64
155,2023-07-27,1589226.99,1415761.01,1774475.27
156,2023-07-28,1607622.88,1422700.84,1786715.3
157,2023-07-29,1585584.23,1410445.21,1767540.33
158,2023-07-30,1585136.53,1405520.99,1769128.49
159,2023-07-31,1583501.78,1398990.5,1753203.37
160,2023-08-01,1643045.71,1456176.2,1827320.26



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan KNO ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
84,2023-07-26,865332.01,770176.62,958272.54
85,2023-07-27,850592.47,752843.04,942092.73
86,2023-07-28,867611.86,767520.34,964668.0
87,2023-07-29,845355.73,746236.86,946665.68
88,2023-07-30,858474.01,755700.26,959384.21
89,2023-07-31,852878.21,751773.58,943688.01
90,2023-08-01,839715.03,744487.47,932558.0



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan BDJ ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
133,2023-07-25,1087126.75,880068.86,1274123.32
134,2023-07-26,1117414.91,932186.71,1316072.95
135,2023-07-27,1113550.39,934996.03,1300570.96
136,2023-07-28,1104127.51,910145.67,1286472.27
137,2023-07-29,1111608.44,938283.13,1296759.72
138,2023-07-30,1121920.69,918683.97,1306655.39
139,2023-07-31,1116139.49,927657.25,1318199.63



=== Forecast Harga Tiket 7 Hari ke Depan untuk Tujuan MDC ===


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
161,2023-07-25,2910602.18,2576172.48,3237442.63
162,2023-07-26,2887547.84,2566446.52,3210359.15
163,2023-07-27,2977425.46,2640793.81,3326760.55
164,2023-07-28,2941618.42,2573631.83,3286581.57
165,2023-07-29,2909778.4,2576363.67,3250918.49
166,2023-07-30,2885128.83,2534028.25,3240218.86
167,2023-07-31,2879023.51,2514155.05,3205144.9


19:11:22 - cmdstanpy - INFO - Chain [1] start processing
19:11:23 - cmdstanpy - INFO - Chain [1] done processing
19:11:23 - cmdstanpy - INFO - Chain [1] start processing
19:11:23 - cmdstanpy - INFO - Chain [1] done processing
19:11:24 - cmdstanpy - INFO - Chain [1] start processing
19:11:24 - cmdstanpy - INFO - Chain [1] done processing
19:11:24 - cmdstanpy - INFO - Chain [1] start processing
19:11:25 - cmdstanpy - INFO - Chain [1] done processing
19:11:25 - cmdstanpy - INFO - Chain [1] start processing
19:11:26 - cmdstanpy - INFO - Chain [1] done processing
19:11:26 - cmdstanpy - INFO - Chain [1] start processing
19:11:27 - cmdstanpy - INFO - Chain [1] done processing
19:11:27 - cmdstanpy - INFO - Chain [1] start processing
19:11:27 - cmdstanpy - INFO - Chain [1] done processing
19:11:28 - cmdstanpy - INFO - Chain [1] start processing
19:11:28 - cmdstanpy - INFO - Chain [1] done processing
19:11:28 - cmdstanpy - INFO - Chain [1] start processing
19:11:29 - cmdstanpy - INFO - Chain [1]