# USD/IDR Forecast (ARIMA + LSTM)

Replicates the methodology of Lakhal (2024) on USD/IDR using only open-source libraries.

**Pipeline**: Fetch data → Stationarity tests (ADF, KPSS) → ARIMA (or ARMA) → LSTM → Compare MAE/MSE.

**Note**: The LSTM section can be too heavy for Binder, so `RUN_LSTM` defaults to `False` below (ARIMA-only). Set `RUN_LSTM = True` to also train and evaluate the LSTM.

In [None]:
# Notebook-wide settings read by the cells below.
FREQ = "M"            # resampling frequency; monthly to match the paper
START = "2018-01-01"  # first date requested from the data source
END = None            # no explicit end date -> use latest
RUN_LSTM = False      # LSTM is off by default; set to True to run it (may be slow on Binder)

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf
# Download the quote history for the currency pair over [START, END].
symbol = 'USDIDR=X'
raw = yf.download(symbol, start=START, end=END)
if raw.empty:
    raise RuntimeError('No data fetched. Try a different date range or check network access.')

# Recent yfinance releases can return MultiIndex columns (field, ticker) even
# for a single symbol, in which case raw['Close'] is a one-column DataFrame
# rather than a Series. Collapse it so that `ts` is always a 1-D Series;
# otherwise `ts.values` is 2-D and the stationarity tests downstream fail.
close = raw['Close']
if isinstance(close, pd.DataFrame):
    close = close.iloc[:, 0]

# Keep the last available close of each period (monthly when FREQ='M').
ts = close.resample(FREQ).last().dropna()
ts.name = 'USDIDR'
display(ts.head(), ts.tail())
ts.plot(title=f'{symbol} ({FREQ})')
plt.show()

In [None]:
# Stationarity diagnostics on the level series.
#   ADF  - null hypothesis: the series has a unit root (non-stationary).
#   KPSS - null hypothesis: the series is stationary around a constant
#          (regression='c').
# The two tests have opposite nulls, so they are read together.
from statsmodels.tsa.stattools import adfuller, kpss

# adfuller returns (stat, p-value, used lags, n obs, critical values, best IC);
# only the statistic, p-value and critical values are reported here.
adf_stat, adf_p, _, _, adf_crit, _ = adfuller(ts.values, autolag='AIC')
kpss_stat, kpss_p, kpss_lags, kpss_crit = kpss(ts.values, regression='c', nlags='auto')

print(f'ADF: stat={adf_stat:.4f}, p={adf_p:.4f}')
print(f'ADF crit: {adf_crit}')
print(f'KPSS: stat={kpss_stat:.4f}, p={kpss_p:.4f}, lags={kpss_lags}')
print(f'KPSS crit: {kpss_crit}')

In [None]:
# Hold out the final h observations as the test set (12 months at monthly
# frequency, like the paper); everything before them is training data.
h = 12
train, test = ts.iloc[:-h], ts.iloc[-h:]
# Last expression of the cell: shows where the split falls.
(train.tail(), test.head())

In [None]:
# ARIMA/ARMA via pmdarima auto_arima (fallback to ARIMA(1,1,1) if needed)
try:
    # Imported inside the try so that a missing or broken pmdarima install
    # also lands in the statsmodels fallback instead of aborting the cell.
    import pmdarima as pm
    auto = pm.auto_arima(train, seasonal=False, stepwise=True, suppress_warnings=True, information_criterion='aic')
    print(auto.summary())
    arima_forecast = auto.predict(n_periods=h)
except Exception as e:  # deliberate catch-all: any failure falls back to a fixed-order model
    print('auto_arima failed, fallback to simple ARIMA(1,1,1):', e)
    from statsmodels.tsa.arima.model import ARIMA
    mdl = ARIMA(train, order=(1,1,1)).fit()
    arima_forecast = mdl.forecast(steps=h)

# The forecast can come back as a pandas Series carrying its own index (an
# integer range or extrapolated dates). pd.Series(series, index=...) aligns by
# label, so any mismatch with test.index would silently yield NaN and make the
# metric calls fail. Strip the index and assign the h values positionally.
arima_pred = pd.Series(np.asarray(arima_forecast), index=test.index, name='ARIMA')
mae_arima = mean_absolute_error(test, arima_pred)
mse_arima = mean_squared_error(test, arima_pred)
print(f'ARIMA MAE={mae_arima:.6f} MSE={mse_arima:.6f}')

# Overlay the out-of-sample forecast on the full actual series.
ax = ts.plot(label='Actual')
arima_pred.plot(ax=ax, label='ARIMA forecast')
plt.legend()
plt.title('ARIMA vs Actual')
plt.show()

In [None]:
# Optional neural baseline: a small LSTM trained on min-max scaled training
# data and evaluated on the same held-out window as ARIMA.
# `lstm_results` stays None when the section is skipped.
lstm_results = None
if not RUN_LSTM:
    print('LSTM skipped. Set RUN_LSTM=True to run it (may be slower on Binder).')
else:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense
    from sklearn.preprocessing import MinMaxScaler

    # The scaler is fitted on the training window only, so the test values
    # never influence the scaling.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train.to_frame())

    def make_sequences(a, n_steps=6):
        """Cut column 0 of ``a`` into overlapping supervised windows.

        Returns ``(X, y)``: each row of ``X`` holds ``n_steps`` consecutive
        values with a trailing feature axis of size 1, and ``y`` holds the
        value immediately following each window.
        """
        n_windows = len(a) - n_steps
        windows = np.array([a[start:start + n_steps, 0] for start in range(n_windows)])
        targets = np.array([a[start + n_steps, 0] for start in range(n_windows)])
        return windows.reshape((windows.shape[0], windows.shape[1], 1)), targets

    n_steps = 6
    X, y = make_sequences(train_scaled, n_steps)

    # One LSTM layer feeding a single linear output unit.
    model = Sequential()
    model.add(LSTM(32, activation='tanh', input_shape=(n_steps, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=200, batch_size=8, verbose=0)

    # Recursive multi-step forecast: start from the last n_steps training
    # values, predict one step, then slide the window forward by dropping the
    # oldest value and appending the prediction.
    history = train_scaled.copy()
    last_seq = history[-n_steps:, 0].reshape((1, n_steps, 1))
    preds_scaled = []
    for _ in range(h):
        yhat = model.predict(last_seq, verbose=0)[0, 0]
        preds_scaled.append(yhat)
        shifted = np.append(last_seq.flatten()[1:], yhat)
        last_seq = shifted.reshape((1, n_steps, 1))

    # Map the scaled predictions back to price units and date them with the
    # test index.
    preds_column = np.array(preds_scaled).reshape(-1, 1)
    lstm_pred = pd.Series(
        scaler.inverse_transform(preds_column).flatten(),
        index=test.index,
        name='LSTM',
    )

    mae_lstm = mean_absolute_error(test, lstm_pred)
    mse_lstm = mean_squared_error(test, lstm_pred)
    print(f'LSTM MAE={mae_lstm:.6f} MSE={mse_lstm:.6f}')

    ax = ts.plot(label='Actual')
    lstm_pred.plot(ax=ax, label='LSTM forecast')
    plt.legend()
    plt.title('LSTM vs Actual')
    plt.show()

    # Published last so it is only set once the whole section has succeeded.
    lstm_results = (mae_lstm, mse_lstm)

In [None]:
# Comparison table: one row per model that was actually run.
rows = [{'Model': 'ARIMA', 'MAE': mae_arima, 'MSE': mse_arima}]
if lstm_results:
    # lstm_results is (MAE, MSE) when the LSTM section ran, otherwise None.
    rows.append({'Model': 'LSTM', 'MAE': lstm_results[0], 'MSE': lstm_results[1]})
df_metrics = pd.DataFrame(rows)
display(df_metrics)

# MSE is in squared units and is typically orders of magnitude larger than
# MAE, so on a shared y-axis the MAE bars would be practically invisible.
# Draw one panel per metric, each with its own y-scale.
df_metrics.plot(
    x='Model',
    y=['MAE', 'MSE'],
    kind='bar',
    subplots=True,
    layout=(1, 2),
    sharey=False,
    legend=False,
    title=['MAE', 'MSE'],
    figsize=(8, 3),
)
plt.suptitle('Metrics')
plt.tight_layout()
plt.show()

### Notes
- Using **monthly** data mirrors the paper. You can switch to `FREQ='D'` for daily data; note that the test horizon `h = 12` then covers 12 days rather than 12 months.
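- If the series is **non-stationary** (as the ADF/KPSS tests above may indicate), `auto_arima` will difference it: it chooses the differencing order `d` with its own unit-root test rather than from the ADF output printed in this notebook.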
- The LSTM is intentionally small to run on Binder's limited CPU.