In [24]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [25]:
# Read data generated from extract.py
df = pd.read_csv("rates.csv")

In [26]:
df.index = pd.to_datetime(df['date'], format="%Y-%m-%d")

In [27]:
fig = px.line(df, x="date", y="rate")
fig.show()

In [28]:
train = df[df.index < pd.to_datetime("2024-07-12", format='%Y-%m-%d')]
test = df[df.index >= pd.to_datetime("2024-07-12", format='%Y-%m-%d')]

In [29]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=train['date'], y=train['rate'],
                    mode='lines',
                    name='train'))
fig.add_trace(go.Scatter(x=test['date'], y=test['rate'],
                    mode='lines',
                    name='test'))

fig.show()

In [30]:
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
from sklearn.metrics import mean_squared_error

In [31]:
y = train['rate']

In [None]:
p = 1  # AutoRegressive (AR) order
d = 1  # Differencing order
q = 1  # Moving Average (MA) order


ARIMAmodel = ARIMA(y, order = (p, d, q))
ARIMAmodel = ARIMAmodel.fit()

y_pred = ARIMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha = 0.05)
y_pred_df["Predictions"] = ARIMAmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]


min_rmse = np.sqrt(mean_squared_error(test["rate"].values, y_pred_df["Predictions"]))
best_p, best_d, best_q = p, d, q

In [None]:
# grid search the best parameters
for p in range(2, 10):
  for d in range(2, 10):
    for q in range(2, 10):
      ARIMAmodel = ARIMA(y, order = (p, d, q))
      ARIMAmodel = ARIMAmodel.fit(method_kwargs={'maxiter':300})

      y_pred = ARIMAmodel.get_forecast(len(test.index))
      y_pred_df = y_pred.conf_int(alpha = 0.05)
      y_pred_df["Predictions"] = ARIMAmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
      y_pred_df.index = test.index
      y_pred_out = y_pred_df["Predictions"]

      current_rmse = np.sqrt(mean_squared_error(test["rate"].values, y_pred_df["Predictions"]))

      if current_rmse < min_rmse:
        best_p, best_d, best_q = p, d, q
        min_rmse = current_rmse

In [35]:
print(f"Min rmse: {min_rmse}")
print(f"The best paramaters - p: {best_p}, d: {d}, q: {q}")

Min rmse: 0.0017504244358549162
The best paramaters - p: 2, d: 8, q: 4


In [None]:
ARIMAmodel = ARIMA(y, order = (best_p, best_d, best_q))
ARIMAmodel = ARIMAmodel.fit()

y_pred = ARIMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha = 0.05)
y_pred_df["Predictions"] = ARIMAmodel.predict(start = y_pred_df.index[0], end = y_pred_df.index[-1])
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]

In [37]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=train['date'], y=train['rate'],
                    mode='lines',
                    name='train'))
fig.add_trace(go.Scatter(x=test['date'], y=test['rate'],
                    mode='lines',
                    name='test'))
fig.add_trace(go.Scatter(x=test['date'], y=y_pred_out,
                    mode='lines',
                    name='predict'))

fig.show()