# SARIMAX (23 exogenous) ? 1/7/30 kunlik forecast

- Target: `rate`, exog: qolgan 23 ustun.
- Outlier clip, 70/15/15 split, MAE/MAPE.
- 1/7/30 forecast: exog kelajak bo'lmasa, oxirgi qatordagi featurelar takrorlanadi.


In [1]:
print('hi')

hi


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

In [4]:
# Ma'lumotlarni yuklash
p = Path('datasets/usd_rates_ready.csv')
df = pd.read_csv(p)
df.columns = [c.lower() for c in df.columns]
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date').reset_index(drop=True)

# Target = rate, date ishlatilmaydi, qolgan 23 ta ustun feature sifatida
feature_cols = [c for c in df.columns if c not in ['date', 'rate']]

# Outlierlarni IQR bilan yumshatish (faqat targetda)
def clip_iqr(s, k=1.5):
    q1, q3 = s.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower, upper = q1 - k * iqr, q3 + k * iqr
    return s.clip(lower, upper)

df['rate'] = clip_iqr(df['rate'])

# Vaqt bo'yicha train/val/test (70/15/15)
n = len(df)
train_end = int(n * 0.70)
val_end = int(n * 0.85)
train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

# Bashorat ufqlari
horizons = [1, 7, 30]

try:
    import statsmodels.api as sm
except ImportError:
    raise SystemExit("statsmodels o'rnatilmagan: pip install statsmodels")

X_train = train_df[feature_cols]
X_val = val_df[feature_cols]
X_test = test_df[feature_cols]

order = (1, 1, 1)
seasonal_order = (0, 0, 0, 7)
model = sm.tsa.statespace.SARIMAX(train_df['rate'], order=order, seasonal_order=seasonal_order,
                                 exog=X_train, enforce_stationarity=False, enforce_invertibility=False)
res = model.fit(disp=False)

# Val + test bashoratini bitta chaqiriqda olish (exog uzunligi mos kelsin)
future_exog = pd.concat([X_val, X_test])
future_pred = res.predict(start=len(train_df), end=len(df)-1, exog=future_exog)

val_len = len(val_df)
val_pred = future_pred.iloc[:val_len]
test_pred = future_pred.iloc[val_len:]

mae_val = mean_absolute_error(val_df['rate'], val_pred)
rmse_val = root_mean_squared_error(val_df['rate'], val_pred)

mae_test = mean_absolute_error(test_df['rate'], test_pred)
rmse_test = root_mean_squared_error(test_df['rate'], test_pred)

print(f"Val MAE: {mae_val:.2f}, RMSE: {rmse_val:.4f}")
print(f"Test MAE: {mae_test:.2f}, RMSE: {rmse_test:.4f}")

# 1/7/30 forecast
last_feat = df[feature_cols].iloc[-1].values
horizon_forecasts = {}
for h in horizons:
    future_exog = np.repeat([last_feat], h, axis=0)
    fc = res.forecast(steps=h, exog=future_exog)
    horizon_forecasts[f'h_{h}'] = fc

horizon_forecasts['h_30'].head()




Val MAE: 0.00, RMSE: 0.0000
Test MAE: 0.00, RMSE: 0.0000


1800    11990.09
1801    11990.09
1802    11990.09
1803    11990.09
1804    11990.09
Name: predicted_mean, dtype: float64