## XGBoost

- Featurelar StandardScaler bilan masshtablanadi; model sifati MAE va MAPE metrikalari orqali baholanadi.
- 1, 7 va 30 kunlik forecast: agar kelajak uchun featurelar mavjud bo'lmasa, oxirgi qatordagi featurelar har bir kelajak kuni uchun takrorlanadi (shu sababli barcha bashoratlar bir xil chiqadi).


In [None]:
# Scratch cell: prints a fixed string. Presumably a kernel sanity check —
# TODO confirm it is still needed, otherwise remove the cell.
print('hi')

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

In [None]:
# Load the dataset: lower-case the column names, parse `date` as datetime,
# and order the rows chronologically with a fresh 0..n-1 index.
p = Path('datasets/usd_rates_ready.csv')
df = (
    pd.read_csv(p)
    .rename(columns=str.lower)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

# Target is `rate`; `date` is not fed to the model. Every remaining column is
# a feature (the original note says 23 columns — verify against the CSV).
non_feature_cols = {'date', 'rate'}
feature_cols = [col for col in df.columns if col not in non_feature_cols]

# Soften outliers with the IQR rule (applied to the target only by the caller).
def clip_iqr(s, k=1.5):
    """Clip a series to the fences ``[Q1 - k*IQR, Q3 + k*IQR]``.

    Args:
        s: pandas Series of numeric values.
        k: Fence multiplier applied to the interquartile range (default 1.5).

    Returns:
        A new Series in which values outside the fences are replaced by the
        nearest fence; values inside are unchanged.
    """
    first_quartile = s.quantile(0.25)
    third_quartile = s.quantile(0.75)
    spread = third_quartile - first_quartile
    return s.clip(
        lower=first_quartile - k * spread,
        upper=third_quartile + k * spread,
    )

# Chronological train/val/test split (70/15/15). Rows are already sorted by
# date, so positional slicing keeps the future out of the training window.
n = len(df)
train_end = int(n * 0.70)
val_end = int(n * 0.85)
# .copy() gives the training frame its own data, so the clipping assignment
# below neither touches `df` nor triggers a SettingWithCopyWarning.
train_df = df.iloc[:train_end].copy()
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

# Soften target outliers using IQR fences estimated from the TRAINING window
# only. Computing the fences on the full series (before the split) leaks
# validation/test information into preprocessing, and clipping the held-out
# targets makes the reported MAE/MAPE look better than they really are.
# Validation and test targets are therefore left untouched.
train_df['rate'] = clip_iqr(train_df['rate'])

# Forecast horizons, in days.
horizons = [1, 7, 30]

# xgboost is an optional dependency: stop with an install hint if it is missing.
try:
    from xgboost import XGBRegressor
except ImportError:
    raise SystemExit("xgboost o'rnatilmagan: pip install xgboost")

# Feature matrices and target vectors for each chronological split.
X_train, X_val, X_test = (
    train_df[feature_cols],
    val_df[feature_cols],
    test_df[feature_cols],
)
y_train, y_val, y_test = train_df['rate'], val_df['rate'], test_df['rate']

# Fit the scaler on the training features only, then apply the same
# mean/variance to every split.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_val_s = scaler.transform(X_val)
X_test_s = scaler.transform(X_test)

# Gradient-boosted regressor configuration (fixed seed for reproducibility).
xgb_params = dict(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    objective='reg:squarederror',
    random_state=42,
)
model = XGBRegressor(**xgb_params)
# NOTE(review): the validation set is passed as eval_set, but no early
# stopping is configured here, so all n_estimators rounds are trained.
model.fit(X_train_s, y_train, eval_set=[(X_val_s, y_val)], verbose=False)

# Predictions on the held-out splits.
val_pred, test_pred = model.predict(X_val_s), model.predict(X_test_s)

# Absolute (MAE) and relative (MAPE) errors per split.
mae_val, mape_val = (
    mean_absolute_error(y_val, val_pred),
    mean_absolute_percentage_error(y_val, val_pred),
)
mae_test, mape_test = (
    mean_absolute_error(y_test, test_pred),
    mean_absolute_percentage_error(y_test, test_pred),
)
print(f"Val MAE: {mae_val:.2f}, MAPE: {mape_val:.4f}")
print(f"Test MAE: {mae_test:.2f}, MAPE: {mape_test:.4f}")

# 1/7/30-day forecast. No future feature values are available, so the last
# observed feature row is repeated for every future day; the predictions
# within a horizon can therefore all be identical.
last_feat = df[feature_cols].iloc[-1]
forecast_start = df['date'].max() + pd.Timedelta(days=1)

future_preds = {}
for h in horizons:
    # h identical copies of the last feature row, shape (h, n_features).
    repeated_rows = np.repeat(last_feat.values[np.newaxis, :], h, axis=0)
    future_feats = pd.DataFrame(repeated_rows, columns=feature_cols)
    preds = model.predict(scaler.transform(future_feats))
    future_dates = pd.date_range(forecast_start, periods=h, freq='D')
    future_preds[f'h_{h}'] = pd.DataFrame({'date': future_dates, 'pred_rate': preds})

# Notebook display: first rows of the 30-day forecast table.
future_preds['h_30'].head()
