# ML Baseline Models
Notebook ini membangun baseline machine learning berbasis fitur OHLCV harian ETHUSDT.
Dataset processed akan disimpan ulang agar dapat dipakai ulang oleh pipeline berikutnya,
kemudian model linear (Lasso/ElasticNet) serta LightGBM dievaluasi menggunakan skema TimeSeriesSplit
dan window out-of-sample tahun 2023.

In [1]:
from __future__ import annotations

from pathlib import Path
import sys

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
import lightgbm as lgb
import joblib
from IPython.display import display


def locate_project_root() -> Path:
    """Cari folder proyek yang menyimpan data dan notebook."""

    current = Path.cwd().resolve()
    for candidate in (current, *current.parents):
        if (candidate / 'data').exists() and (candidate / 'notebooks').exists():
            return candidate
    return current


PROJECT_ROOT = locate_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
DATA_PATH = PROJECT_ROOT / 'data' / 'OKX_ETHUSDT.P, 60.csv'
PROCESSED_PATH = PROJECT_ROOT / 'data' / 'processed' / 'ethusdt_hourly_features.csv'
MODEL_DIR = PROJECT_ROOT / 'outputs' / 'models'
PREDICTION_DIR = PROJECT_ROOT / 'outputs' / 'predictions'
from src.performance.metrics import DEFAULT_BARS_PER_YEAR, summarise_fold_performance
BARS_PER_DAY = 24.0
BARS_PER_YEAR = float(BARS_PER_DAY * 365.0)
HORIZON_HOURS = 5
RETURN_TYPE = 'simple'
WALKFORWARD_SPLITS = 5

PROCESSED_PATH.parent.mkdir(parents=True, exist_ok=True)
MODEL_DIR.mkdir(parents=True, exist_ok=True)
PREDICTION_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
def load_ohlcv(path: Path) -> pd.DataFrame:
    """Load OHLCV CSV dan bersihkan kolomnya."""

    path = Path(path).expanduser()
    if not path.exists():
        raise FileNotFoundError(
            f"Dataset {path} tidak tersedia. Download data terlebih dahulu atau ubah DATA_PATH."
        )

    raw = pd.read_csv(path)
    raw.columns = [col.strip().lower().replace(' ', '_') for col in raw.columns]
    keep = ['time', 'open', 'high', 'low', 'close', 'volume']
    missing = [col for col in keep if col not in raw.columns]
    if missing:
        raise KeyError(f"Missing columns {missing} in {path}")
    ohlcv = raw[keep].copy()
    ohlcv['time'] = pd.to_datetime(ohlcv['time'], utc=True)
    numeric_cols = [col for col in keep if col != 'time']
    ohlcv[numeric_cols] = ohlcv[numeric_cols].apply(pd.to_numeric, errors='coerce')
    ohlcv = ohlcv.dropna().set_index('time').sort_index()
    return ohlcv


def engineer_features(ohlcv: pd.DataFrame) -> pd.DataFrame:
    close = ohlcv['close'].astype(float)
    volume = ohlcv['volume'].astype(float)
    features = pd.DataFrame(index=ohlcv.index)
    features['ret_1d'] = close.pct_change(1)
    features['ret_5d'] = close.pct_change(5)
    features['ret_20d'] = close.pct_change(20)
    features['momentum_20d'] = features['ret_1d'].rolling(20).mean()
    features['momentum_60d'] = features['ret_1d'].rolling(60).mean()
    features['volatility_20d'] = features['ret_1d'].rolling(20).std()
    features['volatility_60d'] = features['ret_1d'].rolling(60).std()
    features['volume_change'] = volume.pct_change(1)
    features['volume_zscore_20d'] = (volume - volume.rolling(20).mean()) / volume.rolling(20).std()
    ema_fast = close.ewm(span=20, adjust=False).mean()
    ema_slow = close.ewm(span=60, adjust=False).mean()
    features['price_ema_spread'] = (ema_fast - ema_slow) / ema_slow
    features['high_low_range'] = (ohlcv['high'] - ohlcv['low']) / close
    return features


def build_dataset(
    ohlcv: pd.DataFrame,
    horizon: int = HORIZON_HOURS,
    return_type: str = RETURN_TYPE,
) -> pd.DataFrame:
    features = engineer_features(ohlcv)
    if return_type == 'log':
        forward_returns = (np.log(ohlcv['close'].shift(-horizon)) - np.log(ohlcv['close']))
    elif return_type == 'simple':
        forward_returns = ohlcv['close'].pct_change(horizon).shift(-horizon)
    else:
        raise ValueError("return_type harus 'simple' atau 'log'.")
    labels = (forward_returns > 0).astype(int).rename('target')
    dataset = features.join(labels).join(forward_returns.rename('future_return'))
    dataset = dataset.replace([np.inf, -np.inf], np.nan).dropna()
    return dataset


ohlcv = load_ohlcv(DATA_PATH)
dataset = build_dataset(ohlcv, horizon=HORIZON_HOURS, return_type=RETURN_TYPE)
print(f"Dataset memiliki {len(dataset)} baris dengan {dataset.shape[1]-2} fitur.")
dataset.to_csv(PROCESSED_PATH)
print(f"Dataset tersimpan ke {PROCESSED_PATH}")

dataset_metadata = pd.DataFrame(
    [
        ("horizon_hours", HORIZON_HOURS),
        ("return_type", RETURN_TYPE),
        ("target_definition", "1 jika forward_return > 0 else 0"),
        ("rows", len(dataset)),
        ("start_time", dataset.index.min()),
        ("end_time", dataset.index.max()),
    ],
    columns=["key", "value"],
).set_index("key")


Dataset memiliki 4559 baris dengan 11 fitur.
Dataset tersimpan ke C:\Users\jefri\backtest\data\processed\ethusdt_hourly_features.csv


In [3]:
feature_columns = [col for col in dataset.columns if col not in ["target", "future_return"]]

splitter = TimeSeriesSplit(n_splits=WALKFORWARD_SPLITS)
train_folds = []
test_folds = []
split_records = []

for fold_id, (train_idx, test_idx) in enumerate(splitter.split(dataset)):
    fold_train = dataset.iloc[train_idx].copy()
    fold_test = dataset.iloc[test_idx].copy()
    train_folds.append((fold_id, fold_train))
    test_folds.append((fold_id, fold_test))
    split_records.append(
        {
            "fold": fold_id,
            "train_start_time": fold_train.index.min(),
            "train_end_time": fold_train.index.max(),
            "test_start_time": fold_test.index.min(),
            "test_end_time": fold_test.index.max(),
            "n_train": len(fold_train),
            "n_test": len(fold_test),
        }
    )

if not train_folds or not test_folds:
    raise ValueError("TimeSeriesSplit gagal menghasilkan fold untuk dataset.")

train = train_folds[-1][1]
test = test_folds[-1][1]

train_split = pd.concat({fold: frame for fold, frame in train_folds}, names=["fold", "time"])
test_split = pd.concat({fold: frame for fold, frame in test_folds}, names=["fold", "time"])
cv_split_summary = pd.DataFrame(split_records).set_index("fold")

train_fold_performance = summarise_fold_performance(
    train_folds, return_column='future_return', bars_per_year=BARS_PER_YEAR
)
test_fold_performance = summarise_fold_performance(
    test_folds, return_column='future_return', bars_per_year=BARS_PER_YEAR
)
print("Performa walk-forward (train folds):")
display(train_fold_performance)
print("Performa walk-forward (test folds):")
display(test_fold_performance)

scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(train[feature_columns]),
    index=train.index,
    columns=feature_columns,
)
X_test = pd.DataFrame(
    scaler.transform(test[feature_columns]),
    index=test.index,
    columns=feature_columns,
)

def drop_low_variance_features(
    X_tr: pd.DataFrame, X_te: pd.DataFrame, tol: float = 1e-9
) -> tuple[pd.DataFrame, pd.DataFrame, list[str]]:
    variances = X_tr.var(axis=0)
    keep_cols = variances[variances > tol].index.tolist()
    dropped = sorted(set(X_tr.columns) - set(keep_cols))
    if dropped:
        print(
            "Menghapus fitur dengan varians sangat kecil: " + ", ".join(dropped)
        )
    return X_tr[keep_cols], X_te[keep_cols], keep_cols

X_train, X_test, feature_columns = drop_low_variance_features(X_train, X_test)

y_train = train["target"]
y_test = test["target"]
y_test_returns = test["future_return"]

cv_splits = min(WALKFORWARD_SPLITS, len(train) - 1)
if cv_splits < 2:
    raise ValueError("Dataset train terlalu pendek untuk membuat CV splits.")
tscv = TimeSeriesSplit(n_splits=cv_splits)

def get_probabilities(model, X: pd.DataFrame) -> np.ndarray:
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, 1]
    if hasattr(model, "decision_function"):
        decision = model.decision_function(X)
        return 1.0 / (1.0 + np.exp(-decision))
    preds = model.predict(X)
    return preds.astype(float)

def sharpe_ratio(signal: pd.Series, realized_returns: pd.Series, periods: int = 252) -> float:
    pnl = signal * realized_returns
    std = pnl.std(ddof=0)
    if std == 0 or np.isnan(std):
        return 0.0
    return pnl.mean() / std * np.sqrt(periods)

def rolling_cv_metrics(model, X: pd.DataFrame, y: pd.Series, splitter: TimeSeriesSplit):
    preds = pd.Series(index=y.index, dtype=float)
    accs, aucs = [], []
    for train_idx, val_idx in splitter.split(X):
        X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]
        estimator = clone(model)
        estimator.fit(X_tr, y_tr)
        probs = get_probabilities(estimator, X_val)
        preds.iloc[val_idx] = probs
        accs.append(accuracy_score(y_val, (probs >= 0.5).astype(int)))
        try:
            aucs.append(roc_auc_score(y_val, probs))
        except ValueError:
            aucs.append(np.nan)
    return preds, {"cv_accuracy": float(np.nanmean(accs)), "cv_auc": float(np.nanmean(aucs))}

def fit_and_evaluate(model, X_tr, y_tr, X_te, y_te, realized_returns):
    estimator = clone(model)
    estimator.fit(X_tr, y_tr)
    probs = get_probabilities(estimator, X_te)
    predictions = (probs >= 0.5).astype(int)
    accuracy = accuracy_score(y_te, predictions)
    auc = roc_auc_score(y_te, probs)
    signal = pd.Series(probs, index=y_te.index)
    signal = 2 * signal - 1
    sharpe = sharpe_ratio(signal, realized_returns.loc[signal.index], periods=24 * 365)
    metrics = {
        "accuracy": float(accuracy),
        "roc_auc": float(auc),
        "signal_sharpe": float(sharpe),
    }
    return estimator, probs, signal, metrics


Performa walk-forward (train folds):


Unnamed: 0_level_0,start_time,end_time,n_bars,mean_return,volatility,annualised_vol,sharpe_ratio,hit_rate
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2025-05-07 23:00:00+00:00,2025-06-08 18:00:00+00:00,764.0,0.002235,0.018246,1.707748,11.465297,0.544503
1,2025-05-07 23:00:00+00:00,2025-07-10 09:00:00+00:00,1523.0,0.001496,0.016719,1.564824,8.372955,0.525279
2,2025-05-07 23:00:00+00:00,2025-08-11 00:00:00+00:00,2282.0,0.001995,0.016035,1.500774,11.64427,0.543821
3,2025-05-07 23:00:00+00:00,2025-09-11 15:00:00+00:00,3041.0,0.001573,0.016203,1.516551,9.087557,0.537981
4,2025-05-07 23:00:00+00:00,2025-10-13 06:00:00+00:00,3800.0,0.001196,0.015863,1.484669,7.054783,0.528158


Performa walk-forward (test folds):


Unnamed: 0_level_0,start_time,end_time,n_bars,mean_return,volatility,annualised_vol,sharpe_ratio,hit_rate
fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2025-06-08 19:00:00+00:00,2025-07-10 09:00:00+00:00,759.0,0.000751,0.014989,1.402921,4.69156,0.505929
1,2025-07-10 10:00:00+00:00,2025-08-11 00:00:00+00:00,759.0,0.002997,0.014513,1.358366,19.325181,0.581028
2,2025-08-11 01:00:00+00:00,2025-09-11 15:00:00+00:00,759.0,0.000306,0.016636,1.557013,1.718917,0.520422
3,2025-09-11 16:00:00+00:00,2025-10-13 06:00:00+00:00,759.0,-0.000317,0.014318,1.340084,-2.073497,0.488801
4,2025-10-13 07:00:00+00:00,2025-11-13 21:00:00+00:00,759.0,-0.0015,0.018209,1.70428,-7.709342,0.492754


In [4]:
linear_models = {
    "lasso": LogisticRegression(penalty="l1", solver="liblinear", max_iter=5000, random_state=42),
    "elasticnet": LogisticRegression(
        penalty="elasticnet",
        solver="saga",
        l1_ratio=0.5,
        max_iter=5000,
        random_state=42,
    ),
}

linear_results = {}
for name, model in linear_models.items():
    cv_preds, cv_metrics = rolling_cv_metrics(model, X_train, y_train, tscv)
    estimator, probs, signal, test_metrics = fit_and_evaluate(
        model, X_train, y_train, X_test, y_test, y_test_returns
    )
    linear_results[name] = {
        "model": estimator,
        "cv_metrics": cv_metrics,
        "test_metrics": test_metrics,
    }
    print(f"[{name}] CV metrics: {cv_metrics}")
    print(f"[{name}] Test metrics: {test_metrics}")
linear_results

linear_metrics = pd.DataFrame.from_dict(
    {
        name: {
            "cv_accuracy": result['cv_metrics'].get('cv_accuracy'),
            "cv_auc": result['cv_metrics'].get('cv_auc'),
            "test_accuracy": result['test_metrics'].get('accuracy'),
            "test_roc_auc": result['test_metrics'].get('roc_auc'),
            "test_signal_sharpe": result['test_metrics'].get('signal_sharpe'),
        }
        for name, result in linear_results.items()
    },
    orient="index",
).rename_axis("model")
linear_metrics["deployment_decision"] = np.where(
    linear_metrics["test_signal_sharpe"] > 0,
    "candidate",
    "reject_negative_sharpe",
)
linear_metrics


[lasso] CV metrics: {'cv_accuracy': 0.5071090047393365, 'cv_auc': 0.5024398434382649}
[lasso] Test metrics: {'accuracy': 0.5046113306982872, 'roc_auc': 0.5204805889297868, 'signal_sharpe': -4.529764479797216}
[elasticnet] CV metrics: {'cv_accuracy': 0.5077409162717219, 'cv_auc': 0.502076034847968}
[elasticnet] Test metrics: {'accuracy': 0.5019762845849802, 'roc_auc': 0.520876449753455, 'signal_sharpe': -4.540199898666112}


Unnamed: 0_level_0,cv_accuracy,cv_auc,test_accuracy,test_roc_auc,test_signal_sharpe,deployment_decision
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
lasso,0.507109,0.50244,0.504611,0.520481,-4.529764,reject_negative_sharpe
elasticnet,0.507741,0.502076,0.501976,0.520876,-4.5402,reject_negative_sharpe


In [5]:
X_train_lgb = X_train.astype(np.float32)
X_test_lgb = X_test.astype(np.float32)

lgb_model = lgb.LGBMClassifier(
    objective="binary",
    boosting_type="gbdt",
    n_estimators=800,
    learning_rate=0.03,
    num_leaves=31,
    max_depth=-1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_samples=5,
    min_child_weight=1e-3,
    feature_pre_filter=False,
    random_state=42,
)

_, lgb_cv_metrics = rolling_cv_metrics(lgb_model, X_train_lgb, y_train, tscv)
lgb_fitted, lgb_probs, lgb_signal, lgb_test_metrics = fit_and_evaluate(
    lgb_model, X_train_lgb, y_train, X_test_lgb, y_test, y_test_returns
)

print("[lightgbm] CV metrics:", lgb_cv_metrics)
print("[lightgbm] Test metrics:", lgb_test_metrics)

joblib.dump(lgb_fitted, MODEL_DIR / "lightgbm_ml_baseline.pkl")
prediction_frame = pd.DataFrame(
    {
        "probability": lgb_probs,
        "signal": lgb_signal,
        "future_return": y_test_returns.loc[X_test.index],
    },
    index=X_test.index,
)
prediction_frame["position"] = np.sign(prediction_frame["signal"])
prediction_frame["pnl"] = prediction_frame["position"] * prediction_frame["future_return"]
prediction_path = PREDICTION_DIR / "lightgbm_ml_baseline_predictions.csv"
prediction_frame.to_csv(prediction_path, index_label="time")
prediction_frame.head()

try:
    probability_bins = pd.qcut(
        prediction_frame["probability"], q=10, duplicates="drop"
    )
except ValueError:
    probability_bins = pd.cut(prediction_frame["probability"], bins=5)
probability_calibration = (
    prediction_frame.assign(prob_bucket=probability_bins)
    .groupby("prob_bucket", observed=False)
    .agg(
        sample_size=("future_return", "size"),
        avg_future_return=("future_return", "mean"),
        avg_signal=("signal", "mean"),
        avg_position=("position", "mean"),
    )
)

lgb_metrics = pd.DataFrame(
    {
        "cv_accuracy": [lgb_cv_metrics.get('cv_accuracy')],
        "cv_auc": [lgb_cv_metrics.get('cv_auc')],
        "test_accuracy": [lgb_test_metrics.get('accuracy')],
        "test_roc_auc": [lgb_test_metrics.get('roc_auc')],
        "test_signal_sharpe": [lgb_test_metrics.get('signal_sharpe')],
    },
    index=pd.Index(["lightgbm"], name="model"),
)
lgb_metrics


[LightGBM] [Info] Number of positive: 347, number of negative: 288
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2332
[LightGBM] [Info] Number of data points in the train set: 635, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.546457 -> initscore=0.186364
[LightGBM] [Info] Start training from score 0.186364
[LightGBM] [Info] Number of positive: 673, number of negative: 595
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000323 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2805
[LightGBM] [Info] Number of data points in the train set: 1268, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.530757 -> initscore=0.123184
[LightGBM] [Info] Start training from score 0.123184
[LightGBM] [Info] Number of

Unnamed: 0_level_0,cv_accuracy,cv_auc,test_accuracy,test_roc_auc,test_signal_sharpe
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
lightgbm,0.509953,0.524022,0.541502,0.547663,5.469826


In [6]:
from pathlib import Path
import importlib.util
import pandas as pd
from datetime import datetime, timezone
try:
    PROJECT_ROOT
except NameError:  # pragma: no cover - notebook convenience
    PROJECT_ROOT = Path.cwd()



def export_tables_to_excel(tables, path: Path) -> Path:
    def strip_timezone_from_value(value):
        if value is pd.NaT:
            return value
        if isinstance(value, pd.Timestamp):
            if value.tz is not None:
                return value.tz_convert("UTC").tz_localize(None)
            return value
        if isinstance(value, datetime):
            if value.tzinfo is not None:
                return value.astimezone(timezone.utc).replace(tzinfo=None)
            return value
        return value

    def strip_timezone_from_axis(axis):
        if isinstance(axis, pd.MultiIndex):
            new_levels = [strip_timezone_from_axis(level) for level in axis.levels]
            return axis.set_levels(new_levels)
        if isinstance(axis, pd.DatetimeIndex) and axis.tz is not None:
            return axis.tz_convert("UTC").tz_localize(None)
        if getattr(axis, "dtype", None) == object:
            return pd.Index([strip_timezone_from_value(val) for val in axis], name=axis.name)
        return axis

    def make_excel_safe(frame: pd.DataFrame) -> pd.DataFrame:
        frame = frame.copy()
        frame.index = strip_timezone_from_axis(frame.index)
        frame.columns = strip_timezone_from_axis(frame.columns)
        for column in frame.columns:
            series = frame[column]
            if isinstance(series.dtype, pd.DatetimeTZDtype):
                frame[column] = series.dt.tz_convert("UTC").dt.tz_localize(None)
            elif series.dtype == object:
                frame[column] = series.map(strip_timezone_from_value)
        return frame

    serialisable = []
    for sheet_name, table in tables.items():
        if table is None:
            continue
        if isinstance(table, pd.Series):
            frame = table.to_frame()
        elif isinstance(table, pd.DataFrame):
            frame = table.copy()
        elif isinstance(table, dict):
            frame = pd.DataFrame([table])
        else:
            frame = pd.DataFrame(table)
        frame = make_excel_safe(frame)
        serialisable.append((sheet_name, frame))

    if not serialisable:
        raise ValueError("Tidak ada tabel yang bisa diekspor.")

    path.parent.mkdir(parents=True, exist_ok=True)

    def pick_engine() -> str:
        for candidate in ("openpyxl", "xlsxwriter"):
            if importlib.util.find_spec(candidate):
                return candidate
        raise ModuleNotFoundError(
            "Untuk ekspor Excel diperlukan paket 'openpyxl' atau 'xlsxwriter'."
        )

    def normalise_sheet_name(name: str, existing) -> str:
        safe = (name or "Sheet").strip() or "Sheet"
        safe = safe[:31]
        counter = 1
        candidate = safe
        while candidate in existing:
            suffix = f"_{counter}"
            trimmed = safe[: 31 - len(suffix)] or "Sheet"
            candidate = f"{trimmed}{suffix}"
            counter += 1
        existing.add(candidate)
        return candidate

    engine = pick_engine()
    used_names = set()
    with pd.ExcelWriter(path, engine=engine) as writer:
        for sheet_name, frame in serialisable:
            name = normalise_sheet_name(str(sheet_name), used_names)
            frame.to_excel(writer, sheet_name=name, index=True)

    print(
        f"Berhasil mengekspor {len(serialisable)} sheet ke {path} (engine: {engine})"
    )
    return path
export_dir = PROJECT_ROOT / "outputs" / "result-test"
export_path = export_dir / "ml_baseline.xlsx"

export_tables_to_excel(
    {
        "dataset": dataset,
        "dataset_metadata": dataset_metadata,
        "train_split": train_split,
        "test_split": test_split,
        "cv_split_summary": cv_split_summary,
        "train_fold_performance": train_fold_performance,
        "test_fold_performance": test_fold_performance,
        "final_train_window": train,
        "final_test_window": test,
        "linear_model_metrics": linear_metrics,
        "lightgbm_metrics": lgb_metrics,
        "probability_calibration": probability_calibration,
        "predictions": prediction_frame,
    },
    export_path,
)


Berhasil mengekspor 13 sheet ke C:\Users\jefri\backtest\outputs\result-test\ml_baseline.xlsx (engine: openpyxl)


WindowsPath('C:/Users/jefri/backtest/outputs/result-test/ml_baseline.xlsx')