In [None]:
!pip install pandas

In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from datetime import timedelta

df = pd.read_csv("btc_1d_data_2018_to_2025.csv")

# --- Step 1: Data Cleaning & Feature Engineering ---
def prepare_features_targets(df):
    df = df.copy()
    df['Open time'] = pd.to_datetime(df['Open time'])
    df = df.sort_values('Open time')
    df.set_index('Open time', inplace=True)
    
    # Drop unnecessary columns
    df.drop(columns=['Close time', 'Ignore'], inplace=True, errors='ignore')

    # Create rolling features
    df['return_1d'] = df['Close'].pct_change()
    df['sma_7'] = df['Close'].rolling(7).mean()
    df['sma_30'] = df['Close'].rolling(30).mean()
    df['volatility_7'] = df['Close'].rolling(7).std()
    df['volatility_30'] = df['Close'].rolling(30).std()

    # Lag features
    for lag in range(1, 6):
        df[f'lag_close_{lag}'] = df['Close'].shift(lag)

    # Target creation
    df['target_t+1'] = df['Close'].shift(-1)
    df['target_t+7'] = df['Close'].shift(-7)
    df['target_t+30'] = df['Close'].shift(-30)
    df['target_t+180'] = df['Close'].shift(-180)
    df['target_t+365'] = df['Close'].shift(-365)

    df.dropna(inplace=True)
    return df

# --- Step 2: Rolling Window Training ---
def rolling_predict(df, horizon, window_size=1000, hold_days=1, fee_rate=0.001):
    predictions = []
    df = df.copy()
    targets = f'target_t+{horizon}'
    features = [col for col in df.columns if col not in ['Open', 'High', 'Low', 'Volume',
                                                         'Number of trades', 'Taker buy base asset volume',
                                                         'Taker buy quote asset volume', 'Close',
                                                         'target_t+1', 'target_t+7', 'target_t+30',
                                                         'target_t+180', 'target_t+365']]

    for i in range(window_size, len(df) - horizon):
        train_df = df.iloc[i - window_size:i]
        test_row = df.iloc[i]

        X_train = train_df[features]
        y_train = train_df[targets]

        model = XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.05,
                             subsample=0.8, colsample_bytree=0.8, random_state=42, tree_method='hist')
        model.fit(X_train, y_train)

        pred_price = model.predict(test_row[features].values.reshape(1, -1))[0]
        current_price = test_row['Close']
        future_price = df.iloc[i + horizon]['Close']

        predicted_return = (pred_price - current_price) / current_price
        actual_return = (future_price - current_price) / current_price

        # Long/Short logic
        direction = np.sign(predicted_return)
        gross_return = direction * actual_return
        net_return = gross_return - 2 * fee_rate if direction != 0 else 0

        predictions.append({
            'date': df.index[i],
            'predicted_return': predicted_return,
            'actual_return': actual_return,
            'net_return': net_return,
            'direction': direction
        })

    return pd.DataFrame(predictions)

# --- Step 3: Metrics Calculation ---
def compute_metrics(results_df, fee_rate=0.001):
    total_return = (1 + results_df['net_return']).prod() - 1
    sharpe_ratio = results_df['net_return'].mean() / results_df['net_return'].std() * np.sqrt(365)
    drawdown = (1 + results_df['net_return']).cumprod()
    max_drawdown = (drawdown / drawdown.cummax() - 1).min()
    win_rate = (results_df['net_return'] > 0).sum() / len(results_df)
    num_trades = (results_df['direction'] != 0).sum()
    total_fees = num_trades * 2 * fee_rate
    hours = (results_df['date'].iloc[-1] - results_df['date'].iloc[0]).total_seconds() / 3600
    trades_per_hour = num_trades / hours
    profit_factor = results_df[results_df['net_return'] > 0]['net_return'].sum() / abs(
        results_df[results_df['net_return'] < 0]['net_return'].sum())

    annual_returns = results_df.copy()
    annual_returns['year'] = annual_returns['date'].dt.year
    annual_returns = annual_returns.groupby('year')['net_return'].apply(lambda x: (1 + x).prod() - 1)

    return {
        "Total Return": f"{total_return*100:.2f}%",
        "Sharpe Ratio (Annualized)": round(sharpe_ratio, 2),
        "Max Drawdown": f"{max_drawdown*100:.2f}%",
        "Win Rate": f"{win_rate*100:.2f}%",
        "Total Number of Trades": int(num_trades),
        "Total Trading Fees": round(total_fees, 4),
        "Trading Period (hours)": int(hours),
        "Trades per Hour": round(trades_per_hour, 4),
        "Profit Factor": round(profit_factor, 2),
        "Annual Returns": annual_returns.to_dict()
    }

df = prepare_features_targets(df)
results = rolling_predict(df, horizon=30)  # or 7, 30, 180, 365
metrics = compute_metrics(results)
print(metrics)


{'Total Return': '-99.54%', 'Sharpe Ratio (Annualized)': -2.0, 'Max Drawdown': '-99.56%', 'Win Rate': '47.10%', 'Total Number of Trades': 1310, 'Total Trading Fees': 2.62, 'Trading Period (hours)': 31416, 'Trades per Hour': 0.0417, 'Profit Factor': 0.74, 'Annual Returns': {2020: -0.6299072203050021, 2021: -0.8411209186317687, 2022: -0.47259199973130106, 2023: -0.7306915971582764, 2024: -0.4501012007211469}}
