Import libraries

In [1]:
!pip install pandas numpy xgboost scikit-learn jupyter talib

Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)


ERROR: Could not find a version that satisfies the requirement talib (from versions: none)
ERROR: No matching distribution found for talib


In [5]:
!python -m pip install TA-Lib





In [4]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import talib
import warnings
warnings.filterwarnings('ignore')

In [17]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import talib
import warnings

warnings.filterwarnings('ignore')

class BitcoinStrategy:
    def __init__(self, fee_rate=0.001):
        self.fee_rate = fee_rate
        self.scaler = StandardScaler()
        
    def add_technical_indicators(self, df):    
        """Add technical indicators to the DataFrame."""
        df = df.copy()
        
        # Ensure we have OHLCV columns
        high = df['High'].values
        low = df['Low'].values
        close = df['Close'].values
        volume = df['Volume'].values
        
        # Moving Averages
        df['sma_20'] = talib.SMA(close, timeperiod=20)
        df['sma_50'] = talib.SMA(close, timeperiod=50)
        df['ema_20'] = talib.EMA(close, timeperiod=20)
        
        # MACD
        df['macd'], df['macd_signal'], _ = talib.MACD(close)
        
        # RSI
        df['rsi'] = talib.RSI(close, timeperiod=14)
        
        # Bollinger Bands
        df['bb_upper'], df['bb_middle'], df['bb_lower'] = talib.BBANDS(close)
        
        # ATR
        df['atr'] = talib.ATR(high, low, close, timeperiod=14)
        
        # Price Action Features
        df['daily_return'] = df['Close'].pct_change()  # Use pct_change() directly on the DataFrame
        df['high_low_range'] = df['High'] - df['Low']  # Calculate high-low range
        df['close_open_range'] = df['Close'] - df['Open']  # Calculate close-open range
        
        # Clean up NaN values
        df.dropna(inplace=True)
        
        return df
    
    def prepare_features_targets(self, df):
        """Prepare features and targets for modeling."""
        df = df.copy()
        df['Open time'] = pd.to_datetime(df['Open time'])
        df.set_index('Open time', inplace=True)
        
        # Drop unnecessary columns
        df.drop(columns=['Close time', 'Ignore'], inplace=True, errors='ignore')
        
        # Add technical indicators
        df = self.add_technical_indicators(df)
        
        # Create target returns
        df['target_return'] = df['Close'].pct_change(-1)
        
        # Clean data
        df.dropna(inplace=True)
        
        # Ensure all columns are numeric
        df = df.apply(pd.to_numeric, errors='coerce')
        
        return df
    
    def select_features(self, df):
        """Select features for the model."""
        exclude_cols = ['Close', 'Volume', 'target_return']
        features = [col for col in df.columns if col not in exclude_cols]
        return features
    
    def rolling_predict(self, df, horizon=1, window_size=300):
        """Perform rolling predictions."""
        predictions = []
        target_col = 'target_return'
        
        # Select features
        features = self.select_features(df)
        
        # Initialize model
        model = XGBRegressor(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )
        
        for i in range(window_size, len(df) - horizon):
            # Get training data
            train_df = df.iloc[i - window_size:i]
            test_row = df.iloc[i]
            
            X_train = train_df[features]
            y_train = train_df[target_col]
            
            # Remove NaN values
            valid_idx = ~(X_train.isna().any(axis=1) | y_train.isna())
            X_train = X_train[valid_idx]
            y_train = y_train[valid_idx]
            
            if len(X_train) < 50:  # Minimum requirement
                continue
            
            # Train model
            model.fit(X_train, y_train)
            
            # Make prediction
            X_test = test_row[features].values.reshape(1, -1)
            if np.any(np.isnan(X_test)):
                continue
                
            predicted_return = model.predict(X_test)[0]
            predicted_return = np.clip(predicted_return, -0.15, 0.15)  # Clipping
            
            # Get actual return
            actual_return = df.iloc[i + horizon - 1][target_col]
            
            # Calculate net return
            signal = np.sign(predicted_return)
            gross_return = signal * actual_return
            net_return = gross_return - self.fee_rate
            
            predictions.append({
                'date': df.index[i],
                'predicted_return': predicted_return,
                'actual_return': actual_return,
                'net_return': net_return,
                'signal': signal
            })
        
        return pd.DataFrame(predictions)
    
    def compute_metrics(self, results_df):
        """Calculate performance metrics."""
        if len(results_df) == 0:
            return {"Error": "No predictions generated"}
        
        # Calculate cumulative returns
        results_df['cumulative_return'] = (1 + results_df['net_return']).cumprod()
        total_return = results_df['cumulative_return'].iloc[-1] - 1
        
        # Time-based metrics
        days = (results_df['date'].iloc[-1] - results_df['date'].iloc[0]).days
        annual_return = (1 + total_return) ** (365 / days) - 1
        
        # Risk metrics
        daily_returns = results_df['net_return']
        volatility = daily_returns.std() * np.sqrt(365)
        sharpe_ratio = (annual_return - 0.02) / volatility if volatility > 0 else 0
        
        # Drawdown
        cumulative = results_df['cumulative_return']
        running_max = cumulative.expanding().max()
        drawdown = (cumulative - running_max) / running_max
        max_drawdown = drawdown.min()
        
        # Trading statistics
        win_rate = (daily_returns > 0).sum() / len(daily_returns)
        num_trades = (results_df['signal'] != 0).sum()
        
        return {
            "Total Return": f"{total_return*100:.2f}%",
            "Annualized Return": f"{annual_return*100:.2f}%",
            "Volatility (Annual)": f"{volatility*100:.2f}%",
            "Sharpe Ratio": round(sharpe_ratio, 3),
            "Max Drawdown": f"{max_drawdown*100:.2f}%",
            "Win Rate": f"{win_rate*100:.2f}%",
            "Total Trades": int(num_trades)
        }


In [18]:
def run_robust_strategy(df_path):
    """Run the robust strategy."""
    
    # Load data
    df = pd.read_csv(df_path)
    
    # Initialize strategy
    strategy = BitcoinStrategy(fee_rate=0.001)
    
    # Prepare data
    print("Preparing features and technical indicators...")
    df = strategy.prepare_features_targets(df)
    print(f"Data shape after feature engineering: {df.shape}")
    
    # Test different horizons
    horizons = [1, 7, 30, 180, 365]
    results = {}
    
    for horizon in horizons:
        print(f"\n=== Testing {horizon}-day horizon ===")
        try:
            predictions = strategy.rolling_predict(df, horizon=horizon)
            metrics = strategy.compute_metrics(predictions)
            results[f"{horizon}_day"] = {
                'metrics': metrics,
                'predictions': predictions
            }
            
            print(f"Results for {horizon}-day strategy:")
            for key, value in metrics.items():
                print(f"  {key}: {value}")
                
        except Exception as e:
            print(f"Error with {horizon}-day horizon: {e}")
            results[f"{horizon}_day"] = {"Error": str(e)}
    
    return results

In [19]:
results = run_robust_strategy('btc_1d_data_2018_to_2025.csv')

Preparing features and technical indicators...
Data shape after feature engineering: (2655, 23)

=== Testing 1-day horizon ===
Results for 1-day strategy:
  Total Return: -99.93%
  Annualized Return: -67.59%
  Volatility (Annual): 67.90%
  Sharpe Ratio: -1.025
  Max Drawdown: -99.95%
  Win Rate: 46.52%
  Total Trades: 2354

=== Testing 7-day horizon ===
Results for 7-day strategy:
  Total Return: 231.60%
  Annualized Return: 20.49%
  Volatility (Annual): 67.68%
  Sharpe Ratio: 0.273
  Max Drawdown: -63.58%
  Win Rate: 49.96%
  Total Trades: 2348

=== Testing 30-day horizon ===
Results for 30-day strategy:
  Total Return: -99.07%
  Annualized Return: -52.03%
  Volatility (Annual): 67.72%
  Sharpe Ratio: -0.798
  Max Drawdown: -99.29%
  Win Rate: 47.66%
  Total Trades: 2325

=== Testing 180-day horizon ===
Results for 180-day strategy:
  Total Return: -92.08%
  Annualized Return: -34.67%
  Volatility (Annual): 68.37%
  Sharpe Ratio: -0.536
  Max Drawdown: -92.29%
  Win Rate: 48.41%
  Tot