In [6]:
import pandas as pd
import yfinance as yf

# Fetch BTC data
btc = yf.download("BTC-USD", period="5y", interval="1d")

# Flatten MultiIndex columns
btc.columns = [col[0] for col in btc.columns]  # Extract first level (OHLCV)

# Rename to standard OHLCV format (optional)
btc = btc.rename(columns={"Close": "close", "High": "high", "Low": "low", "Open": "open", "Volume": "volume"})
btc = btc[['close','volume']]
# Display DataFrame
btc = btc.reset_index()

btc['Date'] = pd.to_datetime(btc['Date'])
btc = btc.set_index('Date')
btc_price = btc['close']

btc.head()


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-20,9608.475586,44925260237
2020-02-21,9686.441406,40930547513
2020-02-22,9663.181641,35838025154
2020-02-23,9924.515625,41185185761
2020-02-24,9650.174805,45080496648


In [7]:
import requests
import pandas as pd

# API URL for Bitcoin hashrate (7-day average)
url = "https://api.blockchain.info/charts/hash-rate?timespan=5y&format=json"

# Fetch data
response = requests.get(url)
data = response.json()

# Convert to DataFrame
hr = pd.DataFrame(data["values"])
hr["date"] = pd.to_datetime(hr["x"], unit="s")  # Convert timestamp
hr.rename(columns={"y": "hashrate"}, inplace=True)
hr = hr.drop('x', axis  = 1)
hr = hr.set_index('date')
# df = df.resample('M').sum()
# # Display DataFrame
hr


Unnamed: 0_level_0,hashrate
date,Unnamed: 1_level_1
2020-02-22,1.143793e+08
2020-02-23,1.097423e+08
2020-02-24,1.089694e+08
2020-02-25,1.110203e+08
2020-02-26,1.047008e+08
...,...
2025-02-15,9.080457e+08
2025-02-16,8.569682e+08
2025-02-17,7.775142e+08
2025-02-18,7.945400e+08


In [8]:

# df.index = pd.to_datetime(df.index)
# btc['trend'] = df
btc['hashrate']  = hr['hashrate']
btc = btc.astype(float)
btc = btc.pct_change().dropna()
btc.columns = ['return'	,'volume'	,'hashrate']

btc['target'] = btc['return'].shift(-1) > 0
btc.head()

  btc = btc.pct_change().dropna()


Unnamed: 0_level_0,return,volume,hashrate,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-02-23,0.027044,0.149204,-0.040541,False
2020-02-24,-0.027643,0.09458,-0.007042,False
2020-02-25,-0.031965,-0.056903,0.01882,False
2020-02-26,-0.055791,0.185928,-0.056922,False
2020-02-27,-0.004085,-0.098172,0.080882,False


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

X = btc[['return', 'volume'  , 'hashrate']]
y = btc['target']

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,shuffle=False)

# Step 4: Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Grid search for best hyperparameters
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters
best_rf_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Predictions
y_pred = best_rf_model.predict(X_test)

# Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Best parameters: {'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 200}
[[101 163]
 [118 166]]
              precision    recall  f1-score   support

       False       0.46      0.38      0.42       264
        True       0.50      0.58      0.54       284

    accuracy                           0.49       548
   macro avg       0.48      0.48      0.48       548
weighted avg       0.48      0.49      0.48       548



In [10]:
trade = pd.DataFrame()
trade['action'] = y_pred
trade.index = y_test.index
trade['price'] = btc_price
trade = trade.sort_index()
trade

Unnamed: 0_level_0,action,price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-08-23,False,26431.640625
2023-08-24,True,26162.373047
2023-08-25,True,26047.667969
2023-08-26,True,26008.462891
2023-08-27,True,26089.693359
...,...,...
2025-02-16,False,96175.031250
2025-02-17,True,95773.382812
2025-02-18,False,95539.546875
2025-02-19,False,96635.609375


In [16]:
import vectorbt as vbt
# Create a portfolio using the signals
portfolio = vbt.Portfolio.from_signals(
    trade['price'],
    trade['action'] == True,
    trade['action'] == False,
    size=1,  # Number of shares to buy/sell
    init_cash=10000  ,# Initial capital
    direction = 'longonly'
)

# Display the portfolio statistics
stats = portfolio.stats()
print(stats)


Start                               2023-08-23 00:00:00
End                                 2025-02-20 00:00:00
Period                                548 days 00:00:00
Start Value                                     10000.0
End Value                                  22370.720033
Total Return [%]                               123.7072
Benchmark Return [%]                         271.597103
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              20.755482
Max Drawdown Duration                 196 days 00:00:00
Total Trades                                        135
Total Closed Trades                                 135
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  56.296296
Best Trade [%]                                15.528514
Worst Trade [%]                              -11

In [12]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '11a1a276-cd14-488e-bd36-a14b88460daf',
              'x': array([datetime.datetime(2023, 8, 23, 0, 0),
                          datetime.datetime(2023, 8, 24, 0, 0),
                          datetime.datetime(2023, 8, 25, 0, 0), ...,
                          datetime.datetime(2025, 2, 18, 0, 0),
                          datetime.datetime(2025, 2, 19, 0, 0),
                          datetime.datetime(2025, 2, 20, 0, 0)], dtype=object),
              'xaxis': 'x',
              'y': array([26431.640625  , 26162.37304688, 26047.66796875, ..., 95539.546875  ,
                          96635.609375  , 98219.2109375 ]),
              'yaxis': 'y'},
             {'customdata': array([[0.00000000e+00, 3.82228324e-01, 0.00000000e+00],
                                   [2.00000000