In [186]:
import pandas as pd
import yfinance as yf

# Fetch BTC data
btc = yf.download("BTC-USD", period="5y", interval="1d")

# Flatten MultiIndex columns
btc.columns = [col[0] for col in btc.columns]  # Extract first level (OHLCV)

# Rename to standard OHLCV format (optional)
btc = btc.rename(columns={"Close": "close", "High": "high", "Low": "low", "Open": "open", "Volume": "volume"})
btc_ohlc = btc.copy()

btc = btc[['close','volume']]
# Display DataFrame
btc = btc.reset_index()

btc['Date'] = pd.to_datetime(btc['Date'])
btc = btc.set_index('Date')
btc_daaily_return = btc['close'].pct_change().shift(-1)

btc.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-23,9924.515625,41185185761
2020-02-24,9650.174805,45080496648
2020-02-25,9341.705078,42515259129
2020-02-26,8820.522461,50420050762
2020-02-27,8784.494141,45470195695


In [None]:
import requests
import pandas as pd

# API URL for Bitcoin hashrate (7-day average)
url = "https://api.blockchain.info/charts/hash-rate?timespan=5y&format=json"

# Fetch data
response = requests.get(url)
data = response.json()

# Convert to DataFrame
hr = pd.DataFrame(data["values"])
hr["date"] = pd.to_datetime(hr["x"], unit="s")  # Convert timestamp
hr.rename(columns={"y": "hashrate"}, inplace=True)
hr = hr.drop('x', axis  = 1)
hr = hr.set_index('date')
# # Display DataFrame
hr


Unnamed: 0_level_0,hashrate
date,Unnamed: 1_level_1
2020-02-25,1.110203e+08
2020-02-26,1.047008e+08
2020-02-27,1.131692e+08
2020-02-28,1.216377e+08
2020-02-29,1.070104e+08
...,...
2025-02-18,7.945400e+08
2025-02-19,7.150860e+08
2025-02-20,7.377872e+08
2025-02-21,7.321119e+08


In [188]:
import gtrend
from pytrends.request import TrendReq
pytrend = TrendReq(hl='en-US')
keyword = 'Bitcoin'
start = '2020-02-25'
end = '2025-01-01'
geo=''
cat=0
gprop=''
overlapping = gtrend.get_daily_trend(pytrend, keyword, start, end, geo=geo, cat=cat, gprop=gprop, verbose=False)

In [189]:

# df.index = pd.to_datetime(df.index)
btc['trend'] = overlapping['Bitcoin']
btc['hashrate']  = hr['hashrate']
btc = btc.astype(float)
btc = btc.pct_change().dropna()

# btc['return'] = btc['close'].pct_change()
btc.columns = ['return'	,'volume', 'trend'	,'hashrate']

btc['target'] = btc['return'].shift(-1) >= 0
btc.head()


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



Unnamed: 0_level_0,return,volume,trend,hashrate,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-26,-0.055791,0.185928,0.125,-0.056922,False
2020-02-27,-0.004085,-0.098172,0.0,0.080882,False
2020-02-28,-0.012754,-0.019018,0.0,0.07483,False
2020-02-29,-0.008411,-0.197578,-0.222222,-0.120253,False
2020-03-01,-0.004309,-0.012383,0.0,0.273381,True


In [190]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

X = btc[['trend' ,'volume', 'hashrate']]
y = btc['target']

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,shuffle=False)

# Step 4: Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Grid search for best hyperparameters
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters
best_rf_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Predictions
y_pred = best_rf_model.predict(X_test)

# Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Best parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 50}
[[110 153]
 [125 160]]
              precision    recall  f1-score   support

       False       0.47      0.42      0.44       263
        True       0.51      0.56      0.54       285

    accuracy                           0.49       548
   macro avg       0.49      0.49      0.49       548
weighted avg       0.49      0.49      0.49       548



In [211]:
#Day trade entry when signal is true , exit when day closed
trade = pd.DataFrame()
trade['action'] = y_pred
trade.index = y_test.index
trade['price'] = btc_daaily_return
trade = trade.sort_index()
init_cash = 10000
size = 2 # 1 BTC
trade['cum_return'] = (trade['action'] * trade['price'] * size + 1 ).cumprod()
trade['Balance'] = trade['cum_return'] * init_cash
trade['PnL'] = trade['Balance'] - trade['Balance'].shift(1)
trade = trade.dropna()
trade

Unnamed: 0_level_0,action,price,cum_return,Balance,PnL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-08-27,True,0.000631,1.007516,10075.159181,12.694537
2023-08-28,False,0.062102,1.007516,10075.159181,0.000000
2023-08-29,True,-0.015513,0.976257,9762.573170,-312.586011
2023-08-30,True,-0.050034,0.878565,8785.650868,-976.922302
2023-08-31,False,-0.005042,0.878565,8785.650868,0.000000
...,...,...,...,...,...
2025-02-18,True,0.011472,7.414491,74144.909591,1663.073053
2025-02-19,False,0.017575,7.414491,74144.909591,0.000000
2025-02-20,False,-0.022458,7.414491,74144.909591,0.000000
2025-02-21,True,0.004704,7.484252,74842.520999,697.611409


In [212]:
import numpy as np
import pandas as pd

def calculate_metrics(pnl_series , init_cash):
    """
    Calculate key trading performance metrics from a PnL series.
    
    :param pnl_series: Pandas Series containing PnL values over time.
    :param risk_free_rate: Annual risk-free rate (default 2%).
    :param trading_periods: Number of trading periods in a year (252 for daily).
    :return: Dictionary with performance metrics.
    """

    # Compute returns
    returns = pnl_series.pct_change().dropna()

    # Win Rate
    wins = pnl_series[pnl_series > 0]
    losses = pnl_series[pnl_series < 0]
    win_rate = len(wins) / (len(wins) + len(losses)) if (len(wins) + len(losses)) > 0 else np.nan

    # Profit Factor (Total profit / Total loss)
    total_profit = wins.sum()
    total_loss = abs(losses.sum())
    profit_factor = total_profit / total_loss if total_loss > 0 else np.nan

    # Payoff Ratio (Average win / Average loss)
    avg_win = wins.mean() if len(wins) > 0 else 0
    avg_loss = abs(losses.mean()) if len(losses) > 0 else 0
    payoff_ratio = avg_win / avg_loss if avg_loss > 0 else np.nan

    return {
        "Win Rate": win_rate,
        "Profit Factor": profit_factor,
        "Payoff Ratio": payoff_ratio
    }



In [213]:
metrics = calculate_metrics(trade['PnL'],init_cash)
print(metrics)

{'Win Rate': 0.5112540192926045, 'Profit Factor': 1.4067500557778565, 'Payoff Ratio': 1.3448176633851208}


In [214]:
import vectorbt as vbt
trade['Balance'].vbt.drawdowns.plot()

FigureWidget({
    'data': [{'line': {'color': '#1f77b4'},
              'name': 'Balance',
              'showlegend': True,
              'type': 'scatter',
              'uid': '2dfb026b-4ba3-4fb1-b5df-ef6afa14db59',
              'x': array([datetime.datetime(2023, 8, 27, 0, 0),
                          datetime.datetime(2023, 8, 28, 0, 0),
                          datetime.datetime(2023, 8, 29, 0, 0), ...,
                          datetime.datetime(2025, 2, 20, 0, 0),
                          datetime.datetime(2025, 2, 21, 0, 0),
                          datetime.datetime(2025, 2, 22, 0, 0)], dtype=object),
              'y': array([10075.15918146, 10075.15918146,  9762.57317044, ..., 74144.9095907 ,
                          74842.52099926, 73864.17623259])},
             {'customdata': array([[ 0],
                                   [12],
                                   [21],
                                   [23],
                                   [30]]),
            

In [215]:
btc_ohlc['entry'] = trade['action']
btc_ohlc['exit_price'] = btc_ohlc['close'].shift(-1)
btc_ohlc = btc_ohlc.dropna()

In [216]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Simulated trade data (entry and exit happen on the same day)
dates = btc_ohlc.index 
entry_prices = btc_ohlc['close']  
exit_prices = btc_ohlc['exit_price'] 

# Create trade DataFrame
df_trades = pd.DataFrame({
    'Date': dates,
    'Entry_Price': entry_prices,
    'Exit_Price': exit_prices
})

# Create Plotly figure
fig = go.Figure()

# Add continuous price line
fig.add_trace(go.Scatter(
    x=btc_ohlc.index, y=btc_ohlc['close'],
    mode='lines', name='Price',
    line=dict(color='white', width=2)
))

# Add entry markers (green)
fig.add_trace(go.Scatter(
    x=df_trades['Date'], y=df_trades['Entry_Price'],
    mode='markers', name='Entry (Buy)',
    marker=dict(symbol='triangle-up', size=10, color='green')
))


# Add exit markers (red)
fig.add_trace(go.Scatter(
    x=df_trades['Date'], y=df_trades['Exit_Price'],
    mode='markers', name='Exit (Sell)',
    marker=dict(symbol='triangle-down', size=10, color='red')
))

# Add trade lines connecting entry & exit
for _, trade in df_trades.iterrows():
    fig.add_trace(go.Scatter(
        x=[trade['Date'], trade['Date']],
        y=[trade['Entry_Price'], trade['Exit_Price']],
        mode='lines',
        line=dict(color='gray', width=1),
        showlegend=False
    ))


# Customize layout
fig.update_layout(
    title="Trade Entry & Exit on Same Day with Continuous Price",
    xaxis_title="Date",
    yaxis_title="Price",
    template="plotly_dark",
    legend=dict(x=0, y=1),
    height=600
)

# Show plot
fig.show()
