# Bitcoin and Ethereum ML trading bot

In [387]:
# Initial imports
import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from pandas.tseries.offsets import DateOffset
from finta import TA
import numpy as np
import hvplot.pandas
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import classification_report

In [388]:
# Load .env environment variables
env_path = Path("./alpaca_keys.env")
load_dotenv(dotenv_path=env_path)

Python-dotenv could not parse statement starting at line 1


True

## Connect to Alpaca API and import data

In [389]:

API_KEY = os.getenv("ALPACA_API_KEY")
API_SECRET = os.getenv("ALPACA_SECRET_KEY")
ALPACA_API_BASE_URL = "https://paper-api.alpaca.markets"

In [390]:
# Create a connection to the API 
alpaca = tradeapi.REST(
    API_KEY,
    API_SECRET,
    ALPACA_API_BASE_URL,
    api_version="v2")

In [391]:
# Set the tickers
eth_ticker = ["ETHUSD"]
btc_ticker = ["BTCUSD"]


In [392]:
# Set the parameter tz to "America/New_York", 

start_date = pd.Timestamp("2016-06-08", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2022-02-08", tz="America/New_York").isoformat()

In [393]:
# Set timeframe to one day (1D) for the Alpaca API
timeframe = "1Day"

## Preprocess Ethereum Data

In [394]:
# Use the Alpaca get_crypto_bars function to gather the price information for ETHEREUM

eth_data_df = alpaca.get_crypto_bars(
    eth_ticker,
    timeframe,
    start = start_date,
    end = end_date
).df

# Review the resulting `eth_data_df` DataFrame. 
display(eth_data_df.head())


Unnamed: 0_level_0,exchange,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-06-08 05:00:00+00:00,CBSE,14.64,14.8,14.4,14.64,6849.865353,358,14.601223,ETHUSD
2016-06-09 05:00:00+00:00,CBSE,14.61,14.74,14.52,14.67,2021.553794,256,14.608697,ETHUSD
2016-06-10 05:00:00+00:00,CBSE,13.93,14.11,13.86,14.03,1585.286563,122,14.010104,ETHUSD
2016-06-11 05:00:00+00:00,CBSE,14.14,14.59,13.92,14.53,3639.141808,563,14.286751,ETHUSD
2016-06-12 05:00:00+00:00,CBSE,14.52,15.91,14.28,15.58,16706.624131,762,15.097705,ETHUSD


In [395]:
eth_prices_df = eth_data_df.drop(columns = ['exchange', 'trade_count', 'vwap', 'symbol'])
eth_prices_df.index =eth_prices_df.index.date

display(eth_prices_df.head(5))
display(eth_prices_df.tail(5))

Unnamed: 0,open,high,low,close,volume
2016-06-08,14.64,14.8,14.4,14.64,6849.865353
2016-06-09,14.61,14.74,14.52,14.67,2021.553794
2016-06-10,13.93,14.11,13.86,14.03,1585.286563
2016-06-11,14.14,14.59,13.92,14.53,3639.141808
2016-06-12,14.52,15.91,14.28,15.58,16706.624131


Unnamed: 0,open,high,low,close,volume
2022-02-06,3010.18,3101.16,2967.57,3095.09,2242.796106
2022-02-06,3020.0,3112.0,2954.0,3092.7,12299.525
2022-02-07,3095.0,3199.08,3051.05,3187.58,220032.905103
2022-02-07,3098.79,3197.5,3062.28,3191.6,3625.869325
2022-02-07,3097.2,3196.9,3050.6,3188.2,8537.947


## Preprocess Bitcoin Data

In [396]:
# Use the Alpaca get_crypto_bars function to gather the price information for BITCOIN
btc_data_df = alpaca.get_crypto_bars(
    btc_ticker,
    timeframe,
    start = start_date,
    end = end_date
).df

# Review the resulting `btc_data_df` DataFrame. 
display(btc_data_df.head())

Unnamed: 0_level_0,exchange,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-06-08 05:00:00+00:00,CBSE,583.19,583.48,579.04,581.4,1005.394407,6777,581.282133,BTCUSD
2016-06-09 05:00:00+00:00,CBSE,581.4,582.06,574.0,577.46,3909.795967,17415,577.913543,BTCUSD
2016-06-10 05:00:00+00:00,CBSE,580.31,586.12,580.31,585.7,1328.264354,5173,583.444632,BTCUSD
2016-06-11 05:00:00+00:00,CBSE,609.5,639.0,605.0,637.0,4544.26999,13470,623.347998,BTCUSD
2016-06-12 05:00:00+00:00,CBSE,637.0,687.71,625.79,677.86,13388.552021,37739,654.400225,BTCUSD


In [397]:
btc_prices_df = btc_data_df.drop(columns = ['exchange', 'trade_count', 'vwap', 'symbol'])
btc_prices_df.index =btc_prices_df.index.date

display(btc_prices_df.head(5))
display(btc_prices_df.tail(5))


Unnamed: 0,open,high,low,close,volume
2016-06-08,583.19,583.48,579.04,581.4,1005.394407
2016-06-09,581.4,582.06,574.0,577.46,3909.795967
2016-06-10,580.31,586.12,580.31,585.7,1328.264354
2016-06-11,609.5,639.0,605.0,637.0,4544.26999
2016-06-12,637.0,687.71,625.79,677.86,13388.552021


Unnamed: 0,open,high,low,close,volume
2022-02-06,41683.0,43014.0,41189.0,42770.6,371.202088
2022-02-06,41579.0,43031.0,41135.0,42816.0,1301.8667
2022-02-07,42835.26,45035.0,42258.61,44916.93,17605.802688
2022-02-07,42779.7,44997.4,42290.1,44983.7,660.084832
2022-02-07,42832.0,45039.0,42266.0,44906.0,2003.415


## Create Training and Testing Period (Train with 75% of data and use remaining 25% as test data)

In [398]:
training_startdate = pd.to_datetime("2016-06-08").date()
training_enddate = pd.to_datetime( "2020-09-08").date()

testing_startdate = pd.to_datetime("2020-09-09").date()
testing_enddate = pd.to_datetime( "2022-02-08").date()


### Ethereum train/test periods

In [399]:
#Create Ethereum training and testing period

eth_training_dates = eth_prices_df.loc[training_startdate:testing_enddate]

eth_testing_dates = eth_prices_df.loc[testing_startdate:testing_enddate]

In [400]:
display(eth_training_dates.head())
display(eth_testing_dates.head())

Unnamed: 0,open,high,low,close,volume
2016-06-08,14.64,14.8,14.4,14.64,6849.865353
2016-06-09,14.61,14.74,14.52,14.67,2021.553794
2016-06-10,13.93,14.11,13.86,14.03,1585.286563
2016-06-11,14.14,14.59,13.92,14.53,3639.141808
2016-06-12,14.52,15.91,14.28,15.58,16706.624131


Unnamed: 0,open,high,low,close,volume
2020-09-09,351.11,375.74,350.23,373.13,73136.321257
2020-09-10,373.22,377.79,360.23,368.06,142426.009731
2020-09-11,373.93,374.64,367.71,370.96,22302.689108
2020-09-12,387.82,390.41,382.47,386.43,30038.800648
2020-09-13,366.58,367.84,355.66,364.56,30864.312023


### Bitcoin train/test periods

In [401]:
#Create Bitcoin training and testing period

btc_training_dates = btc_prices_df.loc[training_startdate:testing_enddate]

btc_testing_dates = btc_prices_df.loc[testing_startdate:testing_enddate]

In [402]:
display(btc_training_dates.head())
display(btc_testing_dates.head())

Unnamed: 0,open,high,low,close,volume
2016-06-08,583.19,583.48,579.04,581.4,1005.394407
2016-06-09,581.4,582.06,574.0,577.46,3909.795967
2016-06-10,580.31,586.12,580.31,585.7,1328.264354
2016-06-11,609.5,639.0,605.0,637.0,4544.26999
2016-06-12,637.0,687.71,625.79,677.86,13388.552021


Unnamed: 0,open,high,low,close,volume
2020-09-09,10076.82,10349.99,10060.89,10224.56,7827.280625
2020-09-10,10387.43,10489.0,10239.03,10343.23,9089.743268
2020-09-11,10394.41,10400.0,10328.0,10363.76,1297.194483
2020-09-12,10363.94,10483.23,10276.19,10446.52,5197.634065
2020-09-13,10336.76,10375.0,10253.0,10349.29,1738.96646


# The Thayer Method

### Ethereum 

In [403]:
eth_prices_df["ema8"] = TA.EMA(eth_prices_df, 8) #Exponential Moving Average
eth_prices_df["ema34"] = TA.EMA(eth_prices_df, 34) #Exponential Moving Average

In [404]:
eth_signals = eth_prices_df.drop(columns = ['open', 'high', 'low','volume'])
eth_signals["Actual Returns"] = eth_signals["close"].pct_change()
eth_signals= eth_signals.dropna()

eth_signals

Unnamed: 0,close,ema8,ema34,Actual Returns
2016-06-09,14.67,14.656875,14.655441,0.002049
2016-06-10,14.03,14.393782,14.434581,-0.043626
2016-06-11,14.53,14.441524,14.460580,0.035638
2016-06-12,15.58,14.795179,14.711554,0.072264
2016-06-13,18.40,15.824010,15.420131,0.181001
...,...,...,...,...
2022-02-06,3095.09,3011.149813,2811.617681,0.000032
2022-02-06,3092.70,3029.272076,2827.679528,-0.000772
2022-02-07,3187.58,3064.451615,2848.245269,0.030679
2022-02-07,3191.60,3092.706812,2867.865540,0.001261


In [405]:
eth_signals['Signal'] = 0.0

In [406]:
# Generate the trading signal 0 or 1,
# where 1 is the short-window (ema8) greater than the long-window (SMA100)
# and 0 is when the condition is not met
eth_signals["Signal"] = np.where(
    eth_signals["ema8"]> eth_signals["ema34"], 1.0, 0.0
)


In [407]:
eth_signals["Entry/Exit"] = eth_signals["Signal"].diff()
eth_signals= eth_signals.dropna()


In [408]:
eth_signals

Unnamed: 0,close,ema8,ema34,Actual Returns,Signal,Entry/Exit
2016-06-10,14.03,14.393782,14.434581,-0.043626,0.0,-1.0
2016-06-11,14.53,14.441524,14.460580,0.035638,0.0,0.0
2016-06-12,15.58,14.795179,14.711554,0.072264,1.0,1.0
2016-06-13,18.40,15.824010,15.420131,0.181001,1.0,0.0
2016-06-14,18.69,16.593366,15.973598,0.015761,1.0,0.0
...,...,...,...,...,...,...
2022-02-06,3095.09,3011.149813,2811.617681,0.000032,1.0,0.0
2022-02-06,3092.70,3029.272076,2827.679528,-0.000772,1.0,0.0
2022-02-07,3187.58,3064.451615,2848.245269,0.030679,1.0,0.0
2022-02-07,3191.60,3092.706812,2867.865540,0.001261,1.0,0.0


In [409]:
exit =eth_signals[eth_signals['Entry/Exit'] == -1.0]['close'].hvplot.scatter(
    color='red',
    legend=False,
    ylabel='Price in $',
    width=1000,
    marker = "v",
    height=400)

# Visualize entry position relative to close price
entry = eth_signals[eth_signals['Entry/Exit'] == 1.0]['close'].hvplot.scatter(
    color='green',
    legend=False,
    ylabel='Price in $',
    marker = "^",
    width=1000,
    height=400)

# Visualize close price for the investment
security_close = eth_signals[['close']].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400)

# Visualize moving averages
moving_avgs = eth_signals[['ema8', 'ema34']].hvplot(
    ylabel='Price in $',
    color= ['blue', 'salmon'],
    width=1000,
    height=400)

# Create the overlay plot
entry_exit_plot = security_close * moving_avgs * entry * exit

# Show the plot
entry_exit_plot.opts(
    title="Ethereum - EMA8, EMA34, Entry and Exit Points"
)

In [410]:
eth_signals['Entry/Exit'].value_counts()

 0.0    2267
 1.0      33
-1.0      33
Name: Entry/Exit, dtype: int64

In [411]:
#eth_signals['Strategy Returns'] = eth_signals['Actual Returns'] * eth_signals['Entry/Exit'].shift()


In [412]:
X = eth_signals[['ema8', 'ema34']].shift().dropna()

X.head()

Unnamed: 0,ema8,ema34
2016-06-11,14.393782,14.434581
2016-06-12,14.441524,14.46058
2016-06-13,14.795179,14.711554
2016-06-14,15.82401,15.420131
2016-06-15,16.593366,15.973598


In [413]:
# Create the target set selecting the Signal column and assiging it to y
y = eth_signals['Entry/Exit']

# Review the value counts
y.value_counts()

 0.0    2267
 1.0      33
-1.0      33
Name: Entry/Exit, dtype: int64

In [414]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_startdate:training_enddate]
y_train = y.loc[pd.to_datetime("2016-06-11").date():training_enddate]


display(X_train.head(5))
display(X_train.tail(5))

Unnamed: 0,ema8,ema34
2016-06-11,14.393782,14.434581
2016-06-12,14.441524,14.46058
2016-06-13,14.795179,14.711554
2016-06-14,15.82401,15.420131
2016-06-15,16.593366,15.973598


Unnamed: 0,ema8,ema34
2020-09-04,418.086871,390.033808
2020-09-05,410.756455,389.751876
2020-09-06,393.977243,386.637483
2020-09-07,383.386745,384.333627
2020-09-08,376.791912,382.583705


In [415]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[testing_startdate:testing_enddate]
y_test = y.loc[testing_startdate:testing_enddate]

display(X_test.head(5))
display(X_test.tail(5))

Unnamed: 0,ema8,ema34
2020-09-09,368.055932,380.006351
2020-09-10,369.183503,379.613416
2020-09-11,368.933835,378.953221
2020-09-12,369.384094,378.496466
2020-09-13,373.172073,378.94981


Unnamed: 0,ema8,ema34
2022-02-06,2987.166902,2794.437541
2022-02-06,3011.149813,2811.617681
2022-02-07,3029.272076,2827.679528
2022-02-07,3064.451615,2848.245269
2022-02-07,3092.706812,2867.86554


In [416]:
# Scale the features DataFrames

# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [417]:
# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC()
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_train_scaled, y_train)
 
# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred[:10]


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [418]:
# Use a classification report to evaluate the model using the predictions and testing data
svm_testing_report = classification_report(y_test, svm_pred)

# Print the classification report
print(svm_testing_report)


              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00        11
         0.0       0.97      1.00      0.99       758
         1.0       0.00      0.00      0.00        12

    accuracy                           0.97       781
   macro avg       0.32      0.33      0.33       781
weighted avg       0.94      0.97      0.96       781



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [426]:
# Create a new empty predictions DataFrame.

# Create a predictions DataFrame
predictions_df = pd.DataFrame(index=X_test.index)

# Add the SVM model predictions to the DataFrame
predictions_df['Predicted'] = svm_pred

predictions_df['y_test'] = y_test

# Add the actual returns to the DataFrame
predictions_df['Actual Returns'] =  signals_df['Actual Returns']

# Add the strategy returns to the DataFrame
predictions_df['Strategy Returns'] = predictions_df['Predicted'] * predictions_df['Actual Returns']

# Review the DataFrame
display(predictions_df.head(10))
display(predictions_df.tail(10))

NameError: name 'signals_df' is not defined

In [419]:
btc_prices_df["ema50"] = TA.EMA(btc_prices_df, 50)
btc_prices_df["ema200"] = TA.EMA(btc_prices_df, 200) 

### Use lines of code below to help with your strategy

In [420]:
# Calculate the daily returns using the closing prices and the pct_change function

#btc_data_df["actual returns"] = btc_data_df["close"].pct_change()


In [421]:
# Create a new column in the btc_data_df called signal setting its value to zero.

#btc_data_df['signal'] = 0.0

In [422]:
#finta library required to use the functions below

#btc_prices_df['sma50'] = TA.SMA(btc_prices_df, 50)
#btc_prices_df['sma200'] = TA.SMA(btc_prices_df, 200)
#btc_prices_df["ssma"] = TA.SSMA(btc_prices_df) #Smoothed Moving Average
#btc_prices_df["ema50"] = TA.EMA(btc_prices_df, 50) #Exponential Moving Average
#btc_prices_df["ema200"] = TA.EMA(btc_prices_df, 200) #Exponential Moving Average
#btc_prices_df["dema"] = TA.DEMA(btc_prices_df) #Double Exponential Moving Average
#btc_prices_df["bbands"] = TA.BBANDS(btc_prices_df) #Bollinger Bands(needs more perameters than what is shown in this line)
#btc_prices_df["RSI"] = TA.RSI(btc_prices_df) #Relative Strength index

# Drop all NaN values from the DataFrame
#btc_prices_df = btc_prices_df.dropna()


In [423]:
# Create a new column in the eth_data_df called signal setting its value to zero.

#eth_data_df['signal'] = 0.0

In [424]:
#eth_prices_df['sma50'] = TA.SMA(eth_prices_df, 50)
#eth_prices_df['sma200'] = TA.SMA(eth_prices_df, 200)
#eth_prices_df["ssma"] = TA.SSMA(eth_prices_df) #Smoothed Moving Average
#eth_prices_df["ema50"] = TA.EMA(eth_prices_df, 50) #Exponential Moving Average
#eth_prices_df["ema200"] = TA.EMA(eth_prices_df, 200) #Exponential Moving Average
#eth_prices_df["dema"] = TA.DEMA(eth_prices_df) #Double Exponential Moving Average
#eth_prices_df["bbands"] = TA.BBANDS(eth_prices_df) #Bollinger Bands(needs more perameters than what is shown in this line)
#eth_prices_df["RSI"] = TA.RSI(eth_prices_df) #Relative Strength index

# Drop all NaN values from the DataFrame
#eth_prices_df = eth_prices_df.dropna()


In [425]:
# Calculate the daily returns using the closing prices and the pct_change function

#eth_data_df["actual returns"] = eth_data_df["close"].pct_change()

# Drop all NaN values from the DataFrame
#eth_data_df = eth_data_df.dropna()