In [145]:
# Import libraries and dependencies

import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
import os
import pandas as pd
import alpaca_trade_api.rest as tradeapi
import datetime as dt
import holoviews as hv
from prophet import Prophet

In [221]:
# Load the ETH price history, using the "Date" column as a parsed datetime row index.
# `infer_datetime_format` is intentionally omitted: it is deprecated since pandas 2.0
# and `parse_dates=True` already parses the index column.
eth_df = pd.read_csv(
    "eth_2017.csv",
    index_col="Date",
    parse_dates=True,
)

# "Date" is the index, so order the rows chronologically by sorting the index
eth_df = eth_df.sort_index()
eth_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01,7.969100,8.471230,7.969100,8.175580,1.146285e+07,7.215696e+08
2017-01-02,8.172570,8.436330,8.054410,8.370870,1.425256e+07,7.190737e+08
2017-01-03,8.378510,9.996800,8.317170,9.745860,1.918060e+07,7.816533e+08
2017-01-04,9.725250,11.276400,9.559810,11.276400,4.371927e+07,9.278113e+08
2017-01-05,11.251600,11.890900,9.402680,10.267700,4.368710e+07,9.486751e+08
...,...,...,...,...,...,...
2021-12-28,4044.256508,4044.256508,3788.353454,3814.279700,1.960572e+10,4.643036e+11
2021-12-29,3808.748900,3836.195189,3634.019303,3639.101876,2.050396e+10,4.502630e+11
2021-12-30,3640.373261,3774.378774,3620.622805,3724.106300,2.360372e+10,4.419016e+11
2021-12-31,3721.875500,3821.726100,3644.590367,3691.611156,1.684483e+10,4.460166e+11


In [222]:
# Start from the close prices only, add the row-over-row percentage change as
# "Actual Returns", and drop rows containing NaN (the first row has no prior close).
signals_df = (
    eth_df.loc[:, ["Close"]]
    .assign(**{"Actual Returns": lambda frame: frame["Close"].pct_change()})
    .dropna()
)

In [223]:
# Show the first and last five rows of the close/returns frame
display(signals_df.head(), signals_df.tail())

Unnamed: 0_level_0,Close,Actual Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-02,8.37087,0.023887
2017-01-03,9.74586,0.164259
2017-01-04,11.2764,0.157045
2017-01-05,10.2677,-0.089452
2017-01-06,10.2487,-0.00185


Unnamed: 0_level_0,Close,Actual Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-12-28,3814.2797,-0.057
2021-12-29,3639.101876,-0.045927
2021-12-30,3724.1063,0.023359
2021-12-31,3691.611156,-0.008726
2022-01-01,3773.5834,0.022205


In [224]:
# Look-back lengths (in rows) for the fast and slow simple moving averages
short_window, long_window = 4, 100

# Attach both rolling means of the close price, then drop the warm-up rows in which
# a rolling window is not yet full (those rows hold NaN).
# NOTE(review): signals_df is rebound to the trimmed frame, so re-running this cell
# without re-running the cells above trims additional rows each time.
signals_df = signals_df.assign(
    SMA_Fast=signals_df['Close'].rolling(window=short_window).mean(),
    SMA_Slow=signals_df['Close'].rolling(window=long_window).mean(),
).dropna()

# Show the first and last five rows
display(signals_df.head(), signals_df.tail())

Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-04-11,43.4256,-0.001476,43.65475,22.236885
2017-04-12,46.2747,0.065609,44.119975,22.615923
2017-04-13,50.164,0.084048,45.838525,23.020105
2017-04-14,47.6627,-0.049862,46.88175,23.383968
2017-04-15,49.1679,0.03158,48.317325,23.77297


Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-12-28,3814.2797,-0.057,4011.29318,3991.47819
2021-12-29,3639.101876,-0.045927,3893.657031,3998.04839
2021-12-30,3724.1063,0.023359,3805.580745,4007.72696
2021-12-31,3691.611156,-0.008726,3717.274758,4013.876669
2022-01-01,3773.5834,0.022205,3707.100683,4020.025338


In [225]:
# Label every row by the sign of its own return:
#   +1.0 when Actual Returns >= 0 (long), -1.0 when Actual Returns < 0 (short).
# Rows matching neither comparison (e.g. NaN) keep the 0.0 default.
signals_df['Signal'] = np.select(
    [signals_df['Actual Returns'] >= 0, signals_df['Actual Returns'] < 0],
    [1.0, -1.0],
    default=0.0,
)

# Show the first and last five rows
display(signals_df.head(), signals_df.tail())

Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-04-11,43.4256,-0.001476,43.65475,22.236885,-1.0
2017-04-12,46.2747,0.065609,44.119975,22.615923,1.0
2017-04-13,50.164,0.084048,45.838525,23.020105,1.0
2017-04-14,47.6627,-0.049862,46.88175,23.383968,-1.0
2017-04-15,49.1679,0.03158,48.317325,23.77297,1.0


Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-28,3814.2797,-0.057,4011.29318,3991.47819,-1.0
2021-12-29,3639.101876,-0.045927,3893.657031,3998.04839,-1.0
2021-12-30,3724.1063,0.023359,3805.580745,4007.72696,1.0
2021-12-31,3691.611156,-0.008726,3717.274758,4013.876669,-1.0
2022-01-01,3773.5834,0.022205,3707.100683,4020.025338,1.0


In [226]:
# Count how many rows carry each signal value
signals_df.loc[:, 'Signal'].value_counts()

 1.0    903
-1.0    824
Name: Signal, dtype: int64

In [227]:
# Strategy return for a row = that row's actual return multiplied by the previous
# row's signal. shift() lags the signal by one row, so the first row is NaN.
signals_df['Strategy Returns'] = signals_df['Signal'].shift().mul(signals_df['Actual Returns'])

# Show the first and last five rows
display(signals_df.head(), signals_df.tail())

Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow,Signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-04-11,43.4256,-0.001476,43.65475,22.236885,-1.0,
2017-04-12,46.2747,0.065609,44.119975,22.615923,1.0,-0.065609
2017-04-13,50.164,0.084048,45.838525,23.020105,1.0,0.084048
2017-04-14,47.6627,-0.049862,46.88175,23.383968,-1.0,-0.049862
2017-04-15,49.1679,0.03158,48.317325,23.77297,1.0,-0.03158


Unnamed: 0_level_0,Close,Actual Returns,SMA_Fast,SMA_Slow,Signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-28,3814.2797,-0.057,4011.29318,3991.47819,-1.0,0.057
2021-12-29,3639.101876,-0.045927,3893.657031,3998.04839,-1.0,0.045927
2021-12-30,3724.1063,0.023359,3805.580745,4007.72696,1.0,-0.023359
2021-12-31,3691.611156,-0.008726,3717.274758,4013.876669,-1.0,-0.008726
2022-01-01,3773.5834,0.022205,3707.100683,4020.025338,1.0,-0.022205


In [280]:
# Chart the cumulative product of (1 + strategy return) to examine performance
signals_df['Strategy Returns'].add(1).cumprod().hvplot()

In [229]:
# Feature matrix X: the two moving averages lagged by one row, so each row holds the
# previous row's SMA values; the first row becomes NaN after the shift and is dropped.
X = signals_df.loc[:, ['SMA_Fast', 'SMA_Slow']].shift(periods=1).dropna()

# Preview the features
X.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-12,43.65475,22.236885
2017-04-13,44.119975,22.615923
2017-04-14,45.838525,23.020105
2017-04-15,46.88175,23.383968
2017-04-16,48.317325,23.77297


In [230]:
# Target vector y: the Signal column of signals_df
y = signals_df.loc[:, 'Signal']

# Check how many rows fall in each class
y.value_counts()

 1.0    903
-1.0    824
Name: Signal, dtype: int64

In [251]:
# The training period starts at the earliest date present in the feature index
training_begin = X.index.min()

# Show the chosen start date
print(str(training_begin))

2017-04-12 00:00:00


In [283]:
# The training period ends six calendar months after the earliest feature date
training_end = X.index.min() + pd.DateOffset(months=6)

# Show the chosen end date
print(str(training_end))

2017-10-12 00:00:00


In [284]:
# Label-based date slice covering the training period (both endpoints inclusive with .loc)
training_window = slice(training_begin, training_end)

# Training features and targets share the same window
X_train = X.loc[training_window]
y_train = y.loc[training_window]

# Preview the training features
X_train.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-12,43.65475,22.236885
2017-04-13,44.119975,22.615923
2017-04-14,45.838525,23.020105
2017-04-15,46.88175,23.383968
2017-04-16,48.317325,23.77297


In [285]:
# The testing period begins one hour after training_end, which keeps the training_end
# row itself out of the test slice, and runs to the end of the data.
testing_begin = training_end + DateOffset(hours=1)

X_test = X.loc[testing_begin:]
y_test = y.loc[testing_begin:]

# Preview the testing features
X_test.head()

Unnamed: 0_level_0,SMA_Fast,SMA_Slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-10-13,301.324193,275.740205
2017-10-14,311.768879,276.439214
2017-10-15,321.142652,277.107279
2017-10-16,328.984724,277.977446
2017-10-17,335.823541,278.789677


In [286]:
# Standardize the features.
# The scaler is fitted on the training features only and then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Transform the training and testing features with the fitted scaler
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [287]:
# Create an SVC classifier with default settings and fit it on the scaled training data
# (fit returns the fitted estimator, so construction and fitting can be chained)
svm_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict a signal for every row of the scaled testing data
svm_pred = svm_model.predict(X_test_scaled)

# Display the predicted values
svm_pred

array([1., 1., 1., ..., 1., 1., 1.])

In [288]:
# Score the SVM predictions against the true test-period signals
svm_testing_report = classification_report(y_true=y_test, y_pred=svm_pred)

# Show the per-class precision / recall / f1 table
print(svm_testing_report)


              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00       742
         1.0       0.52      1.00      0.68       800

    accuracy                           0.52      1542
   macro avg       0.26      0.50      0.34      1542
weighted avg       0.27      0.52      0.35      1542



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [289]:
# Assemble the SVM results on the test-period index:
#   Predicted        - the SVM model's predicted signal
#   Actual Returns   - taken from signals_df, aligned on the index
#   Strategy Returns - actual return multiplied by the predicted signal
predictions_df = (
    pd.DataFrame({'Predicted': svm_pred}, index=X_test.index)
    .assign(**{'Actual Returns': signals_df['Actual Returns']})
    .assign(**{'Strategy Returns': lambda frame: frame['Actual Returns'] * frame['Predicted']})
)

# Show the first and last five rows
display(predictions_df.head(), predictions_df.tail())

Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-10-13,1.0,0.110421,0.110421
2017-10-14,1.0,-0.002965,-0.002965
2017-10-15,1.0,-0.010255,-0.010255
2017-10-16,1.0,-0.005577,-0.005577
2017-10-17,1.0,-0.046075,-0.046075


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-28,1.0,-0.057,-0.057
2021-12-29,1.0,-0.045927,-0.045927
2021-12-30,1.0,0.023359,0.023359
2021-12-31,1.0,-0.008726,-0.008726
2022-01-01,1.0,0.022205,0.022205


In [290]:
# Chart cumulative (1 + return) products for the actual returns and the SVM strategy returns
predictions_df[["Actual Returns", "Strategy Returns"]].add(1).cumprod().hvplot()


In [291]:
# Bring in the second classifier (logistic regression) along with scikit-learn helpers.
# NOTE(review): accuracy_score and train_test_split are not referenced in the cells
# visible here, and classification_report is already imported in the first cell.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Create the logistic regression model with default settings
model = LogisticRegression()


In [292]:
# Fit the logistic regression on the scaled training data, then predict a signal for
# every row of the scaled testing data (fit returns the fitted model, so the calls chain)
pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Display the predicted values
pred

array([1., 1., 1., ..., 1., 1., 1.])

In [293]:
# Score the logistic regression predictions against the true test-period signals.
# Despite its name, lr_training_report is computed from the testing data (y_test, pred).
lr_training_report = classification_report(y_true=y_test, y_pred=pred)

# Show the per-class precision / recall / f1 table
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       0.49      0.05      0.09       742
         1.0       0.52      0.95      0.67       800

    accuracy                           0.52      1542
   macro avg       0.51      0.50      0.38      1542
weighted avg       0.51      0.52      0.39      1542



In [294]:
# Build a predictions DataFrame for the logistic regression model, indexed like X_test
predictions_df2 = pd.DataFrame(index=X_test.index)

# Add the logistic regression model predictions to the DataFrame
predictions_df2['Predicted'] = pred

# Add the actual returns to the DataFrame (aligned on the index)
predictions_df2['Actual Returns'] = signals_df['Actual Returns']

# Add the strategy returns: actual return multiplied by the predicted signal
predictions_df2['Strategy Returns'] = predictions_df2["Actual Returns"] * predictions_df2["Predicted"]

# Review the logistic regression frame built in this cell
# (predictions_df2, not the SVM frame predictions_df)
display(predictions_df2.head())
display(predictions_df2.tail())


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-10-13,1.0,0.110421,0.110421
2017-10-14,1.0,-0.002965,-0.002965
2017-10-15,1.0,-0.010255,-0.010255
2017-10-16,1.0,-0.005577,-0.005577
2017-10-17,1.0,-0.046075,-0.046075


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-28,1.0,-0.057,-0.057
2021-12-29,1.0,-0.045927,-0.045927
2021-12-30,1.0,0.023359,0.023359
2021-12-31,1.0,-0.008726,-0.008726
2022-01-01,1.0,0.022205,0.022205


In [295]:
# Chart cumulative (1 + return) products for the actual returns and the logistic regression strategy returns
predictions_df2[["Actual Returns", "Strategy Returns"]].add(1).cumprod().hvplot()