In [2]:
# Import the required libraries and dependencies
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
%matplotlib inline

In [3]:
# Load the environment variables by calling the load_dotenv function
load_dotenv("api_keys.env")

True

In [4]:
# Set Alpaca API key and secret by calling the os.getenv function and referencing the environment variable names
# Set each environment variable to a notebook variable of the same name
alpaca_api_key = os.getenv("ALPACA_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [6]:
# Create your Alpaca API REST object by calling Alpaca's tradeapi.REST function
# Set the parameters to your alpaca_api_key, alpaca_secret_key and api_version="v2" 
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2"
)

tickers = ["AAPL"]

In [7]:
# Set the values for start_date and end_date using the pd.Timestamp function
# The start and end data should be 2013-01-01 and 2023-06-30, respectively
# Set the parameter tz to "America/New_York", 
# Set this all to the ISO format by calling the isoformat function 
start_date = pd.Timestamp("2013-01-01", tz= "America/New_York").isoformat()
end_date = pd.Timestamp("2023-06-30", tz= "America/New_York").isoformat()

In [8]:
# Set timeframe to one day (1Day)
timeframe = "1DAY"

In [9]:
# Use the Alpaca get_bars function to gather the price information for each ticker
# Include the function parameters: tickers, timeframe, start, end, and limit
# Be sure to call the df property to ensure that the returned information is set as a DataFrame
df = alpaca.get_bars(
    tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

# Review the first five rows of the resulting DataFrame 
# YOUR CODE HERE
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-01 05:00:00+00:00,118.75,118.81,116.86,117.34,34852374,187129,117.75676,AAPL
2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643,180616,117.151198,AAPL
2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785,245330,115.434888,AAPL
2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977,307788,118.18729,AAPL
2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754,190809,118.509111,AAPL


In [11]:
df.drop(columns = ["trade_count", "symbol", "vwap"], axis=1, inplace = True)

In [12]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-12-01 05:00:00+00:00,118.75,118.81,116.86,117.34,34852374
2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643
2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785
2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977
2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754


In [13]:
# Calculate the daily returns using the closing prices and the pct_change function
df["actual_returns"] = df["close"].pct_change()

# Drop all NaN values from the DataFrame
df = df.dropna()

# Review the DataFrame
display(df.head())
display(df.tail())

Unnamed: 0_level_0,open,high,low,close,volume,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643,-0.009034
2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785,-0.009288
2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977,0.033247
2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754,-0.006301
2015-12-08 05:00:00+00:00,117.52,118.6,116.86,118.23,34309450,-0.000423


Unnamed: 0_level_0,open,high,low,close,volume,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-26 04:00:00+00:00,186.83,188.05,185.23,185.27,48174281,-0.007553
2023-06-27 04:00:00+00:00,185.89,188.39,185.67,188.06,50781649,0.015059
2023-06-28 04:00:00+00:00,187.93,189.9,187.6,189.25,51239329,0.006328
2023-06-29 04:00:00+00:00,189.08,190.07,188.94,189.59,46347308,0.001797
2023-06-30 04:00:00+00:00,191.775,194.48,191.26,193.88,68445760,0.022628


In [14]:
df['signal'] = 0.0
df.loc[(df['actual_returns'] >= 0), 'signal'] = 1
df.loc[(df['actual_returns'] < 0), 'signal'] = -1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0_level_0,open,high,low,close,volume,actual_returns,signal
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-06-26 04:00:00+00:00,186.83,188.05,185.23,185.27,48174281,-0.007553,-1.0
2023-06-27 04:00:00+00:00,185.89,188.39,185.67,188.06,50781649,0.015059,1.0
2023-06-28 04:00:00+00:00,187.93,189.9,187.6,189.25,51239329,0.006328,1.0
2023-06-29 04:00:00+00:00,189.08,190.07,188.94,189.59,46347308,0.001797,1.0
2023-06-30 04:00:00+00:00,191.775,194.48,191.26,193.88,68445760,0.022628,1.0


In [15]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,actual_returns,signal
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643,-0.009034,-1.0
2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785,-0.009288,-1.0
2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977,0.033247,1.0
2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754,-0.006301,-1.0
2015-12-08 05:00:00+00:00,117.52,118.6,116.86,118.23,34309450,-0.000423,-1.0


In [19]:
from sklearn.linear_model import LogisticRegression
from pandas.tseries.offsets import DateOffset
X= df.drop(columns = ["signal"], axis=1).copy()
y= df["signal"].copy()

# Select the start of the training period
training_begin = X.index.min()

# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=87)




In [20]:
print(training_end)

2023-03-02 05:00:00+00:00


In [21]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,open,high,low,close,volume,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-12-02 05:00:00+00:00,117.05,118.11,116.08,116.28,33385643,-0.009034
2015-12-03 05:00:00+00:00,116.55,116.79,114.22,115.2,41560785,-0.009288
2015-12-04 05:00:00+00:00,115.29,119.25,115.11,119.03,57776977,0.033247
2015-12-07 05:00:00+00:00,118.98,119.86,117.81,118.28,32080754,-0.006301
2015-12-08 05:00:00+00:00,117.52,118.6,116.86,118.23,34309450,-0.000423


In [22]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Display sample data
X_test.head()

Unnamed: 0_level_0,open,high,low,close,volume,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-02 05:00:00+00:00,144.38,146.71,143.9,145.91,52288660,0.004129
2023-03-03 05:00:00+00:00,148.045,151.11,147.33,151.03,70739572,0.03509
2023-03-06 05:00:00+00:00,153.785,156.3,153.46,153.83,87605555,0.018539
2023-03-07 05:00:00+00:00,153.7,154.0299,151.13,151.6,56284401,-0.014497
2023-03-08 05:00:00+00:00,152.81,153.47,151.83,152.87,47238140,0.008377


In [24]:
# Imports
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [25]:
# Fit the LogisticRegression model
lr_model_fit = logistic_regression_model.fit(X_train_scaled, y_train)


In [26]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = lr_model_fit.predict(X_train_scaled)

In [28]:
from sklearn.metrics import classification_report
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train, lr_training_signal_predictions)
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       0.99      1.00      1.00       854
         1.0       1.00      0.99      1.00       970

    accuracy                           1.00      1824
   macro avg       1.00      1.00      1.00      1824
weighted avg       1.00      1.00      1.00      1824



In [29]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = lr_model_fit.predict(X_test_scaled)

In [30]:
# Generate a classification report using the testing data and the logistic regression model's predications
lr_testing_report = classification_report(y_test, lr_testing_signal_predictions)

print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       0.97      1.00      0.99        37
         1.0       1.00      0.98      0.99        47

    accuracy                           0.99        84
   macro avg       0.99      0.99      0.99        84
weighted avg       0.99      0.99      0.99        84

