In [1]:
#Get 5 Year's Worth of Stock Price Data via API
import os
import pandas as pd
import alpaca_trade_api as tradeapi
import numpy as np
from dotenv import load_dotenv
load_dotenv()
%matplotlib inline
from alpaca_trade_api.rest import REST, TimeFrame, TimeFrameUnit

In [2]:
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="vs2"
)

ticker = ['ICF']
timeframe = '1Day'
start_date = pd.Timestamp("2017-05-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2023-04-01", tz="America/New_York").isoformat()

df_ticker = alpaca.get_bars(
    ticker,
    timeframe,
    start=start_date,
    end=end_date
).df

ICF = df_ticker[df_ticker['symbol']== 'ICF'].drop('symbol', axis=1)
icf_closing_prices = pd.DataFrame()
icf_closing_prices['ICF'] = ICF['close']

## Create A Long-Position Algorithm

In [3]:
# Calculate the daily returns using the closing prices and the pct_change function
icf_closing_prices['Actual Returns'] = icf_closing_prices['ICF'].pct_change()

# Drop all NaN values from the DataFrame
icf_closing_prices = icf_closing_prices.dropna()

# Review the DataFrame
display(icf_closing_prices.head())
display(icf_closing_prices.tail())

Unnamed: 0_level_0,ICF,Actual Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-05-02 04:00:00+00:00,100.34,0.000598
2017-05-03 04:00:00+00:00,99.05,-0.012856
2017-05-04 04:00:00+00:00,99.08,0.000303
2017-05-05 04:00:00+00:00,99.74,0.006661
2017-05-08 04:00:00+00:00,99.21,-0.005314


Unnamed: 0_level_0,ICF,Actual Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-03-27 04:00:00+00:00,52.49,-0.00266
2023-03-28 04:00:00+00:00,52.39,-0.001905
2023-03-29 04:00:00+00:00,53.61,0.023287
2023-03-30 04:00:00+00:00,54.28,0.012498
2023-03-31 04:00:00+00:00,55.54,0.023213


In [4]:
# Create the short_window and long_window variables

short_window = 50
long_window = 75

# Using Close Columns and Rolling & Mean Functions and Create the SMA columns for the moving average

icf_closing_prices['SMA Short'] = icf_closing_prices['ICF'].rolling(window=short_window).mean()
icf_closing_prices['SMA Long'] = icf_closing_prices['ICF'].rolling(window=long_window).mean()

# Review the DataFrame

icf_closing_prices.tail()

Unnamed: 0_level_0,ICF,Actual Returns,SMA Short,SMA Long
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-03-27 04:00:00+00:00,52.49,-0.00266,57.1836,56.7416
2023-03-28 04:00:00+00:00,52.39,-0.001905,57.0618,56.688
2023-03-29 04:00:00+00:00,53.61,0.023287,56.9618,56.6456
2023-03-30 04:00:00+00:00,54.28,0.012498,56.8946,56.6136
2023-03-31 04:00:00+00:00,55.54,0.023213,56.8568,56.5924


In [5]:
icf_closing_prices['Signal'] = 0.0

In [6]:
# Create the features set
# Assign a copy of the SMA Short and SMA Long columns to a new DataFrame called X
X = icf_closing_prices[['SMA Short', 'SMA Long']].shift().dropna().copy()

# Display sample data
display(X.head())
display(X.tail())

Unnamed: 0_level_0,SMA Short,SMA Long
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-17 04:00:00+00:00,101.481,100.9356
2017-08-18 04:00:00+00:00,101.4786,100.942133
2017-08-21 04:00:00+00:00,101.4598,100.951733
2017-08-22 04:00:00+00:00,101.4574,100.977067
2017-08-23 04:00:00+00:00,101.4316,100.988933


Unnamed: 0_level_0,SMA Short,SMA Long
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-03-27 04:00:00+00:00,57.3118,56.791733
2023-03-28 04:00:00+00:00,57.1836,56.7416
2023-03-29 04:00:00+00:00,57.0618,56.688
2023-03-30 04:00:00+00:00,56.9618,56.6456
2023-03-31 04:00:00+00:00,56.8946,56.6136


In [7]:
# Create target set
# Create a new column in the icf_closing_prices DataFrame called singal settings its value to zero
icf_closing_prices["Signal"] = 0.0
icf_closing_prices

Unnamed: 0_level_0,ICF,Actual Returns,SMA Short,SMA Long,Signal
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-05-02 04:00:00+00:00,100.34,0.000598,,,0.0
2017-05-03 04:00:00+00:00,99.05,-0.012856,,,0.0
2017-05-04 04:00:00+00:00,99.08,0.000303,,,0.0
2017-05-05 04:00:00+00:00,99.74,0.006661,,,0.0
2017-05-08 04:00:00+00:00,99.21,-0.005314,,,0.0
...,...,...,...,...,...
2023-03-27 04:00:00+00:00,52.49,-0.002660,57.1836,56.7416,0.0
2023-03-28 04:00:00+00:00,52.39,-0.001905,57.0618,56.6880,0.0
2023-03-29 04:00:00+00:00,53.61,0.023287,56.9618,56.6456,0.0
2023-03-30 04:00:00+00:00,54.28,0.012498,56.8946,56.6136,0.0


In [44]:
# # Create the signal to buy
icf_closing_prices.loc[(icf_closing_prices["Actual Returns"] >=0), "Signal"] = 1

# # Create the signal to sell
icf_closing_prices.loc[(icf_closing_prices["Actual Returns"] < 0), "Signal"] = -1

# Copy the new signal column to a new Series called y
y = icf_closing_prices["Signal"].copy()
pd.set_option('display.max_rows', 500)
y.tail()

timestamp
2023-03-27 04:00:00+00:00   -1.0
2023-03-28 04:00:00+00:00   -1.0
2023-03-29 04:00:00+00:00    1.0
2023-03-30 04:00:00+00:00    1.0
2023-03-31 04:00:00+00:00    1.0
Name: Signal, dtype: float64

## Split the Data Into Training & Testing Datasets

### Creating the Training Datasets

In [11]:
# Imports
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset

In [12]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2017-08-17 04:00:00+00:00


In [13]:
# Select the ending period for the training data with an offset of "" months
training_end = X.index.min() + DateOffset(months=15)

# Display the training end date
print(training_end)

2018-11-17 04:00:00+00:00


In [14]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,SMA Short,SMA Long
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-17 04:00:00+00:00,101.481,100.9356
2017-08-18 04:00:00+00:00,101.4786,100.942133
2017-08-21 04:00:00+00:00,101.4598,100.951733
2017-08-22 04:00:00+00:00,101.4574,100.977067
2017-08-23 04:00:00+00:00,101.4316,100.988933


### Creating the Testing Datasets

In [15]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Display sample data
X_test.head()

Unnamed: 0_level_0,SMA Short,SMA Long
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-19 05:00:00+00:00,99.7761,100.692733
2018-11-20 05:00:00+00:00,99.7759,100.699933
2018-11-21 05:00:00+00:00,99.7571,100.695
2018-11-23 05:00:00+00:00,99.7351,100.6922
2018-11-26 05:00:00+00:00,99.6973,100.691533


### Standardizing the Data

In [16]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Introduce the Machine Learning Model: Logistic Regression

In [17]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [18]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [19]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled, y_train)

LogisticRegression()

In [20]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_train_scaled)

# Display the predictions
lr_training_signal_predictions

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1

In [21]:
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       0.52      0.29      0.37       147
         1.0       0.56      0.76      0.64       170

    accuracy                           0.55       317
   macro avg       0.54      0.53      0.51       317
weighted avg       0.54      0.55      0.52       317



## Backtest the Logistic Regression Model

In [22]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = logistic_regression_model.predict(X_test_scaled)

In [23]:
# Generate a classification report using the testing data and the logistic regression model's predictions
lr_testing_report = classification_report(y_test, lr_testing_signal_predictions)

# Review the testing classification report
print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       0.49      0.56      0.52       505
         1.0       0.58      0.51      0.54       593

    accuracy                           0.53      1098
   macro avg       0.53      0.53      0.53      1098
weighted avg       0.54      0.53      0.53      1098



In [24]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["Predicted Signal"] = lr_testing_signal_predictions
predictions_df["Actual Returns"] = icf_closing_prices["Actual Returns"]
predictions_df["Trading Algorithm Returns"] = predictions_df["Actual Returns"] * predictions_df["Predicted Signal"]
predictions_df.tail()

Unnamed: 0_level_0,Predicted Signal,Actual Returns,Trading Algorithm Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-27 04:00:00+00:00,-1.0,-0.00266,0.00266
2023-03-28 04:00:00+00:00,-1.0,-0.001905,0.001905
2023-03-29 04:00:00+00:00,-1.0,0.023287,-0.023287
2023-03-30 04:00:00+00:00,-1.0,0.012498,-0.012498
2023-03-31 04:00:00+00:00,-1.0,0.023213,-0.023213


In [25]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test.index)
cumulative_df['Actual Cumulative Returns'] = (1 + predictions_df[['Actual Returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['Trading Algorithm Returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.tail(50)

Unnamed: 0_level_0,Actual Cumulative Returns,Algo Cumulative Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-20 05:00:00+00:00,0.566433,0.528325
2023-01-23 05:00:00+00:00,0.567115,0.527689
2023-01-24 05:00:00+00:00,0.570037,0.52497
2023-01-25 05:00:00+00:00,0.569745,0.525239
2023-01-26 05:00:00+00:00,0.575687,0.519761
2023-01-27 05:00:00+00:00,0.580947,0.515012
2023-01-30 05:00:00+00:00,0.573739,0.521402
2023-01-31 05:00:00+00:00,0.58572,0.510514
2023-02-01 05:00:00+00:00,0.589616,0.507118
2023-02-02 05:00:00+00:00,0.603059,0.495556


## Back Test the Actual Trading Strategy


In [35]:
#Create the dataframe for the Actual Cumualtive Returns
algo_backtest = pd.concat([icf_closing_prices, cumulative_df], axis='columns', join='inner')

#Drop the Algo Cumulative Returns Column
algo_backtest.drop(columns='Actual Cumulative Returns', inplace=True)

# Calculate the points in time at which a position should be taken, 1 or -1
algo_backtest['Entry/Exit'] = algo_backtest['Signal'].diff()
pd.set_option('display.max_rows', None)

#Review the dataframe
algo_backtest.head(100)

Unnamed: 0_level_0,ICF,Actual Returns,SMA Short,SMA Long,Signal,Algo Cumulative Returns,Entry/Exit
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-11-19 05:00:00+00:00,102.99,0.003214,99.7759,100.699933,1.0,1.003214,
2018-11-20 05:00:00+00:00,102.02,-0.009418,99.7571,100.695,-1.0,0.993766,-2.0
2018-11-21 05:00:00+00:00,101.93,-0.000882,99.7351,100.6922,-1.0,0.992889,0.0
2018-11-23 05:00:00+00:00,101.64,-0.002845,99.6973,100.691533,-1.0,0.990064,0.0
2018-11-26 05:00:00+00:00,101.75,0.001082,99.6767,100.6934,1.0,0.991136,2.0
2018-11-27 05:00:00+00:00,102.25,0.004914,99.6555,100.714733,1.0,0.996006,0.0
2018-11-28 05:00:00+00:00,103.04,0.007726,99.6619,100.7458,1.0,1.003702,0.0
2018-11-29 05:00:00+00:00,103.19,0.001456,99.6947,100.775133,1.0,1.005163,0.0
2018-11-30 05:00:00+00:00,104.07,0.008528,99.7285,100.803133,1.0,1.013735,0.0
2018-12-03 05:00:00+00:00,104.84,0.007399,99.7763,100.8302,1.0,1.021235,0.0


In [36]:
# Set the intial capital 
initial_capital = float(100000)

# Set the share size
share_size = 500

#Take the share position where the dual moving average crossover is 1
algo_backtest['Position'] = share_size * algo_backtest['Signal']

# Find the points in time where a 500 share position is bought or sold
algo_backtest["Entry/Exit Position"] = algo_backtest["Position"].diff()

# Multiply share price by entry/exit positions and get the cumulatively sum
algo_backtest["Portfolio Holdings"] = (
    algo_backtest["ICF"] * algo_backtest["Entry/Exit Position"].cumsum()
)

# Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio
algo_backtest["Portfolio Cash"] = (
    initial_capital - (algo_backtest["ICF"] * algo_backtest["Entry/Exit Position"]).cumsum()
)

# Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments)
algo_backtest["Portfolio Total"] = (
   algo_backtest["Portfolio Cash"] + algo_backtest["Portfolio Holdings"]
)

# Calculate the portfolio daily returns
algo_backtest["Portfolio Daily Returns"] = algo_backtest["Portfolio Total"].pct_change()

# Calculate the cumulative returns
algo_backtest["Portfolio Cumulative Returns"] = (
    1 + algo_backtest["Portfolio Daily Returns"]
).cumprod() - 1

# Print the DataFrame
algo_backtest.tail()

Unnamed: 0_level_0,ICF,Actual Returns,SMA Short,SMA Long,Signal,Algo Cumulative Returns,Entry/Exit,Position,Entry/Exit Position,Portfolio Holdings,Portfolio Cash,Portfolio Total,Portfolio Daily Returns,Portfolio Cumulative Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-03-27 04:00:00+00:00,52.49,-0.00266,57.1836,56.7416,-1.0,0.579795,-2.0,-500.0,-1000.0,-52490.0,137473.3,84983.3,0.0,-0.150167
2023-03-28 04:00:00+00:00,52.39,-0.001905,57.0618,56.688,-1.0,0.5809,0.0,-500.0,0.0,-52390.0,137473.3,85083.3,0.001177,-0.149167
2023-03-29 04:00:00+00:00,53.61,0.023287,56.9618,56.6456,1.0,0.567372,2.0,500.0,1000.0,0.0,83863.3,83863.3,-0.014339,-0.161367
2023-03-30 04:00:00+00:00,54.28,0.012498,56.8946,56.6136,1.0,0.560281,0.0,500.0,0.0,0.0,83863.3,83863.3,0.0,-0.161367
2023-03-31 04:00:00+00:00,55.54,0.023213,56.8568,56.5924,1.0,0.547276,0.0,500.0,0.0,0.0,83863.3,83863.3,0.0,-0.161367


## Actual Portfolio Evaluation Metrics

In [37]:
# Create the list of the metric names
metrics1 = [
    'Annualized Return',
    'Cumulative Returns',
    'Annual Volatility',
    'Sharpe Ratio',
    'Sortino Ratio'
]

# Create a list that holds the column name
columns1 = ['Backtest']

# Initialize the DataFrame with index set to evaluation metrics and columns 
algo_evaluation_df = pd.DataFrame(index=metrics1, columns=columns1)

# Review the DataFrame
algo_evaluation_df

Unnamed: 0,Backtest
Annualized Return,
Cumulative Returns,
Annual Volatility,
Sharpe Ratio,
Sortino Ratio,


In [38]:
algo_evaluation_df.loc['Annualized Return'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252
)

In [39]:
# Calculate the Cumulative returns metric
algo_evaluation_df.loc['Cumulative Returns'] = algo_backtest['Portfolio Cumulative Returns'][-1]

In [40]:
# Calculate the Annual volatility metric
algo_evaluation_df.loc['Annual Volatility'] = (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)

In [41]:
# Calculate the Sharpe ratio
algo_evaluation_df.loc['Sharpe Ratio'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252) / (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)

In [42]:
# Calculate the Sortino ratio
# Start by calculating the downside return values

# Create a DataFrame that contains the Portfolio Daily Returns column
sortino_ratio_df = algo_backtest[['Portfolio Daily Returns']].copy()

# Create a column to hold downside return values
sortino_ratio_df.loc[:,'Downside Returns'] = 0

# Find Portfolio Daily Returns values less than 0, 
# square those values, and add them to the Downside Returns column
sortino_ratio_df.loc[sortino_ratio_df['Portfolio Daily Returns'] < 0, 
                     'Downside Returns'] = sortino_ratio_df['Portfolio Daily Returns']**2

# Calculate the annualized return value
annualized_return = sortino_ratio_df['Portfolio Daily Returns'].mean() * 252

# Calculate the annualized downside standard deviation value
downside_standard_deviation = np.sqrt(sortino_ratio_df['Downside Returns'].mean()) * np.sqrt(252)

# Divide the annualized return value by the downside standard deviation value
sortino_ratio = annualized_return/downside_standard_deviation

# Add the Sortino ratio to the evaluation DataFrame
algo_evaluation_df.loc['Sortino Ratio'] = sortino_ratio

In [43]:
# Review the portfolio evaluation DataFrame
algo_evaluation_df

Unnamed: 0,Backtest
Annualized Return,-0.024631
Cumulative Returns,-0.161367
Annual Volatility,0.178368
Sharpe Ratio,-0.13809
Sortino Ratio,-0.20284
