In [1]:
# Initial imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Loading & Preprocessing 

In [2]:
# Import the Google Search Trend Data for the Stock Ticker into a Pandas DataFrame
carvana_search_df = pd.read_csv(Path('Trend_Comparison.csv'))

# Review the DataFrame for each of the Google Search Trends Data sets
carvana_search_df.head()

Unnamed: 0,Week,Carvana profit,Carvana loss,CVNA buy,CVNA Sell,Carvana stock up,Carvana stock down,Trend,Unnamed: 8,Unnamed: 9
0,7/26/2020,21,0,0,0,0,0,0,,
1,8/2/2020,15,24,0,21,0,0,1,,
2,8/9/2020,0,84,0,33,0,0,1,,
3,8/16/2020,0,0,0,0,0,0,0,,
4,8/23/2020,0,38,0,25,0,0,1,,


## Define the Features 

In [3]:
# Define the features set by copying the Google Search Trend DataFrame
X = carvana_search_df.copy()
X.drop(columns=['Trend', 'Week', 'Unnamed: 8', 'Unnamed: 9'], axis=1, inplace=True)
# Review the features set of the carvana_search_df 
X.head()

Unnamed: 0,Carvana profit,Carvana loss,CVNA buy,CVNA Sell,Carvana stock up,Carvana stock down
0,21,0,0,0,0,0
1,15,24,0,21,0,0
2,0,84,0,33,0,0
3,0,0,0,0,0,0
4,0,38,0,25,0,0


## Define the Targets

In [4]:
# Create the target vector by assiging the values of the ['Column'] from the carvana_search_df
y = carvana_search_df['Trend'].values.reshape(-1,1)

# Review the targets set of the carvana_search_df
y[:5]

array([[0],
       [1],
       [1],
       [0],
       [1]], dtype=int64)

## Train, Test, & Split the Features and Targets

In [5]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=70)

## Use the StandardScaler to Scale the Features Data

In [6]:
# Create the StandardScaler instance
scaler = StandardScaler()

In [7]:
# Fit the Standard Scaler with the Training Data
X_scaler = scaler.fit(X_train)

In [8]:
# Scale the Training Data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Fitting the Random Forest Model

Once the data is scaled, create a random forest instance and train it with the training data (X_train_scaled and y_train).

In [9]:
# Create the random forest classifier instance
rf_model = RandomForestClassifier(n_estimators=500, random_state=70)

In [10]:
# Fit the model
rf_model = rf_model.fit(X_train_scaled, y_train)

  


## Making Predictions Using the Random Forest Model

Validate the trained model, by predicting loan defaults using the testing data (X_test_scaled).

In [11]:
# Making predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

## Model Evaluation

Evaluate model's results, by using sklearn to calculate the confusion matrix, the accuracy score and to generate the classification report.

In [12]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
      cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

In [13]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,16,0
Actual 1,1,22


Accuracy Score : 0.9743589743589743
Classification Report
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       1.00      0.96      0.98        23

    accuracy                           0.97        39
   macro avg       0.97      0.98      0.97        39
weighted avg       0.98      0.97      0.97        39



## Feature Importance

In this section, you are asked to fetch the features' importance from the random forest model and display the top 10 most important features.

In [14]:
# Get the feature importance array
importances = rf_model.feature_importances_

In [15]:
# List the top 10 most important features
importances_sorted = sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)
importances_sorted[:]

[(0.34014980033487596, 'Carvana loss'),
 (0.18408502810993746, 'CVNA Sell'),
 (0.13504891265568217, 'Carvana stock down'),
 (0.12658161615490712, 'Carvana profit'),
 (0.12175487469318019, 'CVNA buy'),
 (0.09237976805141698, 'Carvana stock up')]

## Slice the DataFrame & Plot Against Closing Prices

In [16]:
# Change the 'week' column in the Google Search Trend DF to Datetime 
carvana_search_df['Week'] = pd.to_datetime(
    carvana_search_df['Week'],
    infer_datetime_format=True,
    utc=True
)

In [17]:
carvana_search_df.head()

Unnamed: 0,Week,Carvana profit,Carvana loss,CVNA buy,CVNA Sell,Carvana stock up,Carvana stock down,Trend,Unnamed: 8,Unnamed: 9
0,2020-07-26 00:00:00+00:00,21,0,0,0,0,0,0,,
1,2020-08-02 00:00:00+00:00,15,24,0,21,0,0,1,,
2,2020-08-09 00:00:00+00:00,0,84,0,33,0,0,1,,
3,2020-08-16 00:00:00+00:00,0,0,0,0,0,0,0,,
4,2020-08-23 00:00:00+00:00,0,38,0,25,0,0,1,,


In [18]:
carvana_search_df['Week'] = carvana_search_df['Week'].dt.tz_localize(None)

carvana_search_df.head()

Unnamed: 0,Week,Carvana profit,Carvana loss,CVNA buy,CVNA Sell,Carvana stock up,Carvana stock down,Trend,Unnamed: 8,Unnamed: 9
0,2020-07-26,21,0,0,0,0,0,0,,
1,2020-08-02,15,24,0,21,0,0,1,,
2,2020-08-09,0,84,0,33,0,0,1,,
3,2020-08-16,0,0,0,0,0,0,0,,
4,2020-08-23,0,38,0,25,0,0,1,,


In [19]:
# Drop every column except for the most important feature
carvana_search_df.drop(columns=['Carvana profit', 'CVNA buy', 'CVNA Sell', 'Carvana stock up', 'Carvana stock down', 'Trend', 'Unnamed: 8', 'Unnamed: 9'], inplace=True)

# Set the index to Week in the Google Search Trend DF
carvana_search_df.set_index('Week', inplace=True)
carvana_search_df.head()

Unnamed: 0_level_0,Carvana loss
Week,Unnamed: 1_level_1
2020-07-26,0
2020-08-02,24
2020-08-09,84
2020-08-16,0
2020-08-23,38


In [55]:
#Get 3 Year's Worth of Stock Price Data via API
import os
import pandas as pd
import alpaca_trade_api as tradeapi
from dotenv import load_dotenv
load_dotenv()
%matplotlib inline
from alpaca_trade_api.rest import REST, TimeFrame, TimeFrameUnit

alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")
alpaca = tradeapi.REST(
alpaca_api_key,
alpaca_secret_key,
api_version="v2"
)

ticker = ['CVNA']
timeframe ='1Day'
start_date = pd.Timestamp("2018-05-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2023-05-01", tz="America/New_York").isoformat()

df_ticker = alpaca.get_bars(
ticker,
timeframe,
start=start_date,
end=end_date
).df

CVNA = df_ticker[df_ticker['symbol']== 'CVNA'].drop('symbol', axis=1)
cvna_closing_prices = pd.DataFrame()
cvna_closing_prices["CVNA"] = CVNA["close"]




In [56]:
#Import hvplot libaries
import hvplot.pandas

#Plot the Google Search Trend DF and the Closing Prices for Comparison
carvana_search_df.hvplot() + cvna_closing_prices.hvplot()

## Create A Short-Position Algorithm

In [57]:
# Calculate the daily returns using the closing prices and the pct_change function
cvna_closing_prices["actual_returns"] = cvna_closing_prices["CVNA"].pct_change()

# Drop all NaN values from the DataFrame
cvna_closing_prices = cvna_closing_prices.dropna()

# Review the DataFrame
display(cvna_closing_prices.head())
display(cvna_closing_prices.tail())

Unnamed: 0_level_0,CVNA,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-02 04:00:00+00:00,26.7,-0.010745
2018-05-03 04:00:00+00:00,26.18,-0.019476
2018-05-04 04:00:00+00:00,25.47,-0.02712
2018-05-07 04:00:00+00:00,25.24,-0.00903
2018-05-08 04:00:00+00:00,24.97,-0.010697


Unnamed: 0_level_0,CVNA,actual_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-25 04:00:00+00:00,7.55,-0.097969
2023-04-26 04:00:00+00:00,7.52,-0.003974
2023-04-27 04:00:00+00:00,7.34,-0.023936
2023-04-28 04:00:00+00:00,6.94,-0.054496
2023-05-01 04:00:00+00:00,6.93,-0.001441


In [58]:
# Create the short_window and long_window variables
short_window = 25
long_window = 100

# Using Close Columns and Rolling & Mean Functions and Create the SMA75 and SMA145 columns for moving averages
cvna_closing_prices['SMA25'] = cvna_closing_prices['CVNA'].rolling(window=short_window ).mean()
cvna_closing_prices['SMA100'] = cvna_closing_prices['CVNA'].rolling(window=long_window).mean()

#Review the DataFrame
cvna_closing_prices.tail()

Unnamed: 0_level_0,CVNA,actual_returns,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-04-25 04:00:00+00:00,7.55,-0.097969,8.6348,7.942
2023-04-26 04:00:00+00:00,7.52,-0.003974,8.6184,7.9401
2023-04-27 04:00:00+00:00,7.34,-0.023936,8.5748,7.9322
2023-04-28 04:00:00+00:00,6.94,-0.054496,8.5184,7.9209
2023-05-01 04:00:00+00:00,6.93,-0.001441,8.468,7.9192


In [59]:
#Create the features set
#Assign a copy of the SMA25 and SMA100 columns to a new DataFrame called X
X = cvna_closing_prices[['SMA25', 'SMA100']].shift().dropna().copy()

#Display sample data
display(X.head())
display(X.tail())

Unnamed: 0_level_0,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-09-24 04:00:00+00:00,61.8308,44.2783
2018-09-25 04:00:00+00:00,61.9956,44.6067
2018-09-26 04:00:00+00:00,62.0276,44.9382
2018-09-27 04:00:00+00:00,62.1384,45.295
2018-09-28 04:00:00+00:00,62.2196,45.6426


Unnamed: 0_level_0,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-25 04:00:00+00:00,8.6156,7.9399
2023-04-26 04:00:00+00:00,8.6348,7.942
2023-04-27 04:00:00+00:00,8.6184,7.9401
2023-04-28 04:00:00+00:00,8.5748,7.9322
2023-05-01 04:00:00+00:00,8.5184,7.9209


In [60]:
#Create target set
#Create a new column in the signals_cvna called signal setting its value to zero
cvna_closing_prices["Signal"] = 0.0

In [61]:
#Create the signal to buy
cvna_closing_prices.loc[(cvna_closing_prices["actual_returns"] >= 0), "Signal"] = 1

In [62]:
#Create the signal to sell
cvna_closing_prices.loc[(cvna_closing_prices["actual_returns"] < 0), "Signal"] = -1

In [63]:
#Copy the new signal column to a new SEries called y
y = cvna_closing_prices["Signal"].copy()
y.tail()

timestamp
2023-04-25 04:00:00+00:00   -1.0
2023-04-26 04:00:00+00:00   -1.0
2023-04-27 04:00:00+00:00   -1.0
2023-04-28 04:00:00+00:00   -1.0
2023-05-01 04:00:00+00:00   -1.0
Name: Signal, dtype: float64

## Split the Data Into Training & Testing Datasets

### Creating the Training Datasets

In [64]:
# Imports
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset

In [65]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2018-09-24 04:00:00+00:00


In [66]:
X.head()

Unnamed: 0_level_0,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-09-24 04:00:00+00:00,61.8308,44.2783
2018-09-25 04:00:00+00:00,61.9956,44.6067
2018-09-26 04:00:00+00:00,62.0276,44.9382
2018-09-27 04:00:00+00:00,62.1384,45.295
2018-09-28 04:00:00+00:00,62.2196,45.6426


In [67]:
# Select the ending period for the training data with an offset of "" months
training_end = X.index.min() + DateOffset(months=30)

# Display the training end date
print(training_end)

2021-03-24 04:00:00+00:00


In [68]:
# Generate the X_train and y_train DataFrames
X_train1 = X.loc[training_begin:training_end]
y_train1 = y.loc[training_begin:training_end]

# Display sample data
X_train1.head()

Unnamed: 0_level_0,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-09-24 04:00:00+00:00,61.8308,44.2783
2018-09-25 04:00:00+00:00,61.9956,44.6067
2018-09-26 04:00:00+00:00,62.0276,44.9382
2018-09-27 04:00:00+00:00,62.1384,45.295
2018-09-28 04:00:00+00:00,62.2196,45.6426


### Creating the Training Datasets

In [69]:
# Generate the X_test and y_test DataFrames
X_test1 = X.loc[training_end:]
y_test1 = y.loc[training_end:]

# Display sample data
X_test1.head()

Unnamed: 0_level_0,SMA25,SMA100
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-24 04:00:00+00:00,280.0312,258.5408
2021-03-25 04:00:00+00:00,278.3208,259.0419
2021-03-26 04:00:00+00:00,276.7592,259.6936
2021-03-29 04:00:00+00:00,274.7696,260.4301
2021-03-30 04:00:00+00:00,273.4404,261.0671


### Standardizing the Data

In [70]:
# Create a StandardScaler instance
scaler1 = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler1 = scaler.fit(X_train1)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled1 = X_scaler.transform(X_train1)
X_test_scaled1 = X_scaler.transform(X_test1)

## Introduce the Machine Learning Model: Logistic Regression

In [71]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression

In [72]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [73]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled1, y_train1)

LogisticRegression()

In [74]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_train_scaled1)

# Display the predictions
lr_training_signal_predictions

array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1

In [75]:
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train1, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       0.50      0.18      0.27       297
         1.0       0.53      0.84      0.65       332

    accuracy                           0.53       629
   macro avg       0.52      0.51      0.46       629
weighted avg       0.52      0.53      0.47       629



## Backtest the Logistic Regression Model 

In [76]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = logistic_regression_model.predict(X_test_scaled1)

In [77]:
# Generate a classification report using the testing data and the logistic regression model's predictions
lr_testing_report = classification_report(y_test1, lr_testing_signal_predictions)

# Review the testing classification report
print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       0.52      0.09      0.16       285
         1.0       0.46      0.90      0.61       245

    accuracy                           0.47       530
   macro avg       0.49      0.50      0.38       530
weighted avg       0.49      0.47      0.37       530



In [78]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test1.index)
predictions_df["predicted_signal"] = lr_testing_signal_predictions
predictions_df["actual_returns"] = cvna_closing_prices["actual_returns"]
predictions_df["trading_algorithm_returns"] = predictions_df["actual_returns"] * predictions_df["predicted_signal"]
predictions_df.head()
predictions_df.tail()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-04-25 04:00:00+00:00,-1.0,-0.097969,0.097969
2023-04-26 04:00:00+00:00,-1.0,-0.003974,0.003974
2023-04-27 04:00:00+00:00,-1.0,-0.023936,0.023936
2023-04-28 04:00:00+00:00,-1.0,-0.054496,0.054496
2023-05-01 04:00:00+00:00,-1.0,-0.001441,0.001441


In [79]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test1.index)
cumulative_df['Actual Cumulative Returns'] = (1 + predictions_df[['actual_returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['trading_algorithm_returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.tail()

Unnamed: 0_level_0,Actual Cumulative Returns,Algo Cumulative Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-25 04:00:00+00:00,0.027538,0.045533
2023-04-26 04:00:00+00:00,0.027428,0.045714
2023-04-27 04:00:00+00:00,0.026772,0.046808
2023-04-28 04:00:00+00:00,0.025313,0.049359
2023-05-01 04:00:00+00:00,0.025276,0.04943


In [80]:
#Plot the Google Search Trends in Comparison to the Cumulative Returns
carvana_search_df.hvplot() + cumulative_df.hvplot()

# Back Test the Actual Trading Strategy


In [81]:
#Create the dataframe for the Actual Cumualtive Returns
actual_backtest = pd.concat([cvna_closing_prices, cumulative_df], axis='columns', join='inner')

#Drop the Algo Cumulative Returns Column
actual_backtest.drop(columns='Algo Cumulative Returns', inplace=True)

# Calculate the points in time at which a position should be taken, 1 or -1
actual_backtest['Entry/Exit'] = actual_backtest['Signal'].diff()
pd.set_option('display.max_rows', None)

#Review the dataframe
actual_backtest.tail()

Unnamed: 0_level_0,CVNA,actual_returns,SMA25,SMA100,Signal,Actual Cumulative Returns,Entry/Exit
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-04-25 04:00:00+00:00,7.55,-0.097969,8.6348,7.942,-1.0,0.027538,-2.0
2023-04-26 04:00:00+00:00,7.52,-0.003974,8.6184,7.9401,-1.0,0.027428,0.0
2023-04-27 04:00:00+00:00,7.34,-0.023936,8.5748,7.9322,-1.0,0.026772,0.0
2023-04-28 04:00:00+00:00,6.94,-0.054496,8.5184,7.9209,-1.0,0.025313,0.0
2023-05-01 04:00:00+00:00,6.93,-0.001441,8.468,7.9192,-1.0,0.025276,0.0


In [82]:
# Set the intial capital 
initial_capital = float(100000)

# Set the share size
share_size = 500

#Take the share position where the dual moving average crossover is 1
actual_backtest['Position'] = share_size * actual_backtest['Signal']

# Find the points in time where a 500 share position is bought or sold
actual_backtest["Entry/Exit Position"] = actual_backtest["Position"].diff()

# Multiply share price by entry/exit positions and get the cumulatively sum
actual_backtest["Portfolio Holdings"] = (
    actual_backtest["CVNA"] * actual_backtest["Entry/Exit Position"].cumsum()
)

# Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio
actual_backtest["Portfolio Cash"] = (
    initial_capital - (actual_backtest["CVNA"] * actual_backtest["Entry/Exit Position"]).cumsum()
)

# Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments)
actual_backtest["Portfolio Total"] = (
   actual_backtest["Portfolio Cash"] + actual_backtest["Portfolio Holdings"]
)

# Calculate the portfolio daily returns
actual_backtest["Portfolio Daily Returns"] = actual_backtest["Portfolio Total"].pct_change()

# Calculate the cumulative returns
actual_backtest["Portfolio Cumulative Returns"] = (
    1 + actual_backtest["Portfolio Daily Returns"]
).cumprod() - 1

# Print the DataFrame
actual_backtest.tail()

Unnamed: 0_level_0,CVNA,actual_returns,SMA25,SMA100,Signal,Actual Cumulative Returns,Entry/Exit,Position,Entry/Exit Position,Portfolio Holdings,Portfolio Cash,Portfolio Total,Portfolio Daily Returns,Portfolio Cumulative Returns
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-04-25 04:00:00+00:00,7.55,-0.097969,8.6348,7.942,-1.0,0.027538,-2.0,-500.0,-1000.0,0.0,6050.0,6050.0,-0.11936,-0.9395
2023-04-26 04:00:00+00:00,7.52,-0.003974,8.6184,7.9401,-1.0,0.027428,0.0,-500.0,0.0,0.0,6050.0,6050.0,0.0,-0.9395
2023-04-27 04:00:00+00:00,7.34,-0.023936,8.5748,7.9322,-1.0,0.026772,0.0,-500.0,0.0,0.0,6050.0,6050.0,0.0,-0.9395
2023-04-28 04:00:00+00:00,6.94,-0.054496,8.5184,7.9209,-1.0,0.025313,0.0,-500.0,0.0,0.0,6050.0,6050.0,0.0,-0.9395
2023-05-01 04:00:00+00:00,6.93,-0.001441,8.468,7.9192,-1.0,0.025276,0.0,-500.0,0.0,0.0,6050.0,6050.0,0.0,-0.9395


## Plot Entry/Exit Points of Actual Backtest Results

In [83]:
# Visualize entry postions relative to closing price
entry  = actual_backtest[actual_backtest['Entry/Exit'] == 2.0]['CVNA'].hvplot.scatter(
    color='purple',
    marker='^',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# Visualize exit positions relative to close price
exit = actual_backtest[actual_backtest['Entry/Exit'] == -2.0]['CVNA'].hvplot.scatter(
    color='orange',
    marker='v',
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400)

# Visualize the close price for the investment
security_close = actual_backtest[['CVNA']].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize the moving averages
moving_avgs = actual_backtest[['SMA25', 'SMA100']].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400
)

# Overlay the plots
portfolio_entry_exit_plot = security_close * moving_avgs * entry * exit
portfolio_entry_exit_plot.opts(
    title="VAQ Short-Position Actual - Total Portfolio Value",
    yformatter='%.0f'
) + carvana_search_df.hvplot(width=1000,
    height=400)

# Back Test the Algorithmic Trading Strategy

In [None]:
#Create the dataframe for the Actual Cumualtive Returns
algo_backtest = pd.concat([cvna_closing_prices, cumulative_df], axis='columns', join='inner')

#Drop the Algo Cumulative Returns Column
actual_backtest.drop(columns= 'Actual Cumulative Returns', inplace=True)

# Calculate the points in time at which a position should be taken, 1 or -1
actual_backtest['Entry/Exit'] = actual_backtest['Signal'].diff()
pd.set_option('display.max_rows', None)

#Review the dataframe
actual_backtest.tail()