In [45]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime

In [46]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "XLRE"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-17 00:00:00-04:00  28.304331  28.574700  28.300105  28.532455   
2018-09-18 00:00:00-04:00  28.490217  28.549360  28.304336  28.363480   
2018-09-19 00:00:00-04:00  28.371924  28.380371  28.046633  28.093103   
2018-09-20 00:00:00-04:00  28.101554  28.321231  27.991719  28.304333   
2018-09-21 00:00:00-04:00  28.321416  28.436684  28.210421  28.304342   

                            Volume  Dividends  Stock Splits  Capital Gains  
Date                                                                        
2018-09-17 00:00:00-04:00  2338800       0.00           0.0            0.0  
2018-09-18 00:00:00-04:00  1451800       0.00           0.0            0.0  
2018-09-19 00:00:00-04:00  2033100       0.00           0.0            0.0  
2018-09-20 00:00:00-04:00  1928400       0.00           0.0            0.0  
2018-09-21 00:00:00-04:00 

In [3]:
# Calculate daily returns",
historical_data['Daily_Return'] = historical_data['Close'].pct_change()
#Print the first few rows of the data with daily returns
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-17 00:00:00-04:00  28.304340  28.574710  28.300115  28.532465   
2018-09-18 00:00:00-04:00  28.490221  28.549364  28.304340  28.363483   
2018-09-19 00:00:00-04:00  28.371931  28.380379  28.046641  28.093111   
2018-09-20 00:00:00-04:00  28.101558  28.321235  27.991722  28.304337   
2018-09-21 00:00:00-04:00  28.321412  28.436680  28.210417  28.304338   

                            Volume  Dividends  Stock Splits  Capital Gains  \
Date                                                                         
2018-09-17 00:00:00-04:00  2338800       0.00           0.0            0.0   
2018-09-18 00:00:00-04:00  1451800       0.00           0.0            0.0   
2018-09-19 00:00:00-04:00  2033100       0.00           0.0            0.0   
2018-09-20 00:00:00-04:00  1928400       0.00           0.0            0.0   
2018-09-21 00:00:00-

In [4]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-17 00:00:00-04:00,28.532465,
1,2018-09-18 00:00:00-04:00,28.363483,-0.005922431
2,2018-09-19 00:00:00-04:00,28.093111,-0.009532411
3,2018-09-20 00:00:00-04:00,28.304337,0.007518765
4,2018-09-21 00:00:00-04:00,28.304338,6.738715e-08


In [5]:
# Calculate the total return over the period
total_return = (1 + df['Daily_Return']).prod() - 1

# Determine the number of years in the investment horizon
years = (df['Date'].max() - df['Date'].min()).days / 365

# Calculate annualized return using the formula: ((1 + Total Return)^(1 / Years) - 1)
annualized_return = (pow(1 + total_return, 1 / years) - 1) * 100 

print(years)
print(annualized_return)

4.994520547945205
5.301022258902055


In [6]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-17 00:00:00-04:00,28.532465,,
1,2018-09-18 00:00:00-04:00,28.363483,-0.005922431,-0.005922
2,2018-09-19 00:00:00-04:00,28.093111,-0.009532411,-0.015398
3,2018-09-20 00:00:00-04:00,28.304337,0.007518765,-0.007995
4,2018-09-21 00:00:00-04:00,28.304338,6.738715e-08,-0.007995


In [7]:
# Calculate the standard deviation of daily returns
daily_volatility = df['Daily_Return'].std()

# Annualize the volatility
annual_volatility = daily_volatility * np.sqrt(252)

print(daily_volatility)
print(annual_volatility)

0.015457050117549402
0.2453730636821845


In [8]:
# Calculate the average daily return
average_daily_return = df['Daily_Return'].mean()

# Assume a daily risk free rate
daily_risk_free_rate = 0.001

# Calculate the Sharpe ratio
sharpe_ratio = (average_daily_return - daily_risk_free_rate) / daily_volatility

print(average_daily_return)
print(sharpe_ratio)

0.0003259382126522056
-0.04360869520520529


In [9]:
# Calculate downside risk (standard deviation of negative returns)
negative_returns = df[df['Daily_Return'] < 0]['Daily_Return']
negative_returns.head()

1   -0.005922
2   -0.009532
5   -0.019608
6   -0.000923
7   -0.011087
Name: Daily_Return, dtype: float64

In [10]:
print(len(negative_returns))

570


In [11]:
print(len(df))

1257


In [12]:
print(len(df['Daily_Return']))

1257


In [13]:
downside_risk = negative_returns.std()

# Calculate the Sortino ratio
sortino_ratio = (average_daily_return - daily_risk_free_rate) / downside_risk

print(sortino_ratio)

-0.05390782308381583


In [14]:
short_window = 49
long_window = 99

In [15]:
df['Signal'] = 0.0

In [16]:


df['SMA_Short'] = df['Close'].rolling(window=short_window).mean()
df['SMA_Long'] = df['Close'].rolling(window=long_window).mean()
df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return,Signal,SMA_Short,SMA_Long
0,2018-09-17 00:00:00-04:00,28.532465,,,0.0,,
1,2018-09-18 00:00:00-04:00,28.363483,-0.005922431,-0.005922,0.0,,
2,2018-09-19 00:00:00-04:00,28.093111,-0.009532411,-0.015398,0.0,,
3,2018-09-20 00:00:00-04:00,28.304337,0.007518765,-0.007995,0.0,,
4,2018-09-21 00:00:00-04:00,28.304338,6.738715e-08,-0.007995,0.0,,


In [17]:
# Create a binary target variable: 1 for buy signal, -1 for sell signal

df['Signal'][short_window:] = np.where(df['SMA_Short'][short_window:] > df['SMA_Long'][short_window:], 1.0, -1.0)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Signal'][short_window:] = np.where(df['SMA_Short'][short_window:] > df['SMA_Long'][short_window:], 1.0, -1.0)


Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return,Signal,SMA_Short,SMA_Long
0,2018-09-17 00:00:00-04:00,28.532465,,,0.0,,
1,2018-09-18 00:00:00-04:00,28.363483,-0.005922431,-0.005922,0.0,,
2,2018-09-19 00:00:00-04:00,28.093111,-0.009532411,-0.015398,0.0,,
3,2018-09-20 00:00:00-04:00,28.304337,0.007518765,-0.007995,0.0,,
4,2018-09-21 00:00:00-04:00,28.304338,6.738715e-08,-0.007995,0.0,,


In [18]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return,Signal,SMA_Short,SMA_Long
98,2019-02-07 00:00:00-05:00,29.863298,0.00846,0.046643,1.0,28.097253,27.86677
99,2019-02-08 00:00:00-05:00,29.863298,0.0,0.046643,1.0,28.127849,27.880213
100,2019-02-11 00:00:00-05:00,29.949678,0.002893,0.04967,1.0,28.155503,27.896235
101,2019-02-12 00:00:00-05:00,29.742359,-0.006922,0.042404,1.0,28.178229,27.912894
102,2019-02-13 00:00:00-05:00,29.941032,0.00668,0.049367,1.0,28.199433,27.929427


In [19]:
df = df.set_index(["Date"])

In [20]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-07 00:00:00-05:00,28.097253,27.866770
2019-02-08 00:00:00-05:00,28.127849,27.880213
2019-02-11 00:00:00-05:00,28.155503,27.896235
2019-02-12 00:00:00-05:00,28.178229,27.912894
2019-02-13 00:00:00-05:00,28.199433,27.929427
...,...,...
2023-09-07 00:00:00-04:00,37.529796,37.012679
2023-09-08 00:00:00-04:00,37.511837,37.008699
2023-09-11 00:00:00-04:00,37.490408,37.002720
2023-09-12 00:00:00-04:00,37.462857,37.001243


Date
2019-02-07 00:00:00-05:00    1.0
2019-02-08 00:00:00-05:00    1.0
2019-02-11 00:00:00-05:00    1.0
2019-02-12 00:00:00-05:00    1.0
2019-02-13 00:00:00-05:00    1.0
                            ... 
2023-09-07 00:00:00-04:00    1.0
2023-09-08 00:00:00-04:00    1.0
2023-09-11 00:00:00-04:00    1.0
2023-09-12 00:00:00-04:00    1.0
2023-09-13 00:00:00-04:00    1.0
Name: Signal, Length: 1158, dtype: float64

In [21]:
# Imports
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset

In [22]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-02-07 00:00:00-05:00


In [23]:
# Select the ending period for the training data with an offset of "" months
training_end = np.add(training_begin, DateOffset(months=18))

# Display the training end date
print(training_end)

2020-08-07 00:00:00-04:00


In [24]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-07 00:00:00-05:00,28.097253,27.86677
2019-02-08 00:00:00-05:00,28.127849,27.880213
2019-02-11 00:00:00-05:00,28.155503,27.896235
2019-02-12 00:00:00-05:00,28.178229,27.912894
2019-02-13 00:00:00-05:00,28.199433,27.929427


In [26]:
# Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [27]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [28]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [29]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [30]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled, y_train)

In [31]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_train_scaled)

# Display the predictions
lr_training_signal_predictions

array([ 1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
        1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
       -1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,
       -1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,
        1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1.,  1.,  1., -1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1., -1.,  1.,
        1.,  1.,  1., -1.,  1.,  1., -1., -1.,  1.,  1.,  1.,  1

In [32]:
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.78      0.88       283
         1.0       0.91      1.00      0.95       643

    accuracy                           0.93       926
   macro avg       0.96      0.89      0.92       926
weighted avg       0.94      0.93      0.93       926



In [33]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = logistic_regression_model.predict(X_test_scaled)

In [34]:
# Generate a classification report using the testing data and the logistic regression model's predictions
lr_testing_report = classification_report(y_test, lr_testing_signal_predictions)

# Review the testing classification report
print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.80      0.89        75
         1.0       0.91      1.00      0.95       157

    accuracy                           0.94       232
   macro avg       0.96      0.90      0.92       232
weighted avg       0.94      0.94      0.93       232



In [36]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["Signal"] = lr_testing_signal_predictions
predictions_df['Daily Returns'] = df['Daily_Return']
predictions_df["Trading Algorithm Returns"] = predictions_df['Daily Returns'] * predictions_df["Signal"]
predictions_df.tail()

Unnamed: 0_level_0,Signal,Daily Returns,Trading Algorithm Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-14 00:00:00-04:00,1.0,0.00426,0.00426
2019-07-09 00:00:00-04:00,1.0,0.005284,0.005284
2022-02-24 00:00:00-05:00,1.0,0.017281,0.017281
2021-04-29 00:00:00-04:00,1.0,0.00735,0.00735
2021-04-09 00:00:00-04:00,1.0,0.001489,0.001489


In [39]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test.index)
cumulative_df['Actual Cumulative Returns'] = (1 + predictions_df[['Daily Returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['Trading Algorithm Returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.tail()

Unnamed: 0_level_0,Actual Cumulative Returns,Algo Cumulative Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-06-14 00:00:00-04:00,0.929575,1.106413
2019-07-09 00:00:00-04:00,0.934487,1.11226
2022-02-24 00:00:00-05:00,0.950635,1.13148
2021-04-29 00:00:00-04:00,0.957622,1.139796
2021-04-09 00:00:00-04:00,0.959048,1.141494


In [40]:
cumulative_df.to_csv('XLRE_Returns_Df.csv', index=False)

In [41]:
#Create the dataframe for the Actual Cumualtive Returns
algo_backtest = pd.concat([df, cumulative_df], axis='columns', join='inner')

#Drop the Algo Cumulative Returns Column
algo_backtest.drop(columns='Algo Cumulative Returns', inplace=True)

# Calculate the points in time at which a position should be taken, 1 or -1
algo_backtest['Entry/Exit'] = algo_backtest['Signal'].diff()
pd.set_option('display.max_rows', None)

#Review the dataframe
algo_backtest.tail()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,Signal,SMA_Short,SMA_Long,Actual Cumulative Returns,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-26 00:00:00-04:00,38.82,0.003619,0.360555,1.0,37.104828,36.78667,0.922225,0.0
2023-08-04 00:00:00-04:00,37.07,-0.009882,0.299222,1.0,37.398842,36.862138,1.04753,0.0
2023-08-22 00:00:00-04:00,35.990002,0.003066,0.26137,1.0,37.550976,37.015874,1.178095,0.0
2023-09-06 00:00:00-04:00,36.560001,-0.001911,0.281347,1.0,37.535918,37.014836,1.18928,0.0
2023-09-11 00:00:00-04:00,36.639999,0.000546,0.284151,1.0,37.490408,37.00272,1.235573,0.0


In [50]:
# Set the intial capital 
initial_capital = float(100000)

# Set the share size
share_size = 500

#Take the share position where the dual moving average crossover is 1
algo_backtest['Position'] = share_size * algo_backtest['Signal']

# Find the points in time where a 500 share position is bought or sold
algo_backtest["Entry/Exit Position"] = algo_backtest["Position"].diff()

# Multiply share price by entry/exit positions and get the cumulatively sum
algo_backtest["Portfolio Holdings"] = (
    algo_backtest["Close"] * algo_backtest["Entry/Exit Position"].cumsum()
)

# Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio
algo_backtest["Portfolio Cash"] = (
    initial_capital - (algo_backtest["Close"] * algo_backtest["Entry/Exit Position"]).cumsum()
)

# Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments)
algo_backtest["Portfolio Total"] = (
   algo_backtest["Portfolio Cash"] + algo_backtest["Portfolio Holdings"]
)

# Calculate the portfolio daily returns
algo_backtest["Portfolio Daily Returns"] = algo_backtest["Portfolio Total"].pct_change()

# Calculate the cumulative returns
algo_backtest["Portfolio Cumulative Returns"] = (
    1 + algo_backtest["Portfolio Daily Returns"]
).cumprod() - 1

# Print the DataFrame
algo_backtest.tail()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,Signal,SMA_Short,SMA_Long,Actual Cumulative Returns,Entry/Exit,Position,Entry/Exit Position,Portfolio Holdings,Portfolio Cash,Portfolio Total,Portfolio Daily Returns,Portfolio Cumulative Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-07-26 00:00:00-04:00,38.82,0.003619,0.360555,1.0,37.104828,36.78667,0.922225,0.0,500.0,0.0,0.0,87968.975067,87968.975067,0.0,-0.12031
2023-08-04 00:00:00-04:00,37.07,-0.009882,0.299222,1.0,37.398842,36.862138,1.04753,0.0,500.0,0.0,0.0,87968.975067,87968.975067,0.0,-0.12031
2023-08-22 00:00:00-04:00,35.990002,0.003066,0.26137,1.0,37.550976,37.015874,1.178095,0.0,500.0,0.0,0.0,87968.975067,87968.975067,0.0,-0.12031
2023-09-06 00:00:00-04:00,36.560001,-0.001911,0.281347,1.0,37.535918,37.014836,1.18928,0.0,500.0,0.0,0.0,87968.975067,87968.975067,0.0,-0.12031
2023-09-11 00:00:00-04:00,36.639999,0.000546,0.284151,1.0,37.490408,37.00272,1.235573,0.0,500.0,0.0,0.0,87968.975067,87968.975067,0.0,-0.12031


In [51]:
# Create the list of the metric names
metrics1 = [
    'Annualized Return',
    'Cumulative Returns',
    'Annual Volatility',
    'Sharpe Ratio',
    'Sortino Ratio'
]

# Create a list that holds the column name
columns1 = ['Backtest']

# Initialize the DataFrame with index set to evaluation metrics and columns 
algo_evaluation_df = pd.DataFrame(index=metrics1, columns=columns1)

# Review the DataFrame
algo_evaluation_df

Unnamed: 0,Backtest
Annualized Return,
Cumulative Returns,
Annual Volatility,
Sharpe Ratio,
Sortino Ratio,


In [52]:
algo_evaluation_df.loc['Annualized Return'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252
)

In [53]:
# Calculate the Cumulative returns metric
algo_evaluation_df.loc['Cumulative Returns'] = algo_backtest['Portfolio Cumulative Returns'][-1]

In [54]:
# Calculate the Annual volatility metric
algo_evaluation_df.loc['Annual Volatility'] = (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)

In [55]:
# Calculate the Sharpe ratio
algo_evaluation_df.loc['Sharpe Ratio'] = (
    algo_backtest['Portfolio Daily Returns'].mean() * 252) / (
    algo_backtest['Portfolio Daily Returns'].std() * np.sqrt(252)
)

In [56]:
# Calculate the Sortino ratio
# Start by calculating the downside return values

# Create a DataFrame that contains the Portfolio Daily Returns column
sortino_ratio_df = algo_backtest[['Portfolio Daily Returns']].copy()

# Create a column to hold downside return values
sortino_ratio_df.loc[:,'Downside Returns'] = 0

# Find Portfolio Daily Returns values less than 0, 
# square those values, and add them to the Downside Returns column
sortino_ratio_df.loc[sortino_ratio_df['Portfolio Daily Returns'] < 0, 
                     'Downside Returns'] = sortino_ratio_df['Portfolio Daily Returns']**2

# Calculate the annualized return value
annualized_return = sortino_ratio_df['Portfolio Daily Returns'].mean() * 252

# Calculate the annualized downside standard deviation value
downside_standard_deviation = np.sqrt(sortino_ratio_df['Downside Returns'].mean()) * np.sqrt(252)

# Divide the annualized return value by the downside standard deviation value
sortino_ratio = annualized_return/downside_standard_deviation

# Add the Sortino ratio to the evaluation DataFrame
algo_evaluation_df.loc['Sortino Ratio'] = sortino_ratio

In [57]:
# Review the portfolio evaluation DataFrame
algo_evaluation_df

Unnamed: 0,Backtest
Annualized Return,-0.129797
Cumulative Returns,-0.12031
Annual Volatility,0.1457
Sharpe Ratio,-0.890856
Sortino Ratio,-1.174871


In [58]:
algo_evaluation_df.to_csv('XLRE_Backtest.c', index=False)