In [17]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime

In [18]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "XLRE"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-17 00:00:00-04:00  28.304342  28.574712  28.300117  28.532467   
2018-09-18 00:00:00-04:00  28.490217  28.549360  28.304336  28.363480   
2018-09-19 00:00:00-04:00  28.371930  28.380377  28.046639  28.093109   
2018-09-20 00:00:00-04:00  28.101554  28.321231  27.991719  28.304333   
2018-09-21 00:00:00-04:00  28.321408  28.436677  28.210413  28.304335   

                            Volume  Dividends  Stock Splits  Capital Gains  
Date                                                                        
2018-09-17 00:00:00-04:00  2338800       0.00           0.0            0.0  
2018-09-18 00:00:00-04:00  1451800       0.00           0.0            0.0  
2018-09-19 00:00:00-04:00  2033100       0.00           0.0            0.0  
2018-09-20 00:00:00-04:00  1928400       0.00           0.0            0.0  
2018-09-21 00:00:00-04:00 

In [19]:
# Calculate daily returns",
historical_data['Daily_Return'] = historical_data['Close'].pct_change()
#Print the first few rows of the data with daily returns
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-17 00:00:00-04:00  28.304342  28.574712  28.300117  28.532467   
2018-09-18 00:00:00-04:00  28.490217  28.549360  28.304336  28.363480   
2018-09-19 00:00:00-04:00  28.371930  28.380377  28.046639  28.093109   
2018-09-20 00:00:00-04:00  28.101554  28.321231  27.991719  28.304333   
2018-09-21 00:00:00-04:00  28.321408  28.436677  28.210413  28.304335   

                            Volume  Dividends  Stock Splits  Capital Gains  \
Date                                                                         
2018-09-17 00:00:00-04:00  2338800       0.00           0.0            0.0   
2018-09-18 00:00:00-04:00  1451800       0.00           0.0            0.0   
2018-09-19 00:00:00-04:00  2033100       0.00           0.0            0.0   
2018-09-20 00:00:00-04:00  1928400       0.00           0.0            0.0   
2018-09-21 00:00:00-

In [20]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-17 00:00:00-04:00,28.532467,
1,2018-09-18 00:00:00-04:00,28.36348,-0.005922631
2,2018-09-19 00:00:00-04:00,28.093109,-0.009532345
3,2018-09-20 00:00:00-04:00,28.304333,0.007518698
4,2018-09-21 00:00:00-04:00,28.304335,6.738716e-08


In [21]:
# Calculate the total return over the period
total_return = (1 + df['Daily_Return']).prod() - 1

# Determine the number of years in the investment horizon
years = (df['Date'].max() - df['Date'].min()).days / 365

# Calculate annualized return using the formula: ((1 + Total Return)^(1 / Years) - 1)
annualized_return = (pow(1 + total_return, 1 / years) - 1) * 100 

print(years)
print(annualized_return)

4.994520547945205
5.301020849517357


In [22]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-17 00:00:00-04:00,28.532467,,
1,2018-09-18 00:00:00-04:00,28.36348,-0.005922631,-0.005923
2,2018-09-19 00:00:00-04:00,28.093109,-0.009532345,-0.015399
3,2018-09-20 00:00:00-04:00,28.304333,0.007518698,-0.007996
4,2018-09-21 00:00:00-04:00,28.304335,6.738716e-08,-0.007996


In [23]:
# Calculate the standard deviation of daily returns
daily_volatility = df['Daily_Return'].std()

# Annualize the volatility
annual_volatility = daily_volatility * np.sqrt(252)

print(daily_volatility)
print(annual_volatility)

0.01545704864499105
0.24537304030604534


In [24]:
# Calculate the average daily return
average_daily_return = df['Daily_Return'].mean()

# Assume a daily risk free rate
daily_risk_free_rate = 0.001

# Calculate the Sharpe ratio
sharpe_ratio = (average_daily_return - daily_risk_free_rate) / daily_volatility

print(average_daily_return)
print(sharpe_ratio)

0.0003259381372409796
-0.043608704238467556


In [25]:
# Calculate downside risk (standard deviation of negative returns)
negative_returns = df[df['Daily_Return'] < 0]['Daily_Return']
downside_risk = negative_returns.std()

# Calculate the Sortino ratio
sortino_ratio = (average_daily_return - daily_risk_free_rate) / downside_risk

print(sortino_ratio)

-0.05390780999182525


In [26]:
df = df.set_index(["Date"])

df['SMA_Short'] = df['Close'].rolling(window=49).mean()
df['SMA_Long'] = df['Close'].rolling(window=99).mean()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-17 00:00:00-04:00,28.532467,,,,
2018-09-18 00:00:00-04:00,28.36348,-0.005922631,-0.005923,,
2018-09-19 00:00:00-04:00,28.093109,-0.009532345,-0.015399,,
2018-09-20 00:00:00-04:00,28.304333,0.007518698,-0.007996,,
2018-09-21 00:00:00-04:00,28.304335,6.738716e-08,-0.007996,,


In [27]:
# Create a binary target variable: 1 for buy signal, -1 for sell signal

df['Signal'] = np.where(df['SMA_Short'] > df['SMA_Long'], 1, -1)
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-17 00:00:00-04:00,28.532467,,,,,-1
2018-09-18 00:00:00-04:00,28.36348,-0.005922631,-0.005923,,,-1
2018-09-19 00:00:00-04:00,28.093109,-0.009532345,-0.015399,,,-1
2018-09-20 00:00:00-04:00,28.304333,0.007518698,-0.007996,,,-1
2018-09-21 00:00:00-04:00,28.304335,6.738716e-08,-0.007996,,,-1


In [28]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-02-07 00:00:00-05:00,29.863295,0.00846,0.046643,28.097252,27.86677,1.0
2019-02-08 00:00:00-05:00,29.863295,0.0,0.046643,28.127848,27.880212,1.0
2019-02-11 00:00:00-05:00,29.949682,0.002893,0.04967,28.155502,27.896235,1.0
2019-02-12 00:00:00-05:00,29.742353,-0.006923,0.042404,28.178228,27.912894,1.0
2019-02-13 00:00:00-05:00,29.941044,0.00668,0.049368,28.199433,27.929426,1.0


In [29]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-07 00:00:00-05:00,28.097252,27.866770
2019-02-08 00:00:00-05:00,28.127848,27.880212
2019-02-11 00:00:00-05:00,28.155502,27.896235
2019-02-12 00:00:00-05:00,28.178228,27.912894
2019-02-13 00:00:00-05:00,28.199433,27.929426
...,...,...
2023-09-07 00:00:00-04:00,37.529796,37.012679
2023-09-08 00:00:00-04:00,37.511837,37.008699
2023-09-11 00:00:00-04:00,37.490408,37.002720
2023-09-12 00:00:00-04:00,37.462857,37.001243


Date
2019-02-07 00:00:00-05:00    1.0
2019-02-08 00:00:00-05:00    1.0
2019-02-11 00:00:00-05:00    1.0
2019-02-12 00:00:00-05:00    1.0
2019-02-13 00:00:00-05:00    1.0
                            ... 
2023-09-07 00:00:00-04:00    1.0
2023-09-08 00:00:00-04:00    1.0
2023-09-11 00:00:00-04:00    1.0
2023-09-12 00:00:00-04:00    1.0
2023-09-13 00:00:00-04:00    1.0
Name: Signal, Length: 1158, dtype: float64

In [35]:
# Imports
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.model_selection import train_test_split

In [36]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-02-07 00:00:00-05:00


In [37]:
# Select the ending period for the training data with an offset of "" months
training_end = X.index.min() + DateOffset(months=15)

# Display the training end date
print(training_end)

2020-05-07 00:00:00-04:00


In [38]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-07 00:00:00-05:00,28.097252,27.86677
2019-02-08 00:00:00-05:00,28.127848,27.880212
2019-02-11 00:00:00-05:00,28.155502,27.896235
2019-02-12 00:00:00-05:00,28.178228,27.912894
2019-02-13 00:00:00-05:00,28.199433,27.929426


In [39]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [40]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [41]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report


In [42]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [43]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled, y_train)

In [44]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_train_scaled)

# Display the predictions
lr_training_signal_predictions

array([ 1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
        1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
       -1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,
       -1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,
        1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1.,  1.,  1., -1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1., -1.,  1.,
        1.,  1.,  1., -1.,  1.,  1., -1., -1.,  1.,  1.,  1.,  1

In [45]:
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.78      0.88       283
         1.0       0.91      1.00      0.95       643

    accuracy                           0.93       926
   macro avg       0.96      0.89      0.92       926
weighted avg       0.94      0.93      0.93       926



In [46]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = logistic_regression_model.predict(X_test_scaled)

In [47]:
# Generate a classification report using the testing data and the logistic regression model's predictions
lr_testing_report = classification_report(y_test, lr_testing_signal_predictions)

# Review the testing classification report
print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.80      0.89        75
         1.0       0.91      1.00      0.95       157

    accuracy                           0.94       232
   macro avg       0.96      0.90      0.92       232
weighted avg       0.94      0.94      0.93       232



In [51]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["Signal"] = lr_testing_signal_predictions
predictions_df['Daily Returns'] = df['Daily_Return']
predictions_df["Trading Algorithm Returns"] = predictions_df['Daily Returns'] * predictions_df["Signal"]
predictions_df.tail()

Unnamed: 0_level_0,Signal,Daily Returns,Trading Algorithm Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-14 00:00:00-04:00,1.0,0.00426,0.00426
2019-07-09 00:00:00-04:00,1.0,0.005284,0.005284
2022-02-24 00:00:00-05:00,1.0,0.017281,0.017281
2021-04-29 00:00:00-04:00,1.0,0.007349,0.007349
2021-04-09 00:00:00-04:00,1.0,0.00149,0.00149


In [52]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test.index)
cumulative_df['Daily Cumulative Returns'] = (1 + predictions_df[['Daily Returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['Trading Algorithm Returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.tail()

Unnamed: 0_level_0,Daily Cumulative Returns,Algo Cumulative Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-06-14 00:00:00-04:00,0.929574,1.106412
2019-07-09 00:00:00-04:00,0.934486,1.112259
2022-02-24 00:00:00-05:00,0.950635,1.131479
2021-04-29 00:00:00-04:00,0.957621,1.139795
2021-04-09 00:00:00-04:00,0.959048,1.141493
