In [3]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime

In [4]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "XLRE"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-14 00:00:00-04:00  28.532464  28.549362  28.198726  28.371933   
2018-09-17 00:00:00-04:00  28.304332  28.574702  28.300107  28.532457   
2018-09-18 00:00:00-04:00  28.490217  28.549360  28.304336  28.363480   
2018-09-19 00:00:00-04:00  28.371935  28.380383  28.046645  28.093115   
2018-09-20 00:00:00-04:00  28.101556  28.321233  27.991721  28.304335   

                            Volume  Dividends  Stock Splits  Capital Gains  
Date                                                                        
2018-09-14 00:00:00-04:00  1828300        0.0           0.0            0.0  
2018-09-17 00:00:00-04:00  2338800        0.0           0.0            0.0  
2018-09-18 00:00:00-04:00  1451800        0.0           0.0            0.0  
2018-09-19 00:00:00-04:00  2033100        0.0           0.0            0.0  
2018-09-20 00:00:00-04:00 

In [5]:
# Calculate daily returns",
historical_data['Daily_Return'] = historical_data['Close'].pct_change()
#Print the first few rows of the data with daily returns
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-14 00:00:00-04:00  28.532464  28.549362  28.198726  28.371933   
2018-09-17 00:00:00-04:00  28.304332  28.574702  28.300107  28.532457   
2018-09-18 00:00:00-04:00  28.490217  28.549360  28.304336  28.363480   
2018-09-19 00:00:00-04:00  28.371935  28.380383  28.046645  28.093115   
2018-09-20 00:00:00-04:00  28.101556  28.321233  27.991721  28.304335   

                            Volume  Dividends  Stock Splits  Capital Gains  \
Date                                                                         
2018-09-14 00:00:00-04:00  1828300        0.0           0.0            0.0   
2018-09-17 00:00:00-04:00  2338800        0.0           0.0            0.0   
2018-09-18 00:00:00-04:00  1451800        0.0           0.0            0.0   
2018-09-19 00:00:00-04:00  2033100        0.0           0.0            0.0   
2018-09-20 00:00:00-

In [6]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-14 00:00:00-04:00,28.371933,
1,2018-09-17 00:00:00-04:00,28.532457,0.005658
2,2018-09-18 00:00:00-04:00,28.36348,-0.005922
3,2018-09-19 00:00:00-04:00,28.093115,-0.009532
4,2018-09-20 00:00:00-04:00,28.304335,0.007519


In [7]:
# Calculate the total return over the period
total_return = (1 + df['Daily_Return']).prod() - 1

# Determine the number of years in the investment horizon
years = (df['Date'].max() - df['Date'].min()).days / 365

# Calculate annualized return using the formula: ((1 + Total Return)^(1 / Years) - 1)
annualized_return = (pow(1 + total_return, 1 / years) - 1) * 100 

print(years)
print(annualized_return)

5.0
5.034440686948161


In [8]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-14 00:00:00-04:00,28.371933,,
1,2018-09-17 00:00:00-04:00,28.532457,0.005658,0.005658
2,2018-09-18 00:00:00-04:00,28.36348,-0.005922,-0.000298
3,2018-09-19 00:00:00-04:00,28.093115,-0.009532,-0.009827
4,2018-09-20 00:00:00-04:00,28.304335,0.007519,-0.002383


In [10]:
# Calculate the average daily return
average_daily_return = df['Daily_Return'].mean()

# Assume a daily risk free rate
daily_risk_free_rate = 0.001

# Calculate the Sharpe ratio
sharpe_ratio = (average_daily_return - daily_risk_free_rate) / daily_volatility

print(average_daily_return)
print(sharpe_ratio)

0.00031595512521888065
-0.04427605465624885


In [11]:
# Calculate downside risk (standard deviation of negative returns)
negative_returns = df[df['Daily_Return'] < 0]['Daily_Return']
downside_risk = negative_returns.std()

# Calculate the Sortino ratio
sortino_ratio = (average_daily_return - daily_risk_free_rate) / downside_risk

print(sortino_ratio)

-0.05470618982052337


In [13]:
df = df.set_index(["Date"])

df['SMA_Short'] = df['Close'].rolling(window=49).mean()
df['SMA_Long'] = df['Close'].rolling(window=99).mean()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-14 00:00:00-04:00,28.371933,,,,
2018-09-17 00:00:00-04:00,28.532457,0.005658,0.005658,,
2018-09-18 00:00:00-04:00,28.36348,-0.005922,-0.000298,,
2018-09-19 00:00:00-04:00,28.093115,-0.009532,-0.009827,,
2018-09-20 00:00:00-04:00,28.304335,0.007519,-0.002383,,


In [14]:
# Create a binary target variable: 1 for buy signal, -1 for sell signal

df['Signal'] = np.where(df['SMA_Short'] > df['SMA_Long'], 1, -1)
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-14 00:00:00-04:00,28.371933,,,,,-1
2018-09-17 00:00:00-04:00,28.532457,0.005658,0.005658,,,-1
2018-09-18 00:00:00-04:00,28.36348,-0.005922,-0.000298,,,-1
2018-09-19 00:00:00-04:00,28.093115,-0.009532,-0.009827,,,-1
2018-09-20 00:00:00-04:00,28.304335,0.007519,-0.002383,,,-1


In [15]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-02-06 00:00:00-05:00,29.612774,-0.007815,0.043735,28.063521,27.851706,1.0
2019-02-07 00:00:00-05:00,29.863298,0.00846,0.052565,28.097253,27.866771,1.0
2019-02-08 00:00:00-05:00,29.863298,0.0,0.052565,28.127849,27.880213,1.0
2019-02-11 00:00:00-05:00,29.949682,0.002893,0.05561,28.155503,27.896236,1.0
2019-02-12 00:00:00-05:00,29.742353,-0.006923,0.048302,28.178229,27.912895,1.0


In [16]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-06 00:00:00-05:00,28.063521,27.851706
2019-02-07 00:00:00-05:00,28.097253,27.866771
2019-02-08 00:00:00-05:00,28.127849,27.880213
2019-02-11 00:00:00-05:00,28.155503,27.896236
2019-02-12 00:00:00-05:00,28.178229,27.912895
...,...,...
2023-09-06 00:00:00-04:00,37.535918,37.014836
2023-09-07 00:00:00-04:00,37.529796,37.012679
2023-09-08 00:00:00-04:00,37.511837,37.008699
2023-09-11 00:00:00-04:00,37.490408,37.002720


Date
2019-02-06 00:00:00-05:00    1.0
2019-02-07 00:00:00-05:00    1.0
2019-02-08 00:00:00-05:00    1.0
2019-02-11 00:00:00-05:00    1.0
2019-02-12 00:00:00-05:00    1.0
                            ... 
2023-09-06 00:00:00-04:00    1.0
2023-09-07 00:00:00-04:00    1.0
2023-09-08 00:00:00-04:00    1.0
2023-09-11 00:00:00-04:00    1.0
2023-09-12 00:00:00-04:00    1.0
Name: Signal, Length: 1158, dtype: float64

In [31]:
# Imports
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset

In [32]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-02-06 00:00:00-05:00


In [33]:
# Select the ending period for the training data with an offset of "" months
training_end = X.index.min() + DateOffset(months=15)

# Display the training end date
print(training_end)

2020-05-06 00:00:00-04:00


In [34]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-06 00:00:00-05:00,28.063521,27.851706
2019-02-07 00:00:00-05:00,28.097253,27.866771
2019-02-08 00:00:00-05:00,28.127849,27.880213
2019-02-11 00:00:00-05:00,28.155503,27.896236
2019-02-12 00:00:00-05:00,28.178229,27.912895


In [17]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [18]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [21]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [22]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled, y_train)

In [23]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_train_scaled)

# Display the predictions
lr_training_signal_predictions

array([ 1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
        1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1., -1.,
       -1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,
       -1.,  1., -1.,  1.,  1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,
        1., -1.,  1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1.,  1.,  1., -1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1., -1., -1.,
        1.,  1.,  1., -1.,  1.,  1., -1., -1.,  1.,  1.,  1.,  1

In [24]:
# Generate a classification report using the training data and the logistic regression model's predications
lr_training_report = classification_report(y_train, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.80      0.88       284
         1.0       0.92      1.00      0.96       642

    accuracy                           0.94       926
   macro avg       0.96      0.90      0.92       926
weighted avg       0.94      0.94      0.93       926



In [25]:
# Use the trained model to predict the trading signals for the testing data.
lr_testing_signal_predictions = logistic_regression_model.predict(X_test_scaled)

In [26]:
# Generate a classification report using the testing data and the logistic regression model's predictions
lr_testing_report = classification_report(y_test, lr_testing_signal_predictions)

# Review the testing classification report
print(lr_testing_report)

              precision    recall  f1-score   support

        -1.0       1.00      0.80      0.89        74
         1.0       0.91      1.00      0.95       158

    accuracy                           0.94       232
   macro avg       0.96      0.90      0.92       232
weighted avg       0.94      0.94      0.93       232



In [39]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df["Signal"] = lr_testing_signal_predictions
predictions_df['Daily Returns'] = df['Daily Returns']
predictions_df["Trading Algorithm Returns"] = predictions_df['Daily Returns'] * predictions_df["Signal"]
predictions_df.tail()

KeyError: 'Daily Returns'

In [38]:
#Calcualte the cumulative returns for the actual returns and trading algorithm returns
cumulative_df = pd.DataFrame(index=X_test.index)
cumulative_df['Daily Cumulative Returns'] = (1 + predictions_df[['Daily Returns']]).cumprod()
cumulative_df['Algo Cumulative Returns'] = (1 + predictions_df[['Trading Algorithm Returns']]).cumprod()

#Review the cumulative returns dataframe
cumulative_df.tail()

KeyError: "None of [Index(['Daily Returns'], dtype='object')] are in the [columns]"