In [5]:
import pandas as pd
import numpy as np
import hvplot.pandas
import yfinance as yf
import datetime

In [6]:
# Define the stock symbol and create a Yahoo Finance ticker object
symbol = "XLRE"
ticker = yf.Ticker(symbol)

# Get historical stock data for the last 5 years
historical_data = ticker.history(period="5y")

# Print the first few rows of the historical data
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-13 00:00:00-04:00  28.549358  28.692991  28.481764  28.616949   
2018-09-14 00:00:00-04:00  28.532460  28.549359  28.198722  28.371929   
2018-09-17 00:00:00-04:00  28.304332  28.574702  28.300107  28.532457   
2018-09-18 00:00:00-04:00  28.490211  28.549354  28.304331  28.363474   
2018-09-19 00:00:00-04:00  28.371933  28.380381  28.046643  28.093113   

                            Volume  Dividends  Stock Splits  Capital Gains  
Date                                                                        
2018-09-13 00:00:00-04:00  1576500        0.0           0.0            0.0  
2018-09-14 00:00:00-04:00  1828300        0.0           0.0            0.0  
2018-09-17 00:00:00-04:00  2338800        0.0           0.0            0.0  
2018-09-18 00:00:00-04:00  1451800        0.0           0.0            0.0  
2018-09-19 00:00:00-04:00 

In [7]:
# Calculate daily returns
historical_data['Daily_Return'] = historical_data['Close'].pct_change()

# Print the first few rows of the data with daily returns
print(historical_data.head())

                                Open       High        Low      Close  \
Date                                                                    
2018-09-13 00:00:00-04:00  28.549358  28.692991  28.481764  28.616949   
2018-09-14 00:00:00-04:00  28.532460  28.549359  28.198722  28.371929   
2018-09-17 00:00:00-04:00  28.304332  28.574702  28.300107  28.532457   
2018-09-18 00:00:00-04:00  28.490211  28.549354  28.304331  28.363474   
2018-09-19 00:00:00-04:00  28.371933  28.380381  28.046643  28.093113   

                            Volume  Dividends  Stock Splits  Capital Gains  \
Date                                                                         
2018-09-13 00:00:00-04:00  1576500        0.0           0.0            0.0   
2018-09-14 00:00:00-04:00  1828300        0.0           0.0            0.0   
2018-09-17 00:00:00-04:00  2338800        0.0           0.0            0.0   
2018-09-18 00:00:00-04:00  1451800        0.0           0.0            0.0   
2018-09-19 00:00:00-

In [16]:
# Convert to DataFrame and Reset Index
df = pd.DataFrame(historical_data)
df = df.reset_index()
df = df.drop(columns = ["Open", "High", "Low", "Volume", "Dividends", "Stock Splits", "Capital Gains"])
df.head()

Unnamed: 0,Date,Close,Daily_Return
0,2018-09-13 00:00:00-04:00,28.616949,
1,2018-09-14 00:00:00-04:00,28.371929,-0.008562
2,2018-09-17 00:00:00-04:00,28.532457,0.005658
3,2018-09-18 00:00:00-04:00,28.363474,-0.005922
4,2018-09-19 00:00:00-04:00,28.093113,-0.009532


In [17]:
# Calculate the total return over the period
total_return = (1 + df['Daily_Return']).prod() - 1

# Determine the number of years in the investment horizon
years = (df['Date'].max() - df['Date'].min()).days / 365

# Calculate annualized return using the formula: ((1 + Total Return)^(1 / Years) - 1)
annualized_return = (pow(1 + total_return, 1 / years) - 1) * 100 

print(years)
print(annualized_return)

5.0
5.06702261197407


In [18]:
# Calculate Cumulative Return and add as a Column
df['Cumulative_Return'] = (1 + df['Daily_Return']).cumprod() - 1

df.head()

Unnamed: 0,Date,Close,Daily_Return,Cumulative_Return
0,2018-09-13 00:00:00-04:00,28.616949,,
1,2018-09-14 00:00:00-04:00,28.371929,-0.008562,-0.008562
2,2018-09-17 00:00:00-04:00,28.532457,0.005658,-0.002953
3,2018-09-18 00:00:00-04:00,28.363474,-0.005922,-0.008858
4,2018-09-19 00:00:00-04:00,28.093113,-0.009532,-0.018305


In [19]:
# Calculate the standard deviation of daily returns
daily_volatility = df['Daily_Return'].std()

# Annualize the volatility
annual_volatility = daily_volatility * np.sqrt(252)

print(daily_volatility)
print(annual_volatility)

0.015448776500619354
0.24524172408514497


In [20]:
# Calculate the average daily return
average_daily_return = df['Daily_Return'].mean()

# Assume a daily risk free rate
daily_risk_free_rate = 0.001

# Calculate the Sharpe ratio
sharpe_ratio = (average_daily_return - daily_risk_free_rate) / daily_volatility

print(average_daily_return)
print(sharpe_ratio)

0.0003171782349838185
-0.04419908366133763


In [21]:
# Calculate downside risk (standard deviation of negative returns)
negative_returns = df[df['Daily_Return'] < 0]['Daily_Return']
downside_risk = negative_returns.std()

# Calculate the Sortino ratio
sortino_ratio = (average_daily_return - daily_risk_free_rate) / downside_risk

print(sortino_ratio)

-0.054606822256568155


In [22]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [23]:
df = df.set_index(["Date"])

df['SMA_Short'] = df['Close'].rolling(window=49).mean()
df['SMA_Long'] = df['Close'].rolling(window=99).mean()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-13 00:00:00-04:00,28.616949,,,,
2018-09-14 00:00:00-04:00,28.371929,-0.008562,-0.008562,,
2018-09-17 00:00:00-04:00,28.532457,0.005658,-0.002953,,
2018-09-18 00:00:00-04:00,28.363474,-0.005922,-0.008858,,
2018-09-19 00:00:00-04:00,28.093113,-0.009532,-0.018305,,


In [24]:
# Create a binary target variable: 1 for buy signal, -1 for sell signal

df['Signal'] = np.where(df['SMA_Short'] > df['SMA_Long'], 1, -1)
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-13 00:00:00-04:00,28.616949,,,,,-1
2018-09-14 00:00:00-04:00,28.371929,-0.008562,-0.008562,,,-1
2018-09-17 00:00:00-04:00,28.532457,0.005658,-0.002953,,,-1
2018-09-18 00:00:00-04:00,28.363474,-0.005922,-0.008858,,,-1
2018-09-19 00:00:00-04:00,28.093113,-0.009532,-0.018305,,,-1


In [25]:
# Drop NaN values and shift target variable by one day
df['Signal'] = df['Signal'].shift(-1)
df = df.dropna()
df.head()

Unnamed: 0_level_0,Close,Daily_Return,Cumulative_Return,SMA_Short,SMA_Long,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-02-05 00:00:00-05:00,29.846022,0.006702,0.042949,28.033681,27.841647,1.0
2019-02-06 00:00:00-05:00,29.612782,-0.007815,0.034799,28.063521,27.851706,1.0
2019-02-07 00:00:00-05:00,29.863293,0.00846,0.043553,28.097253,27.86677,1.0
2019-02-08 00:00:00-05:00,29.863293,0.0,0.043553,28.127848,27.880213,1.0
2019-02-11 00:00:00-05:00,29.949682,0.002893,0.046571,28.155502,27.896235,1.0


In [26]:
# Prepare data for machine learning
X = df.drop(columns = (["Close", "Daily_Return", "Cumulative_Return", "Signal"]))
y = df["Signal"]
display(X)
display(y)

Unnamed: 0_level_0,SMA_Short,SMA_Long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-02-05 00:00:00-05:00,28.033681,27.841647
2019-02-06 00:00:00-05:00,28.063521,27.851706
2019-02-07 00:00:00-05:00,28.097253,27.866770
2019-02-08 00:00:00-05:00,28.127848,27.880213
2019-02-11 00:00:00-05:00,28.155502,27.896235
...,...,...
2023-09-05 00:00:00-04:00,37.546122,37.011718
2023-09-06 00:00:00-04:00,37.535918,37.014836
2023-09-07 00:00:00-04:00,37.529796,37.012679
2023-09-08 00:00:00-04:00,37.511837,37.008699


Date
2019-02-05 00:00:00-05:00    1.0
2019-02-06 00:00:00-05:00    1.0
2019-02-07 00:00:00-05:00    1.0
2019-02-08 00:00:00-05:00    1.0
2019-02-11 00:00:00-05:00    1.0
                            ... 
2023-09-05 00:00:00-04:00    1.0
2023-09-06 00:00:00-04:00    1.0
2023-09-07 00:00:00-04:00    1.0
2023-09-08 00:00:00-04:00    1.0
2023-09-11 00:00:00-04:00    1.0
Name: Signal, Length: 1158, dtype: float64

In [27]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [28]:
# Scale the data and transform
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [29]:
# Build and train a machine learning model
model = LogisticRegression(random_state = 1)
model.fit(X_train_scaled, y_train)

In [30]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
testing_report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(testing_report)



Accuracy: 0.3146551724137931
              precision    recall  f1-score   support

        -1.0       0.31      1.00      0.48        73
         1.0       0.00      0.00      0.00       159

    accuracy                           0.31       232
   macro avg       0.16      0.50      0.24       232
weighted avg       0.10      0.31      0.15       232



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
# Export the DataFrame to a CSV file
csv_file_path = 'Real_Estate_XLRE.csv'
df.to_csv(csv_file_path, index=False)