In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

In [2]:
# Pull AMZN price history from Yahoo Finance using a '60mo' period string.
ticker = yf.Ticker('AMZN')
data = ticker.history('60mo')

# Day-over-day percentage change of the closing price.
data['Daily Returns'] = data['Close'].pct_change()

# Preview goes last: a bare expression in the middle of a cell is evaluated
# and thrown away, so the original `data.head()` never rendered anything.
data.head()

YFRateLimitError: Too Many Requests. Rate limited. Try after a while.

In [None]:
# Line chart of the closing price; labels are set on the Axes returned by pandas.
close_ax = data['Close'].plot(title = 'AMZN closing costs')
close_ax.set_xlabel('date')
close_ax.set_ylabel('closing cost')
plt.show()

In [None]:
# Rolling mean of the close over a 50-row and a 200-row window.
for ma_window in (50, 200):
    data[f"MA{ma_window}"] = data['Close'].rolling(window = ma_window).mean()

# True on rows where the close is above its 200-row rolling mean
# (rows where MA200 is still NaN compare as False).
data["Rolling Average Trend"] = data['Close'].gt(data["MA200"])

Some interesting insights from the rolling averages: when the short-term rolling average is lower than the long-term rolling average, it could indicate a potential 'buy', and when the short-term average is well above the long-term rolling average, it could indicate a 'sell'.

In [None]:
# Close price overlaid with its 50- and 200-row rolling means.
ma_ax = data[['Close','MA50','MA200']].plot(title = "AMZN Closing price comapred to 50 and 200 day rolling averages")
ma_ax.set_xlabel('date')
ma_ax.set_ylabel('price')
# `plt.show` without parentheses only references the function; it must be called.
plt.show()

# Rule-Based Momentum and Volatility Factor Model

The next cell computes momentum (the rate of change of the price over several lookback windows) so that I can compare how fast the price is moving rather than looking at the price levels themselves.

In [None]:
# Momentum per lookback: close divided by the close `lookback` rows earlier, minus 1.
for lookback in (10, 60, 180):
    earlier_close = data['Close'].shift(lookback)
    data[f'{lookback}day'] = data['Close'].div(earlier_close).sub(1)

Calculating the volatility (rolling standard deviation of daily returns) for each lookback period (10 days, 60 days, 180 days).

In [None]:
# Volatility per lookback: rolling standard deviation of the daily returns.
for vol_window in (10, 60, 180):
    rolling_returns = data['Daily Returns'].rolling(window = vol_window)
    data[f'{vol_window} Day Volatility'] = rolling_returns.std()

Using momentum, volatility, and the rolling average to determine my signal. If the stock isn't too volatile (volatility under .02), the momentum is greater than .05, and the current close price is greater than the 200-day rolling average, then buy (+1). If the volatility is under .02 and the momentum is less than -.05, then sell (-1). Otherwise, hold (0).

In [None]:
# Rule-based signal, lagged position and back-test for each lookback window.
# Each stage runs over all lookbacks before the next stage starts, so the
# new columns are appended in the same order as the original cell.
lookback_days = (10, 60, 180)

# Signal: +1 when the close is above MA200, volatility is under .02 and
# momentum is above .05; -1 when volatility is under .02 and momentum is
# below -.05; 0 otherwise.
for n_days in lookback_days:
    low_vol = data[f'{n_days} Day Volatility'] < .02
    momentum = data[f'{n_days}day']
    go_long = data['Rolling Average Trend'] & low_vol & (momentum > .05)
    go_short = low_vol & (momentum < -.05)
    data[f'{n_days} Day Signal'] = np.where(go_long, 1, np.where(go_short, -1, 0))

# Position held on a row is the previous row's signal (avoids look-ahead).
for n_days in lookback_days:
    data[f'{n_days} Day position'] = data[f'{n_days} Day Signal'].shift(1)

# Strategy return on a row = position held * that row's daily return.
for n_days in lookback_days:
    data[f'{n_days} Day Momentum Daily Returns'] = data[f'{n_days} Day position'] * data['Daily Returns']

# Growth of 1 unit invested in each strategy.
for n_days in lookback_days:
    data[f'{n_days} Day Momentum Cumulative Returns'] = (1 + data[f'{n_days} Day Momentum Daily Returns']).cumprod()

cumulative_cols = [f'{n_days} Day Momentum Cumulative Returns' for n_days in lookback_days]
momentum_ax = data[cumulative_cols].plot(title = '10, 60, and 180 day momentum returns')
momentum_ax.set_xlabel('date')
momentum_ax.set_ylabel('returns')
# `plt.show` without parentheses only references the function; it must be called.
plt.show()

Saw that the cumulative returns of the 180-day momentum strategy were the highest, so comparing that to a buy-and-hold of AMZN shares.

In [None]:
# Growth of 1 unit under the 180-day momentum strategy.
momentum_growth = data['180 Day Momentum Daily Returns'].add(1)
data['Momentum Cumulative Returns'] = momentum_growth.cumprod()

# Growth of 1 unit from simply holding the stock.
hold_growth = data['Daily Returns'].add(1)
data['Buy and Hold Cumulative Returns'] = hold_growth.cumprod()

Evidently, a simple rule-based factor model isn't going to outperform buying and holding the stock itself. The momentum cumulative returns went up around 22% while the stock itself went up over 60%

In [None]:
# Compare the 180-day momentum strategy against buy-and-hold on one chart.
compare_ax = data[['Momentum Cumulative Returns', 'Buy and Hold Cumulative Returns']].plot(title = 'Momentum vs Buy and Hold Returns')
compare_ax.set_xlabel('Date')
compare_ax.set_ylabel('Returns')
# `plt.show` without parentheses only references the function; it must be called.
plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.callbacks import EarlyStopping

# Creating an LSTM model to predict which direction the stock will move the next day

In [None]:
# Number of consecutive rows fed to the model per sample.
sequence_length = 50
# Columns used as model inputs.
features = ['10day', '60day', '180day', '10 Day Volatility', '60 Day Volatility', '180 Day Volatility', 'Rolling Average Trend', 'Daily Returns']

# Target is 1 when the next row's close is higher than this row's close, else 0.
next_close = data['Close'].shift(-1)
data['Target'] = (next_close > data['Close']).astype(float)
# The last row has no next close, so its direction is unknown. The original
# `np.where` silently labelled it 0; mark it NaN so dropna removes it instead.
data.loc[next_close.isna(), 'Target'] = np.nan

# Drop warm-up rows (rolling windows / shifts) and the unlabeled final row.
# NOTE: re-running this cell on its own trims one more trailing row each time;
# re-run from the data-loading cell to rebuild the frame.
data.dropna(inplace=True)
data['Target'] = data['Target'].astype(int)

In [None]:
# Storing the input sequences and labels in arrays for the model
def create_sequence(sequence_length, data, feature_cols=None):
    """Slice a frame into overlapping fixed-length windows with one label each.

    Sample ``i`` is built from rows ``i .. i + sequence_length - 1`` of the
    feature columns, and its label is ``data['Target']`` at row
    ``i + sequence_length`` (the row immediately after the window).

    Args:
        sequence_length: Number of consecutive rows per window.
        data: DataFrame containing the feature columns and a 'Target' column.
        feature_cols: Column names to use as inputs. Defaults to the
            notebook-level ``features`` list when omitted.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x`` stacks the windows, ``y`` holds
        the matching labels. Both are empty when ``data`` has no more than
        ``sequence_length`` rows.
    """
    if feature_cols is None:
        feature_cols = features

    # Select the columns once instead of re-slicing the DataFrame per window.
    feature_values = data[list(feature_cols)].values
    target_values = data['Target'].values

    n_samples = max(len(data) - sequence_length, 0)
    x = [feature_values[i:i + sequence_length] for i in range(n_samples)]
    y = [target_values[i + sequence_length] for i in range(n_samples)]
    return np.array(x), np.array(y)
# Build every window/label pair from the cleaned frame.
x_all, y_all = create_sequence(sequence_length=sequence_length, data=data)

In [None]:
# Next-row return: percentage change of the close, shifted up one row so each
# row holds the return realised between it and the following row.
data['Actual Return'] = data['Close'].pct_change(periods=1).shift(periods=-1)

In [None]:
# Walk-forward evaluation: TimeSeriesSplit yields 5 folds whose validation
# indices always come after the training indices.
tscv = TimeSeriesSplit(n_splits=5)
validation_accuracy = []
all_y_probs = []
all_actual_returns = []

n_features = x_all.shape[-1]

for fold, (train_index, val_index) in enumerate(tscv.split(x_all)):
    scaler = MinMaxScaler()
    # Combining samples and time steps into one axis so it's 2D for the MinMaxScaler
    x_train_2d = x_all[train_index].reshape(-1, n_features)
    x_val_2d = x_all[val_index].reshape(-1, n_features)

    # Fitting scale only on training data, transform both
    x_train = scaler.fit_transform(x_train_2d).reshape(-1, sequence_length, n_features)
    x_val = scaler.transform(x_val_2d).reshape(-1, sequence_length, n_features)

    y_train, y_val = y_all[train_index], y_all[val_index]

    # A fresh model per fold so no weights carry over between folds.
    model = Sequential([
        Input(shape=(sequence_length, len(features))),
        LSTM(64, return_sequences=True),
        Dropout(0.2),
        LSTM(32),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])
    # Stop once validation loss has not improved for 5 epochs and roll back to the best weights.
    # NOTE(review): the same fold drives early stopping and is then scored and
    # back-tested, so the reported fold metrics are optimistic.
    early_stop = EarlyStopping(monitor='val_loss', patience = 5, restore_best_weights = True, verbose = 0)

    print(f"\nFold {fold+1} training...")
    model.fit(x_train, y_train, 
          validation_data = (x_val, y_val),
          epochs=60,
          batch_size = 32,
          callbacks=[early_stop],
          verbose = 1)
    loss, acc = model.evaluate(x_val, y_val, verbose = 0)
    print(f"Fold {fold+1} val_accuracy: {acc:.4f}")
    validation_accuracy.append(acc)

    # Save all the predictions and actual returns for backtesting.
    y_probs = model.predict(x_val).flatten()

    # Sample i is labelled with Target at row i + sequence_length, so the return
    # it predicts lives on that row too. Indexing with val_index alone paired
    # each prediction with a return from sequence_length rows earlier.
    label_rows = val_index + sequence_length
    actual_returns = data['Actual Return'].iloc[label_rows].values

    # The final row has no next close, so its return is NaN; drop such samples
    # from both lists so they stay aligned and the back-test stats stay finite.
    known = ~np.isnan(actual_returns)
    all_y_probs.extend(y_probs[known])
    all_actual_returns.extend(actual_returns[known])


### TODO: Add a section to the Medium article explaining why I used early stopping, with a graph comparing the average number of epochs trained with and without early stopping

## Backtesting current model state and seeing outcome

In [None]:
# Map each predicted probability to a position:
#   above .515 -> 1 (buy), below .485 -> -1 (sell), anything else -> 0 (nothing).
signals = np.array([
    1 if p > .515 else (-1 if p < .485 else 0)
    for p in all_y_probs
])
actual_returns = np.array(all_actual_returns)

# Per-sample strategy return is the position times the realised return.
strategy_returns = signals * actual_returns

# Compounded strategy return over the whole back-test.
cumulative_returns = np.cumprod(1 + strategy_returns) - 1

# Mean over standard deviation of per-sample returns, scaled by sqrt(252).
sharpe = strategy_returns.mean() / strategy_returns.std() * np.sqrt(252)
# Share of samples with a strictly positive strategy return.
win_rate = (strategy_returns > 0).mean()
# Share of samples where a non-zero position was taken.
exposure = (signals != 0).mean()

print(
    f"Sharpe Ratio: {sharpe:.2f}",
    f"Win Rate: {win_rate:.2%}",
    f"Market Exposure: {exposure:.2%}",
    sep="\n",
)

In [None]:
# Report the largest and smallest single-sample strategy return.
for label, reducer in (("Max daily return:", np.max), ("Min daily return:", np.min)):
    print(label, reducer(strategy_returns))


In [None]:
import matplotlib.pyplot as plt

# Histogram of the predicted probabilities, to see how far they move from 0.5.
prob_fig, prob_ax = plt.subplots()
prob_ax.hist(all_y_probs, bins=50)
prob_ax.set_title("Histogram of Model Predicted Probabilities")
prob_ax.set_xlabel("Predicted Probability")
prob_ax.set_ylabel("Frequency")
prob_ax.grid(True)
plt.show()


In [None]:
# Seeing how confident my model is in terms of predictions:
# count of collected probabilities plus their smallest and largest value.
for label, summarize in (
    ("Total predictions collected:", len),
    ("Min predicted prob:", np.min),
    ("Max predicted prob:", np.max),
):
    print(label, summarize(all_y_probs))


In [None]:
import seaborn as sns
import pandas as pd

# One row per back-tested prediction.
backtest_columns = {
    "Strategy Return": strategy_returns,
    "Signal": signals,
    "Probability": all_y_probs,
    "Actual Return": actual_returns,
}
df_bt = pd.DataFrame(backtest_columns)

# Only show executed trades (rows where a non-zero position was taken)
traded_mask = df_bt["Signal"].ne(0)
executed_trades = df_bt.loc[traded_mask]

trade_ax = sns.histplot(executed_trades["Strategy Return"], kde=True)
trade_ax.set_title("Distribution of Strategy Returns on Trades")
plt.show()