In [82]:
import pandas as pd
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import os
import tensorflow as tf
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [83]:
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)
os.environ['PYTHONHASHSEED'] = '42'

In [84]:
def calculate_rsi(prices, period=14):
    delta = prices.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi


# wtf are all these features??
## 'Return' 
### - Percentage change in closing price (pct_change())
## 'Volume'
### - Trading volume from yfinance
## 'ma20'
### - 20-period moving average of closing prices
## 'ma50'
### - 50-period moving average of closing prices
## 'rsi'
### - Relative Strength Index (14-period by default)
## 'macd'
### - Moving Average Convergence Divergence (difference between 12-period and 26-period EMA)
## 'macd_hist'
### - MACD histogram (difference between MACD and its signal line)
## 'momentum_3'
### - 3-period momentum (percentage change over 3 periods)
## 'momentum_5'
### - 5-period momentum (percentage change over 5 periods)
## 'volatility'
### - 20-period standard deviation of closing prices These are the 

In [133]:
df = yf.download('AAPL', start='2018-01-01', end='2024-01-01', interval='1d')
df['rsi'] = calculate_rsi(df['Close'])  
df['ema12'] = df['Close'].ewm(span=12).mean()
df['ema26'] = df['Close'].ewm(span=26).mean()
df['macd'] = df['ema12'] - df['ema26']
df['macd_signal'] = df['macd'].ewm(span=9).mean()
df['macd_hist'] = df['macd'] - df['macd_signal']
df['Return'] = df['Close'].pct_change()
df['ma20'] = df['Close'].rolling(window=20).mean()
df['ma50'] = df['Close'].rolling(window=50).mean()
for period in [3, 5, 10]:
    df[f'momentum_{period}'] = df['Close'].pct_change(periods=period)

df['volatility'] = df['Close'].rolling(window=20).std()

df.dropna(inplace=True)
df.columns = df.columns.droplevel('Ticker')


[*********************100%***********************]  1 of 1 completed


In [134]:
feature_cols = ['Close', 'Return', 'Volume', 'ma20', 'ma50', 'rsi', 
                'macd', 'macd_hist', 'momentum_3', 'momentum_5', 'volatility']
df[feature_cols].head(1)

Price,Close,Return,Volume,ma20,ma50,rsi,macd,macd_hist,momentum_3,momentum_5,volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-03-14,42.103054,-0.008501,117473600,41.479792,40.570716,62.364619,0.629901,0.077077,-0.008556,0.019482,0.843446


In [135]:
df['Direction'] = (df['Close'].shift(-1) > df['Close']).astype(int)
print(f"Class distribution: {df['Direction'].value_counts().to_dict()}")
df.dropna(inplace=True)
df['Direction']

Class distribution: {1: 780, 0: 680}


Date
2018-03-14    1
2018-03-15    0
2018-03-16    0
2018-03-19    0
2018-03-20    0
             ..
2023-12-22    0
2023-12-26    1
2023-12-27    1
2023-12-28    0
2023-12-29    0
Name: Direction, Length: 1460, dtype: int64

In [136]:
train_size = int(len(df) * 0.8)
train_data = df[:train_size]
test_data = df[train_size:]

In [137]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(train_data[feature_cols])
X_test_scaled = scaler.transform(test_data[feature_cols])
y_train = train_data['Direction'].values
y_test = test_data['Direction'].values

In [138]:
def create_sequences(X, y, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        y_seq.append(y[i+seq_length])
    return np.array(X_seq), np.array(y_seq)


In [139]:
seq_len = 60
X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train, seq_len)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test, seq_len)


In [140]:
len(X_train_seq), len(X_train_scaled)

(1108, 1168)

# **compute_class_weight**
### Calculates weights for each class (0 and 1 for the Direction variable) inversely proportional to their frequency in the training data
### These weights are then converted to a dictionary format that Keras expects
### During training, the loss function will penalize misclassifications of the minority class more heavily based on these weights

In [141]:
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_seq), y=y_train_seq)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}


# **First LSTM Layer (64 units)**

### Processes the input sequences (30 time steps of market data)
### return_sequences=True preserves the temporal dimension, outputting a sequence for the next LSTM layer
### LSTM cells internally use sigmoid and tanh activations for their gates

## Dropout (0.3)
### Randomly sets 30% of the outputs to zero during training
### Prevents overfitting by forcing the network to learn redundant representations
# **Second LSTM Layer (32 units)**

### Further processes the sequences from the first LSTM layer
### Captures higher-level temporal patterns
### Outputs a single vector per sequence (no return_sequences)
## Dropout (0.3)

# **Another regularization layer to prevent overfitting**
## Dense Layer (16 units) with ReLU Activation
### Transforms LSTM outputs into a more compact representation
## Why ReLU?
### Helps with vanishing gradient problem (unlike sigmoid/tanh)
### Introduces non-linearity while maintaining computational efficiency
### Creates sparse activations (many neurons can be "off")
### Allows for faster training compared to sigmoid/tanh
## Dropout (0.2)
### Final regularization before the output layer

# **Output Layer (1 unit) with Sigmoid Activation**

## Why Sigmoid?
### Outputs a value between 0 and 1, interpreted as probability
### Perfect for binary classification (market up/down) since:
### Values closer to 0 = higher confidence in downward movement
### Values closer to 1 = higher confidence in upward movement
### Matches the binary_crossentropy loss function


In [142]:
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(seq_len, X_train_seq.shape[2])),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

  super().__init__(**kwargs)


In [143]:
model.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [144]:
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=50,
    restore_best_weights=True
)

In [145]:
history = model.fit(
    X_train_seq, y_train_seq, 
    epochs=200, 
    batch_size=32, 
    validation_split=0.2,
    class_weight=class_weight_dict,  
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.5310 - loss: 0.6938 - val_accuracy: 0.5180 - val_loss: 0.6930
Epoch 2/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5378 - loss: 0.6930 - val_accuracy: 0.5270 - val_loss: 0.6923
Epoch 3/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5181 - loss: 0.6938 - val_accuracy: 0.5135 - val_loss: 0.6924
Epoch 4/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5094 - loss: 0.6947 - val_accuracy: 0.5180 - val_loss: 0.6925
Epoch 5/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5287 - loss: 0.6933 - val_accuracy: 0.4820 - val_loss: 0.6933
Epoch 6/200
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5327 - loss: 0.6910 - val_accuracy: 0.4910 - val_loss: 0.6939
Epoch 7/200
[1m28/28[0m [

In [146]:
y_pred_proba = model.predict(X_test_seq)
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(y_test_seq, y_pred_proba)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]
print(f"Optimal threshold: {optimal_threshold:.4f}")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step
Optimal threshold: 0.5499


In [150]:
y_pred = (y_pred_proba > optimal_threshold).astype(int)
y_pred

array([[1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [151]:
accuracy = accuracy_score(y_test_seq, y_pred)
print(f"Directional Accuracy: {accuracy:.4f}")

Directional Accuracy: 0.4914


In [152]:
print("\nClassification Report:")
print(classification_report(y_test_seq, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.46      0.82      0.59       105
           1       0.60      0.22      0.32       127

    accuracy                           0.49       232
   macro avg       0.53      0.52      0.46       232
weighted avg       0.54      0.49      0.44       232



# *tf is the confusion matrix??*
## [tn, fp]
## [fp, tp]
### TN = True Negatives (correctly predicted "down" movements)
### FP = False Positives (incorrectly predicted "up" when actually "down")
### FN = False Negatives (incorrectly predicted "down" when actually "up")
### TP = True Positives (correctly predicted "up" movements)

In [153]:
cm = confusion_matrix(y_test_seq, y_pred)
print("\nConfusion Matrix:")
print(cm)


Confusion Matrix:
[[86 19]
 [99 28]]


In [154]:
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='lower right')


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1edd8b25e50>

In [155]:
plt.figure(figsize=(12, 6))
plt.plot(y_test_seq, 'b-', label='Actual Direction')
plt.plot(y_pred, 'r-', label='Predicted Direction')
plt.title('LSTM Model: Predicted vs Actual Price Direction')
plt.xlabel('Time')
plt.ylabel('Direction (1=Up, 0=Down)')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [156]:
import backtrader as bt
import datetime

In [157]:


class LSTMStrategy(bt.Strategy):
    """
    Simple strategy: if LSTM prediction is 1, go long; if 0, go flat (close).
    """
    params = (
        ('predictions', None),  # array-like with 0/1 predictions
    )

    def __init__(self):
        self.index = 0

    def next(self):
        # Stop if no remaining predictions
        if self.index >= len(self.params.predictions):
            return

        current_pred = self.params.predictions[self.index]

        if current_pred == 1 and not self.position:
            self.buy()
        elif current_pred == 0 and self.position:
            self.close()

        self.index += 1

def run_backtest_bt(test_data_df, predictions, seq_len):

    # Slice out the portion aligned with predictions:
    df_for_bt = test_data_df[['Open','High','Low','Close','Volume']].iloc[seq_len:].copy()
    df_for_bt.sort_index(inplace=True)  # ensure ascending by date/time

    # Convert to Backtrader's PandasData feed
    data_feed = bt.feeds.PandasData(dataname=df_for_bt)

    # Setup Cerebro
    cerebro = bt.Cerebro()
    cerebro.adddata(data_feed)
    cerebro.addstrategy(LSTMStrategy, predictions=predictions)
    cerebro.broker.setcash(100000.0)  # Starting capital

    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
    cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='returns')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade_analysis')
    cerebro.addanalyzer(bt.analyzers.PyFolio, _name='PyFolio')
    results = cerebro.run()
    strategy = results[0]

    # Print performance metrics
    sharpe = strategy.analyzers.sharpe.get_analysis()
    returns = strategy.analyzers.returns.get_analysis()
    drawdown = strategy.analyzers.drawdown.get_analysis()
    trades = strategy.analyzers.trade_analysis.get_analysis()

    print("\n=== Backtest Results ===")
    print(f"Sharpe Ratio: {sharpe.get('sharperatio', 'N/A')}")
    print(f"Annual Returns: {returns}")
    print(f"Max Drawdown: {drawdown.get('max', 'N/A')}%")
    print(f"Total Trades: {trades.get('total', 'N/A')}")
    print(f"Winning Trades: {trades.get('won', {}).get('total', 0)}")
    print(f"Losing Trades: {trades.get('lost', {}).get('total', 0)}")
    print(f"Final Portfolio Value: {cerebro.broker.getvalue()}")

    img_path = "backtrader_plot.png"

    # Plot results with buy/sell markers
    fig = cerebro.plot(iplot=True, show=False)[0][0]


In [158]:
run_backtest_bt(test_data, y_pred, seq_len)


=== Backtest Results ===
Sharpe Ratio: None
Annual Returns: OrderedDict({2023: 7.9397598829134e-05})
Max Drawdown: AutoOrderedDict({'len': 227, 'drawdown': 0.019675439678605713, 'moneydown': 19.677734698387212})%
Total Trades: AutoOrderedDict({'total': 4, 'open': 0, 'closed': 4})
Winning Trades: 2
Losing Trades: 2
Final Portfolio Value: 100007.93975988292


<IPython.core.display.Javascript object>

In [159]:
y_pred

array([[1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    