# LSTM Prediction on Microsoft Stock

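This notebook trains an LSTM classifier on Microsoft (MSFT) stock data to predict a binary up/down label (1 when the next close is higher than the current close). The model is trained first **without** technical indicators and then **with** two indicators: the Schaff Trend Cycle (STC) and the Chande Momentum Oscillator (CMO).
The metrics reported for each model are accuracy, recall, and F1 score.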

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import set_random_seed

## Load Microsoft stock data

In [None]:
# Read the price history and put it in chronological order; every later cell
# relies on rows being sorted by date.
df = pd.read_csv('MSFT_1986_2025-06-30.csv')
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)

# Binary label: 1 when the following row's close is above this row's close.
next_close = df['Close'].shift(-1)
df['Target'] = (next_close > df['Close']).astype(int)

# Rows without a following close (always the last row) have no real label:
# `NaN > x` evaluates to False, so they would otherwise be silently labelled 0.
# Drop them instead of training/evaluating on a fabricated "down" day.
df = df.loc[next_close.notna()].reset_index(drop=True)

## Train LSTM without technical indicators

In [None]:
basic_features = ['Close','High','Low','Open','Volume']
seq_len = 20  # number of consecutive rows fed to the LSTM per sample

# Seed Python, NumPy and the Keras backend so that Restart & Run All
# reproduces the reported metrics (weight init and batch shuffling are random).
set_random_seed(42)

# Keep only rows where every input feature and the label are present.
frame = df[basic_features + ['Target']].dropna().reset_index(drop=True)
raw_values = frame[basic_features].to_numpy(dtype=float)

# Chronological 80/20 split, counted in windows (one window per sample).
n_windows = len(frame) - seq_len
split = int(0.8 * n_windows)
if split < 1:
    raise ValueError('Not enough rows to build a training set of %d-step windows.' % seq_len)

# Fit the scaler on training rows only. Window i covers rows i..i+seq_len-1,
# so training windows 0..split-1 touch rows 0..split+seq_len-2. Fitting on the
# full history would leak the test period's min/max into the training inputs.
scaler = MinMaxScaler()
scaler.fit(raw_values[:split + seq_len - 1])
scaled = scaler.transform(raw_values)
data = pd.DataFrame(scaled, columns=basic_features)
data['Target'] = frame['Target'].to_numpy()

# Build the sliding windows from a single NumPy view instead of re-extracting
# the feature matrix from the DataFrame on every iteration.
feature_matrix = data[basic_features].to_numpy()
X = np.array([feature_matrix[i:i + seq_len] for i in range(n_windows)])
# NOTE(review): the label is taken from the row *after* the window
# (index i+seq_len), i.e. one row later than the last row the model sees.
# Confirm this horizon is intended; aligning to i+seq_len-1 would predict the
# move immediately following the window.
y = data['Target'].to_numpy()[seq_len:seq_len + n_windows]

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

model_basic = Sequential([
    LSTM(50, input_shape=(seq_len, len(basic_features))),
    Dense(1, activation='sigmoid')
])
model_basic.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_basic.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)

# Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
pred_basic = (model_basic.predict(X_test) > 0.5).astype(int).flatten()
acc_basic = accuracy_score(y_test, pred_basic)
recall_basic = recall_score(y_test, pred_basic)
f1_basic = f1_score(y_test, pred_basic)
print('Without indicators - Acc:', acc_basic, 'Recall:', recall_basic, 'F1:', f1_basic)

## Calculate STC and CMO indicators

In [None]:
def schaff_trend_cycle(close, fast_period=23, slow_period=50, cycle_period=10):
    """Compute a Schaff-Trend-Cycle-style oscillator from a close-price Series.

    Steps: MACD (fast EMA minus slow EMA) -> stochastic of the MACD over
    ``cycle_period`` rows, scaled to 0..100 -> two successive EMA smoothings
    with ``span=3``.

    NOTE(review): this applies one stochastic pass followed by two smoothings;
    commonly published STC formulations apply a second stochastic pass between
    the smoothings. Confirm the simplification is intended.

    Parameters
    ----------
    close : pandas.Series
        Close prices in chronological order.
    fast_period, slow_period : int
        Spans of the fast and slow EMAs that form the MACD.
    cycle_period : int
        Rolling window used for the MACD's min/max.

    Returns
    -------
    pandas.Series
        Smoothed oscillator aligned with ``close``. The first
        ``cycle_period - 1`` entries are NaN (incomplete rolling window).
    """
    ema_fast = close.ewm(span=fast_period, adjust=False).mean()
    ema_slow = close.ewm(span=slow_period, adjust=False).mean()
    macd = ema_fast - ema_slow

    lowest_macd = macd.rolling(window=cycle_period).min()
    highest_macd = macd.rolling(window=cycle_period).max()
    macd_range = highest_macd - lowest_macd

    # A zero range (MACD flat across the window) makes the stochastic 0/0.
    # Mask the denominator so those rows become NaN explicitly...
    raw_stoch = 100 * (macd - lowest_macd) / macd_range.where(macd_range > 0)
    # ...then hold the previous stochastic value there (0 if there is none).
    # Warm-up rows have a NaN range, are not "flat", and therefore stay NaN.
    is_flat = macd_range == 0
    held_value = raw_stoch.ffill().fillna(0.0)
    stoch_macd = raw_stoch.where(~is_flat, held_value)

    stoch_macd_smoothed1 = stoch_macd.ewm(span=3, adjust=False).mean()
    stoch_macd_smoothed2 = stoch_macd_smoothed1.ewm(span=3, adjust=False).mean()
    return stoch_macd_smoothed2

def chande_momentum_oscillator(close, window=14):
    """Compute the Chande Momentum Oscillator (CMO) of a close-price Series.

    ``CMO = 100 * (sum_gains - sum_losses) / (sum_gains + sum_losses)`` where
    the sums run over the last ``window`` row-to-row price changes, so the
    result lies in [-100, 100].

    Parameters
    ----------
    close : pandas.Series
        Close prices in chronological order.
    window : int
        Number of consecutive price changes summed per value.

    Returns
    -------
    pandas.Series
        CMO aligned with ``close``. The first ``window`` entries are NaN
        (fewer than ``window`` real price changes available). A window with
        no price movement at all yields 0 rather than NaN.
    """
    delta = close.diff()
    # clip() keeps the leading NaN from diff(), so the first output needs a
    # full window of real changes instead of counting the missing one as 0.
    gains = delta.clip(lower=0)
    losses = (-delta).clip(lower=0)
    sum_gains = gains.rolling(window=window).sum()
    sum_losses = losses.rolling(window=window).sum()
    total_movement = sum_gains + sum_losses

    # Mask a zero denominator to avoid 0/0, then report neutral momentum (0)
    # for windows in which the price did not move.
    cmo = 100 * (sum_gains - sum_losses) / total_movement.where(total_movement != 0)
    cmo = cmo.mask(total_movement == 0, 0.0)
    return cmo

# Attach both oscillators (computed from the close price) as new columns of df,
# then build df_ind by discarding every row that still contains a NaN, which
# includes the indicators' warm-up rows.
close_prices = df['Close']
stc_values = schaff_trend_cycle(close_prices)
cmo_values = chande_momentum_oscillator(close_prices)
df['STC'] = stc_values
df['CMO'] = cmo_values
df_ind = df.dropna(how='any').reset_index(drop=True)

## Train LSTM with STC and CMO indicators

In [None]:
feat_ind = ['Close','High','Low','Open','Volume','STC','CMO']
seq_len = 20  # number of consecutive rows fed to the LSTM per sample

# Seed Python, NumPy and the Keras backend so that Restart & Run All
# reproduces the reported metrics (weight init and batch shuffling are random).
set_random_seed(42)

# df_ind has already had its NaN rows removed, so it can be used directly.
raw_values_ind = df_ind[feat_ind].to_numpy(dtype=float)

# Chronological 80/20 split, counted in windows (one window per sample).
n_windows_ind = len(df_ind) - seq_len
split2 = int(0.8 * n_windows_ind)
if split2 < 1:
    raise ValueError('Not enough rows to build a training set of %d-step windows.' % seq_len)

# Fit the scaler on training rows only. Window i covers rows i..i+seq_len-1,
# so training windows 0..split2-1 touch rows 0..split2+seq_len-2. Fitting on
# the full history would leak the test period's min/max into the training inputs.
scaler_ind = MinMaxScaler()
scaler_ind.fit(raw_values_ind[:split2 + seq_len - 1])
scaled_ind = scaler_ind.transform(raw_values_ind)
data_ind = pd.DataFrame(scaled_ind, columns=feat_ind)
data_ind['Target'] = df_ind['Target'].values

# Build the sliding windows from a single NumPy view instead of re-extracting
# the feature matrix from the DataFrame on every iteration.
feature_matrix_ind = data_ind[feat_ind].to_numpy()
X2 = np.array([feature_matrix_ind[i:i + seq_len] for i in range(n_windows_ind)])
# NOTE(review): the label is taken from the row *after* the window
# (index i+seq_len), i.e. one row later than the last row the model sees.
# Confirm this horizon is intended; aligning to i+seq_len-1 would predict the
# move immediately following the window.
y2 = data_ind['Target'].to_numpy()[seq_len:seq_len + n_windows_ind]

X2_train, X2_test = X2[:split2], X2[split2:]
y2_train, y2_test = y2[:split2], y2[split2:]

model_ind = Sequential([
    LSTM(50, input_shape=(seq_len, len(feat_ind))),
    Dense(1, activation='sigmoid')
])
model_ind.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_ind.fit(X2_train, y2_train, epochs=5, batch_size=32, verbose=0)

# Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
pred_ind = (model_ind.predict(X2_test) > 0.5).astype(int).flatten()
acc_ind = accuracy_score(y2_test, pred_ind)
recall_ind = recall_score(y2_test, pred_ind)
f1_ind = f1_score(y2_test, pred_ind)
print('With indicators - Acc:', acc_ind, 'Recall:', recall_ind, 'F1:', f1_ind)

## Compare results

In [None]:
# Show both models' metrics side by side, then name the winner by accuracy.
print('Without indicators - Acc:', acc_basic, 'Recall:', recall_basic, 'F1:', f1_basic)
print('With indicators - Acc:', acc_ind, 'Recall:', recall_ind, 'F1:', f1_ind)
if acc_ind > acc_basic:
    print('LSTM with indicators performed better.')
elif acc_ind < acc_basic:
    print('LSTM without indicators performed better.')
else:
    # Equal accuracy is a tie; do not credit either model.
    print('Both LSTM variants reached the same accuracy.')