In [None]:
!pip install yfinance

In [None]:
import yfinance as yf

# Download the AMZN price history for the analysis window.
numerical_df = yf.download('AMZN', start="2015-10-01", end="2020-12-31")

In [None]:
import yfinance as yf
import pandas as pd

def calculate_rsi(data, period=14):
    """Relative Strength Index using Wilder-style recursive smoothing.

    Parameters
    ----------
    data : pandas.Series
        Price series (the notebook passes the ``Close`` column).
    period : int, default 14
        Look-back length used for both the seed average and the smoothing.

    Returns
    -------
    pandas.Series
        RSI values on ``data``'s index. The first ``period - 1`` entries are
        NaN because the seed rolling mean is not yet defined there.
    """
    delta = data.diff(1)
    gain = delta.where(delta > 0, 0).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)

    def _wilder_average(values):
        # Positions 0..period keep the plain rolling mean (the seed); every
        # later position is smoothed recursively from its predecessor.
        # Writing into a pre-allocated array replaces the per-row pd.concat
        # of the earlier version, which copied the whole series each step.
        smoothed = (
            values.rolling(window=period, min_periods=period)
            .mean()
            .to_numpy(dtype=float, copy=True)
        )
        raw = values.to_numpy(dtype=float)
        for i in range(period + 1, len(raw)):
            smoothed[i] = (smoothed[i - 1] * (period - 1) + raw[i]) / period
        return pd.Series(smoothed, index=values.index)

    avg_gain = _wilder_average(gain)
    avg_loss = _wilder_average(loss)

    # A zero average loss yields an infinite ratio, which maps to RSI == 100.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

n_days = 50
# sp500_tickers = sorted(sp500_company_tickers_in_kaggle_df.split(" "))

# for comp in sp500_tickers:
# The two EMAs behind MACD are independent of the columns added below.
temp_12 = numerical_df['Close'].ewm(span=12, adjust=False).mean()
temp_26 = numerical_df['Close'].ewm(span=26, adjust=False).mean()

# Columns are appended in the order RSI, EMA, SMA, MACD.
numerical_df['RSI'] = calculate_rsi(numerical_df['Close'], 50)
numerical_df['EMA'] = numerical_df['Close'].ewm(span=n_days, adjust=False).mean()
numerical_df['SMA'] = numerical_df['Close'].rolling(window=14).mean()
numerical_df['MACD'] = temp_12 - temp_26

In [None]:
# Keep rows dated 2016-01-01 or later; earlier rows only served as indicator warm-up.
numerical_df = numerical_df.loc[numerical_df.index >= pd.Timestamp(2016, 1, 1)]

In [None]:
# Remove the adjusted-close column from the frame.
numerical_df = numerical_df.drop('Adj Close', axis=1)

In [None]:
# Features are every column except Close; the target is Close as an array.
X = numerical_df.drop(columns=['Close'])
y = numerical_df['Close'].values
X.shape, y.shape

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Standardise the features and min-max scale the target (as a single column).
mm = MinMaxScaler()
ss = StandardScaler()

X_trans = ss.fit(X).transform(X)
y_trans = mm.fit_transform(y[:, None])

In [None]:
y_trans

In [None]:
!pip install tensorflow

In [None]:
!pip install keras

In [None]:

import keras
from keras import layers
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [None]:

# Step 1: Fetch historical stock data
def fetch_stock_data(symbol, start_date, end_date):
    """Return whatever ``yf.download`` yields for ``symbol`` between the two dates."""
    return yf.download(symbol, start=start_date, end=end_date)

def calculate_rsi(data, period=14):
    """RSI built from simple rolling means of the gains and losses.

    The first ``period - 1`` results are NaN because the rolling windows
    require ``period`` observations.
    """
    change = data.diff(1)
    ups = change.where(change > 0, 0).fillna(0)
    downs = (-change.where(change < 0, 0)).fillna(0)

    window_kwargs = dict(window=period, min_periods=period)
    mean_up = ups.rolling(**window_kwargs).mean()
    mean_down = downs.rolling(**window_kwargs).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

def calculate_ema(data, span=50):
    """Exponentially weighted mean of ``data`` with the given span (``adjust=False``)."""
    weighted = data.ewm(span=span, adjust=False)
    return weighted.mean()

def calculate_sma(data, window=14):
    """Simple moving average over ``window`` rows; NaN until the window is full."""
    windowed = data.rolling(window=window)
    return windowed.mean()

def calculate_macd(data, span_short=12, span_long=26):
    """MACD line: short-span EMA minus long-span EMA of ``data``."""
    fast_ema, slow_ema = (
        calculate_ema(data, span=span) for span in (span_short, span_long)
    )
    return fast_ema - slow_ema

    

def evaluate_model(model, X_test, y_test):
    """Return the result of ``model.evaluate`` on the given data (verbose output on)."""
    return model.evaluate(X_test, y_test, verbose=1)


def compute_technical_indicators(data):
    """Return a copy of ``data`` with RSI, EMA, SMA and MACD columns added.

    All indicators are derived from the ``Close`` column. The input frame is
    left untouched, so re-running a cell does not alter the raw price data.

    Missing values (for example the indicator warm-up rows) are replaced by
    the mean of their column. That mean is taken over the whole column, so
    filled rows contain information from later dates.
    """
    enriched = data.copy()
    close = enriched['Close']

    enriched['RSI'] = calculate_rsi(close, period=14)
    enriched['EMA'] = calculate_ema(close, span=50)
    enriched['SMA'] = calculate_sma(close, window=14)
    enriched['MACD'] = calculate_macd(close, span_short=12, span_long=26)

    # numeric_only keeps the mean well-defined if a non-numeric column is present.
    return enriched.fillna(enriched.mean(numeric_only=True))

# Step 3: Prepare data
def prepare_data(data, n_context_days = 5, scale=False):
    """Slice the indicator frame into sliding windows for next-day prediction.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns Close, RSI, EMA, SMA and MACD.
    n_context_days : int, default 5
        Number of consecutive rows in each input window.
    scale : bool, default False
        When True, min-max scale the five columns to [0, 1] before windowing.

    Returns
    -------
    X : numpy.ndarray
        Windows of shape (samples, n_context_days, 5).
    y : numpy.ndarray
        Close value of the row that follows each window.
    scaler : MinMaxScaler or None
        The fitted scaler when ``scale`` is True, otherwise None. Previously
        an unassigned name was returned here, which raised NameError.
    """
    feature_columns = ['Close', 'RSI', 'EMA', 'SMA', 'MACD']
    scaled_data = data[feature_columns].values
    scaler = None
    if scale:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(scaled_data)

    X, y = [], []
    for i in range(n_context_days, len(data)):
        X.append(scaled_data[i-n_context_days:i])
        y.append(scaled_data[i, 0])  # Closing price
    X, y = np.array(X), np.array(y)
    return X, y, scaler


def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """One encoder block: self-attention then a 1x1-conv feed-forward stage.

    Layer normalisation is applied before each stage and each stage's output
    is added back to its input, so the result has the same last dimension as
    ``inputs``.
    """
    # Self-attention stage
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention_layer = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )
    attended = attention_layer(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    attention_residual = attended + inputs

    # Feed-forward stage; the second convolution projects back to the input width
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_residual)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)

    return projected + attention_residual


def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Assemble the transformer regressor with a single linear output unit.

    The input passes through ``num_transformer_blocks`` encoder blocks, is
    average-pooled, then goes through one Dense(relu)+Dropout pair per entry
    of ``mlp_units`` before the final ``Dense(1)``.
    """
    model_input = keras.Input(shape=input_shape)

    features = model_input
    for _ in range(num_transformer_blocks):
        features = transformer_encoder(features, head_size, num_heads, ff_dim, dropout)

    features = layers.GlobalAveragePooling1D(data_format="channels_last")(features)
    for width in mlp_units:
        features = layers.Dense(width, activation="relu")(features)
        features = layers.Dropout(mlp_dropout)(features)

    prediction = layers.Dense(1, activation="linear")(features)
    return keras.Model(model_input, prediction)

def train_model(x_train, y_train, x_test, y_test):
    """Build, compile and fit the transformer regressor.

    Parameters
    ----------
    x_train, y_train : array-like
        Training windows and targets; the last 20% is held out by Keras as
        the validation split that drives early stopping.
    x_test, y_test : array-like
        Accepted for call compatibility; not used during fitting.

    Returns
    -------
    keras.Model
        The fitted model.
    """
    input_shape = x_train.shape[1:]

    model = build_model(
        input_shape,
        head_size=256,
        num_heads=4,
        ff_dim=3,
        num_transformer_blocks=4,
        mlp_units=[128],
        mlp_dropout=0.4,
        dropout=0.25,
    )

    model.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        # "mean_squared_error" used to be listed twice; the duplicate is
        # replaced by MAE so each reported metric is distinct.
        metrics=["mean_absolute_error", "mean_squared_error", "mape"],
    )
    model.summary()

    callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

    model.fit(
        x_train,
        y_train,
        # Validation comes from the training data, not from x_test / y_test.
        validation_split=0.2,
        epochs=25,
        batch_size=128,
        callbacks=callbacks,
    )
    return model


# Step 7: Predict next day's closing price
def predict_next_day_price(model, last_data_point, scaler):
    """Predict from a single window and undo scaling when a scaler is given.

    Parameters
    ----------
    model : object with ``predict``
        Called with the window wrapped in a batch of size one.
    last_data_point : array-like
        One input window, without the batch axis.
    scaler : fitted scaler or None
        When None the raw model output is returned. Otherwise the prediction
        is placed in column 0 of a zero-padded row and passed through
        ``scaler.inverse_transform``; the row width follows the scaler's
        ``n_features_in_`` and falls back to 5.

    Returns
    -------
    The first value of the first predicted row, in price units when a scaler
    is supplied.
    """
    batch = np.expand_dims(np.asarray(last_data_point), axis=0)
    predicted_scaled_price = model.predict(batch)[0][0]
    if scaler is None:
        return predicted_scaled_price

    n_features = getattr(scaler, "n_features_in_", 5)
    padded_row = [predicted_scaled_price] + [0] * (n_features - 1)
    predicted_price = scaler.inverse_transform([padded_row])[0][0]
    return predicted_price



# Fetch data
symbol = 'AAPL'  # Example symbol
start_date = '2015-01-01'
end_date = '2022-01-01'
data = fetch_stock_data(symbol, start_date, end_date)

# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Stop the cell (without killing the kernel) if any value is still missing
if data_with_technical_indicators.isnull().values.any():
    raise ValueError("There are missing values after computing technical indicators. Please handle them appropriately.")

# Prepare data
X, y, scaler = prepare_data(data_with_technical_indicators)

# Chronological 80/20 split into training and testing sets
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model; `loss` must be assigned before it is printed
loss = evaluate_model(model, X_test, y_test)
print("Test Loss:", loss)  # Test loss: Represents the average loss (error) between the predicted values and the actual values. Lower values indicate better performance.
# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)


# Calculate additional evaluation metrics
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)


print("Mean Absolute Error (MAE):", mae)  # Mean Absolute Error (MAE): Average magnitude of the errors in the predictions. Lower values indicate better performance.
print("Mean Squared Error (MSE):", mse)  # Mean Squared Error (MSE): Average of the squared differences between the predicted values and the actual values. Lower values indicate better performance.
print("Root Mean Squared Error (RMSE):", rmse)  # Root Mean Squared Error (RMSE): Standard deviation of the residuals (prediction errors). Lower values indicate better performance.

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()





In [None]:
# Options for the architecture diagram written to model.png.
plot_model_options = dict(
    to_file='model.png',
    show_shapes=False,
    show_dtype=False,
    show_layer_names=False,
    rankdir='TB',  # 'LR' would lay the graph out left-to-right
    expand_nested=False,
    dpi=75,
    show_layer_activations=False,
    show_trainable=False,
)
tf.keras.utils.plot_model(model, **plot_model_options)

In [None]:
# above exp uses all five input features: Close, RSI, EMA, SMA and MACD

In [None]:
# Predictions on the training windows, used below to measure in-sample error.
y_train_pred = model.predict(X_train)

In [None]:
# In-sample error metrics; RMSE is the square root of the MSE computed here.
mse = mean_squared_error(y_train, y_train_pred)
mae = mean_absolute_error(y_train, y_train_pred)
rmse = np.sqrt(mse)
mae, mse, rmse

In [None]:
# Actual vs predicted prices on the training portion.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_train, label='Actual Stock Prices')
ax.plot(y_train_pred, label='Predicted Stock Prices')
ax.set_title('Actual vs Predicted Stock Prices')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()

In [None]:

# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Stop the cell (without killing the kernel) if any value is still missing
if data_with_technical_indicators.isnull().values.any():
    raise ValueError("There are missing values after computing technical indicators. Please handle them appropriately.")

# Prepare data
X, y, scaler = prepare_data(data_with_technical_indicators)

# This experiment uses the closing price only. When prepare_data returns
# full-feature windows (samples, days, features), keep feature 0 (Close)
# so the reshape below to (samples, 1, days) is valid.
if X.ndim == 3:
    X = X[:, :, 0]
X = X.reshape(X.shape[0], 1, X.shape[-1])
# Split data into training and testing sets
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model; `loss` must be assigned before it is printed
loss = evaluate_model(model, X_test, y_test)
print("Test Loss:", loss)  # Test loss: Represents the average loss (error) between the predicted values and the actual values. Lower values indicate better performance.
# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)


# Calculate additional evaluation metrics
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)


print("Mean Absolute Error (MAE):", mae)  # Mean Absolute Error (MAE): Average magnitude of the errors in the predictions. Lower values indicate better performance.
print("Mean Squared Error (MSE):", mse)  # Mean Squared Error (MSE): Average of the squared differences between the predicted values and the actual values. Lower values indicate better performance.
print("Root Mean Squared Error (RMSE):", rmse)  # Root Mean Squared Error (RMSE): Standard deviation of the residuals (prediction errors). Lower values indicate better performance.

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
 




In [None]:
# above exp - using only close feature, output only next day price

In [None]:
# Predictions on the training windows, used below to measure in-sample error.
y_train_pred = model.predict(X_train)

In [None]:
# In-sample error metrics; RMSE is the square root of the MSE computed here.
mse = mean_squared_error(y_train, y_train_pred)
mae = mean_absolute_error(y_train, y_train_pred)
rmse = np.sqrt(mse)
mae, mse, rmse

In [None]:
X.shape

In [None]:
# ! pip install ta

import keras
from keras import layers
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt


# Step 1: Fetch historical stock data
def fetch_stock_data(symbol, start_date, end_date):
    """Return whatever ``yf.download`` yields for ``symbol`` between the two dates."""
    return yf.download(symbol, start=start_date, end=end_date)

def calculate_rsi(data, period=14):
    """RSI built from simple rolling means of the gains and losses.

    The first ``period - 1`` results are NaN because the rolling windows
    require ``period`` observations.
    """
    change = data.diff(1)
    ups = change.where(change > 0, 0).fillna(0)
    downs = (-change.where(change < 0, 0)).fillna(0)

    window_kwargs = dict(window=period, min_periods=period)
    mean_up = ups.rolling(**window_kwargs).mean()
    mean_down = downs.rolling(**window_kwargs).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

def calculate_ema(data, span=50):
    """Exponentially weighted mean of ``data`` with the given span (``adjust=False``)."""
    weighted = data.ewm(span=span, adjust=False)
    return weighted.mean()

def calculate_sma(data, window=14):
    """Simple moving average over ``window`` rows; NaN until the window is full."""
    windowed = data.rolling(window=window)
    return windowed.mean()

def calculate_macd(data, span_short=12, span_long=26):
    """MACD line: short-span EMA minus long-span EMA of ``data``."""
    fast_ema, slow_ema = (
        calculate_ema(data, span=span) for span in (span_short, span_long)
    )
    return fast_ema - slow_ema

    

def evaluate_model(model, X_test, y_test):
    """Return the result of ``model.evaluate`` on the given data (verbose output on)."""
    return model.evaluate(X_test, y_test, verbose=1)


def compute_technical_indicators(data):
    """Return a copy of ``data`` with RSI, EMA, SMA and MACD columns added.

    All indicators are derived from the ``Close`` column. The input frame is
    left untouched, so re-running a cell does not alter the raw price data.

    Missing values (for example the indicator warm-up rows) are replaced by
    the mean of their column. That mean is taken over the whole column, so
    filled rows contain information from later dates.
    """
    enriched = data.copy()
    close = enriched['Close']

    enriched['RSI'] = calculate_rsi(close, period=14)
    enriched['EMA'] = calculate_ema(close, span=50)
    enriched['SMA'] = calculate_sma(close, window=14)
    enriched['MACD'] = calculate_macd(close, span_short=12, span_long=26)

    # numeric_only keeps the mean well-defined if a non-numeric column is present.
    return enriched.fillna(enriched.mean(numeric_only=True))

# Step 3: Prepare data
def prepare_data(data, n_context_days = 5, n_forecast_days=5, scale=False):
    """Build close-price windows paired with multi-day close-price targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns Close, RSI, EMA, SMA and MACD; only Close
        (column 0 of the selection) ends up in X and y.
    n_context_days : int, default 5
        Number of past closes in each input row.
    n_forecast_days : int, default 5
        Number of following closes in each target row.
    scale : bool, default False
        When True, min-max scale the five columns to [0, 1] before windowing.

    Returns
    -------
    X : numpy.ndarray, shape (samples, n_context_days)
    y : numpy.ndarray, shape (samples, n_forecast_days)
    scaler : MinMaxScaler or None
        The fitted scaler when ``scale`` is True, otherwise None. Previously
        an unassigned name was returned here, which raised NameError.
    """
    feature_columns = ['Close', 'RSI', 'EMA', 'SMA', 'MACD']
    scaled_data = data[feature_columns].values
    scaler = None
    if scale:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(scaled_data)

    X, y = [], []
    # The last usable start index is len(data) - n_forecast_days (its target
    # ends exactly at the final row), hence the "+ 1" on the exclusive bound.
    for i in range(n_context_days, len(data) - n_forecast_days + 1):
        X.append(scaled_data[i-n_context_days:i, 0])
        y.append(scaled_data[i:i+n_forecast_days, 0])  # Closing price
    X, y = np.array(X), np.array(y)
    return X, y, scaler


def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Assemble the transformer regressor with five linear output units.

    The input passes through ``num_transformer_blocks`` encoder blocks, is
    average-pooled, then goes through one Dense(relu)+Dropout pair per entry
    of ``mlp_units`` before the final ``Dense(5)``.
    """
    model_input = keras.Input(shape=input_shape)

    features = model_input
    for _ in range(num_transformer_blocks):
        features = transformer_encoder(features, head_size, num_heads, ff_dim, dropout)

    features = layers.GlobalAveragePooling1D(data_format="channels_last")(features)
    for width in mlp_units:
        features = layers.Dense(width, activation="relu")(features)
        features = layers.Dropout(mlp_dropout)(features)

    prediction = layers.Dense(5, activation="linear")(features)
    return keras.Model(model_input, prediction)



# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Stop the cell (without killing the kernel) if any value is still missing
if data_with_technical_indicators.isnull().values.any():
    raise ValueError("There are missing values after computing technical indicators. Please handle them appropriately.")

# Prepare data
X, y, scaler = prepare_data(data_with_technical_indicators)

# Present each close-price window as one timestep with one feature per day
X = X.reshape(X.shape[0], 1, X.shape[-1])
# Split data into training and testing sets
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model; `loss` must be assigned before it is printed
loss = evaluate_model(model, X_test, y_test)
print("Test Loss:", loss)  # Test loss: Represents the average loss (error) between the predicted values and the actual values. Lower values indicate better performance.
# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)


# Calculate additional evaluation metrics
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)


print("Mean Absolute Error (MAE):", mae)  # Mean Absolute Error (MAE): Average magnitude of the errors in the predictions. Lower values indicate better performance.
print("Mean Squared Error (MSE):", mse)  # Mean Squared Error (MSE): Average of the squared differences between the predicted values and the actual values. Lower values indicate better performance.
print("Root Mean Squared Error (RMSE):", rmse)  # Root Mean Squared Error (RMSE): Standard deviation of the residuals (prediction errors). Lower values indicate better performance.

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()




In [None]:
# Compare only the first forecast column of the actual and predicted arrays.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test[:,0], label='Actual Stock Prices')
ax.plot(y_pred[:,0], label='Predicted Stock Prices')
ax.set_title('Actual vs Predicted Stock Prices')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()

In [None]:
# input features only close prices ; output next 5 days price

In [None]:
y_test[:,0].shape

In [None]:
X.shape

In [None]:
y_pred

In [None]:
# data_with_technical_indicators

In [None]:
X_train.shape[1:]

In [None]:
X_train.shape

In [None]:
data_with_technical_indicators.shape

In [None]:
# ! pip install ta


# ! pip install ta

import keras
from keras import layers
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt


# Step 1: Fetch historical stock data
def fetch_stock_data(symbol, start_date, end_date):
    """Return whatever ``yf.download`` yields for ``symbol`` between the two dates."""
    return yf.download(symbol, start=start_date, end=end_date)

def calculate_rsi(data, period=14):
    """RSI built from simple rolling means of the gains and losses.

    The first ``period - 1`` results are NaN because the rolling windows
    require ``period`` observations.
    """
    change = data.diff(1)
    ups = change.where(change > 0, 0).fillna(0)
    downs = (-change.where(change < 0, 0)).fillna(0)

    window_kwargs = dict(window=period, min_periods=period)
    mean_up = ups.rolling(**window_kwargs).mean()
    mean_down = downs.rolling(**window_kwargs).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

def calculate_ema(data, span=50):
    """Exponentially weighted mean of ``data`` with the given span (``adjust=False``)."""
    weighted = data.ewm(span=span, adjust=False)
    return weighted.mean()

def calculate_sma(data, window=14):
    """Simple moving average over ``window`` rows; NaN until the window is full."""
    windowed = data.rolling(window=window)
    return windowed.mean()

def calculate_macd(data, span_short=12, span_long=26):
    """MACD line: short-span EMA minus long-span EMA of ``data``."""
    fast_ema, slow_ema = (
        calculate_ema(data, span=span) for span in (span_short, span_long)
    )
    return fast_ema - slow_ema

    

def evaluate_model(model, X_test, y_test):
    """Return the result of ``model.evaluate`` on the given data (verbose output on)."""
    return model.evaluate(X_test, y_test, verbose=1)


def compute_technical_indicators(data):
    """Return a copy of ``data`` with RSI, EMA, SMA and MACD columns added.

    All indicators are derived from the ``Close`` column. The input frame is
    left untouched, so re-running a cell does not alter the raw price data.

    Missing values (for example the indicator warm-up rows) are replaced by
    the mean of their column. That mean is taken over the whole column, so
    filled rows contain information from later dates.
    """
    enriched = data.copy()
    close = enriched['Close']

    enriched['RSI'] = calculate_rsi(close, period=14)
    enriched['EMA'] = calculate_ema(close, span=50)
    enriched['SMA'] = calculate_sma(close, window=14)
    enriched['MACD'] = calculate_macd(close, span_short=12, span_long=26)

    # numeric_only keeps the mean well-defined if a non-numeric column is present.
    return enriched.fillna(enriched.mean(numeric_only=True))

# Step 3: Prepare data
def prepare_data(data, n_context_days = 5, scale=False):
    """Build close-price windows paired with the next close as target.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns Close, RSI, EMA, SMA and MACD; only Close
        (column 0 of the selection) ends up in X and y.
    n_context_days : int, default 5
        Number of past closes in each input row.
    scale : bool, default False
        When True, min-max scale the five columns to [0, 1] before windowing.

    Returns
    -------
    X : numpy.ndarray, shape (samples, n_context_days)
    y : numpy.ndarray, shape (samples,)
    scaler : MinMaxScaler or None
        The fitted scaler when ``scale`` is True, otherwise None. Previously
        an unassigned name was returned here, which raised NameError.
    """
    feature_columns = ['Close', 'RSI', 'EMA', 'SMA', 'MACD']
    scaled_data = data[feature_columns].values
    scaler = None
    if scale:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(scaled_data)

    X, y = [], []
    for i in range(n_context_days, len(data)):
        X.append(scaled_data[i-n_context_days:i, 0])
        y.append(scaled_data[i, 0])  # Closing price
    X, y = np.array(X), np.array(y)
    return X, y, scaler




def build_model(input_shape):
    """Return a compiled LSTM(200) -> Dense(50) -> Dense(1) regressor.

    Compiled with the Adam optimizer, mean-squared-error loss, and MAE and
    MAPE as reported metrics. No dropout layers are used.
    """
    model = Sequential()
    model.add(LSTM(units=200, activation='relu', input_shape=input_shape))
    model.add(Dense(units=50, activation='relu'))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=["mae", "mape"])
    return model



def train_model(x_train, y_train, x_test=None, y_test=None):
    """Build the LSTM model and fit it on the training windows.

    Parameters
    ----------
    x_train, y_train : array-like
        Training windows and targets; the last 20% is held out by Keras as
        the validation split that drives early stopping.
    x_test, y_test : array-like, optional
        Accepted so that four-argument calls written for the earlier
        ``train_model`` signature keep working; not used during fitting.

    Returns
    -------
    The fitted model.
    """
    print("check x_train", x_train.shape[1:])
    input_shape = x_train.shape[1:]

    model = build_model(input_shape)

    model.summary()

    callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

    model.fit(
        x_train,
        y_train,
        validation_split=0.2,
        epochs=25,
        batch_size=128,
        callbacks=callbacks,
    )
    return model




# Step 7: Predict next day's closing price
def predict_next_day_price(model, last_data_point, scaler):
    """Predict from a single window and undo scaling when a scaler is given.

    Parameters
    ----------
    model : object with ``predict``
        Called with the window wrapped in a batch of size one.
    last_data_point : array-like
        One input window, without the batch axis.
    scaler : fitted scaler or None
        When None the raw model output is returned. Otherwise the prediction
        is placed in column 0 of a zero-padded row and passed through
        ``scaler.inverse_transform``; the row width follows the scaler's
        ``n_features_in_`` and falls back to 5.

    Returns
    -------
    The first value of the first predicted row, in price units when a scaler
    is supplied.
    """
    batch = np.expand_dims(np.asarray(last_data_point), axis=0)
    predicted_scaled_price = model.predict(batch)[0][0]
    if scaler is None:
        return predicted_scaled_price

    n_features = getattr(scaler, "n_features_in_", 5)
    padded_row = [predicted_scaled_price] + [0] * (n_features - 1)
    predicted_price = scaler.inverse_transform([padded_row])[0][0]
    return predicted_price


# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Stop the cell (without killing the kernel) if any value is still missing
if data_with_technical_indicators.isnull().values.any():
    raise ValueError("There are missing values after computing technical indicators. Please handle them appropriately.")

# Prepare data
X, y, scaler = prepare_data(data_with_technical_indicators)

# Present each close-price window as one timestep with one feature per day
X = X.reshape(X.shape[0], 1, X.shape[-1])
# Split data into training and testing sets
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train)

# Evaluate model; `loss` must be assigned before it is printed
loss = evaluate_model(model, X_test, y_test)
print("Test Loss:", loss)  # Test loss: Represents the average loss (error) between the predicted values and the actual values. Lower values indicate better performance.
# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)


# Calculate additional evaluation metrics
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)


print("Mean Absolute Error (MAE):", mae)  # Mean Absolute Error (MAE): Average magnitude of the errors in the predictions. Lower values indicate better performance.
print("Mean Squared Error (MSE):", mse)  # Mean Squared Error (MSE): Average of the squared differences between the predicted values and the actual values. Lower values indicate better performance.
print("Root Mean Squared Error (RMSE):", rmse)  # Root Mean Squared Error (RMSE): Standard deviation of the residuals (prediction errors). Lower values indicate better performance.

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
 


In [None]:
# News

In [None]:
!unzip stocktrend_prediction/ticker_data.zip

In [None]:
# load embeddings
import lzma
import pickle

# Read the xz-compressed, pickled news embeddings for AAPL.
# SECURITY: pickle.load can execute arbitrary code; only open archives from a trusted source.
# NOTE(review): this rebinds `data`, which earlier cells use for the price DataFrame.
with lzma.open('ticker_data/embeddings/AAPL.xz') as rf:
    data = pickle.load(rf)


In [None]:
# One row per key of the loaded mapping, with the keys parsed into datetimes.
embedding_df = pd.DataFrame.from_dict(data, orient='index')
embedding_df = embedding_df.set_axis(pd.to_datetime(embedding_df.index), axis=0)

In [None]:
# load sentiment

import lzma
import pickle

# Read the xz-compressed, pickled news sentiment records for AAPL.
# SECURITY: pickle.load can execute arbitrary code; only open archives from a trusted source.
with lzma.open('ticker_data/sentiments/AAPL.xz') as rf:
    sentiment_data = pickle.load(rf)


In [None]:
# One row per key of the loaded mapping, with the keys parsed into datetimes.
sentiment_df = pd.DataFrame.from_dict(sentiment_data, orient='index')
sentiment_df = sentiment_df.set_axis(pd.to_datetime(sentiment_df.index), axis=0)

In [None]:
sentiment_df

In [None]:
data_with_technical_indicators.shape

In [None]:
# Left-join sentiment onto the price rows; rows without a match get NaN.
data_w_sentiment = data_with_technical_indicators.join(sentiment_df, how='left')

In [None]:
decay_factor = 0.9

def apply_decay(df):
    """Fill gaps in ``df['score']`` with a geometrically decayed last score.

    A missing row that is ``k`` rows after the most recent observed score
    receives ``score * decay_factor ** k``; observed scores are returned
    unchanged. As a side effect, ``df['decayed_score']`` is set to the decayed
    series for every row.

    Returns a NumPy array aligned with ``df``'s rows.
    """
    scores = df['score']
    missing = scores.isna()

    # Label each run of consecutive missing / present rows, then count the
    # 1-based position inside the run.
    run_id = missing.ne(missing.shift()).cumsum()
    steps = missing.groupby(run_id).cumcount().add(1)

    df['decayed_score'] = scores.ffill().mul(decay_factor ** steps)

    return np.where(missing, df['decayed_score'], scores)
    


# Store the gap-filled score returned by apply_decay() in the 'decayed_score' column.
data_w_sentiment['decayed_score'] = apply_decay(data_w_sentiment)


# Display the frame to inspect the new column.
data_w_sentiment

In [None]:
# Carry the most recent non-missing 'label' forward onto the rows that have none.
data_w_sentiment['label'] = data_w_sentiment['label'].ffill()

In [None]:
# Zero the score on rows labelled 'neutral'; every other row keeps its value.
data_w_sentiment['decayed_score'] = data_w_sentiment['decayed_score'].mask(
    data_w_sentiment['label'].eq('neutral'), 0
)

In [None]:
# Flip the sign of the score on rows labelled 'negative'; every other row keeps its value.
# NOTE(review): this reads and rewrites the same column, so running the cell a
# second time while 'label' is still present negates those rows again.
data_w_sentiment['decayed_score'] = data_w_sentiment['decayed_score'].where(
    data_w_sentiment['label'].ne('negative'), -data_w_sentiment['decayed_score']
)

In [None]:
# Drop the raw sentiment columns now that 'decayed_score' encodes them, then
# discard rows that still contain NaN. errors='ignore' keeps the cell re-runnable:
# on a second run the columns are already gone and the drop is a no-op.
data_w_sentiment = (
    data_w_sentiment
    .drop(columns=['score', 'label'], errors='ignore')
    .dropna()
)

In [None]:
# NOTE(review): within this part of the notebook `data_w_news` is first assigned in the
# cell below, so a fresh top-to-bottom run may raise NameError here -- confirm whether
# an earlier cell defines it.
data_w_news

In [None]:

# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Check for missing values after computing technical indicators.
# Raise instead of calling exit(): exit() terminates the whole session, while an
# exception only stops this cell and leaves the state available for debugging.
if data_with_technical_indicators.isnull().values.any():
    raise ValueError(
        "There are missing values after computing technical indicators. Please handle them appropriately."
    )

# Join the news embeddings on the index; anything left missing after the join is filled with 0.
data_w_news = data_with_technical_indicators.join(embedding_df)
data_w_news = data_w_news.fillna(0)

# Prepare data
X, y, scaler = prepare_data(data_w_news)

# Reshape to (samples, 1, features).
X = X.reshape(X.shape[0], 1, X.shape[-1])

# Split data into training and testing sets (first 80% / last 20%, order preserved)
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model
# No test loss is printed: the evaluate_model() call is disabled in this cell, so
# `loss` would be undefined here (or left over from another experiment). The
# metrics computed below are the reported results.

# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)

# Calculate additional evaluation metrics (lower is better for all three)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)  # average magnitude of the prediction errors
mse = mean_squared_error(y_test, y_pred)   # average squared prediction error
rmse = np.sqrt(mse)                        # square root of the MSE

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
 



In [None]:
# Experiment above: financial + news data; the output is only the next day's close price

In [None]:

# Compute technical indicators
data_with_technical_indicators = compute_technical_indicators(data)

# Check for missing values after computing technical indicators.
# Raise instead of calling exit(): exit() terminates the whole session, while an
# exception only stops this cell and leaves the state available for debugging.
if data_with_technical_indicators.isnull().values.any():
    raise ValueError(
        "There are missing values after computing technical indicators. Please handle them appropriately."
    )

# Join the news embeddings on the index; anything left missing after the join is filled with 0.
data_w_news = data_with_technical_indicators.join(embedding_df)
data_w_news = data_w_news.fillna(0)

# Prepare data
X, y, scaler = prepare_data(data_w_news)

# In this variant X is used exactly as prepare_data returns it (no reshape).
# X = X.reshape(X.shape[0], 1, X.shape[-1])

# Split data into training and testing sets (first 80% / last 20%, order preserved)
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model
# No test loss is printed: the evaluate_model() call is disabled in this cell, so
# `loss` would be undefined here (or left over from another experiment). The
# metrics computed below are the reported results.

# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)

# Calculate additional evaluation metrics (lower is better for all three)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)  # average magnitude of the prediction errors
mse = mean_squared_error(y_test, y_pred)   # average squared prediction error
rmse = np.sqrt(mse)                        # square root of the MSE

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()






In [None]:
# Experiment above: all financial + news



In [None]:


# Prepare data
# This experiment trains on the sentiment-augmented frame (`data_w_sentiment`);
# no technical-indicator recomputation or news-embedding join happens in this cell.
X, y, scaler = prepare_data(data_w_sentiment)

# Reshape to (samples, 1, features).
X = X.reshape(X.shape[0], 1, X.shape[-1])

# Split data into training and testing sets (first 80% / last 20%, order preserved)
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model
# No test loss is printed: the evaluate_model() call is disabled in this cell, so
# `loss` would be undefined here (or left over from another experiment). The
# metrics computed below are the reported results.

# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)

# Calculate additional evaluation metrics (lower is better for all three)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)  # average magnitude of the prediction errors
mse = mean_squared_error(y_test, y_pred)   # average squared prediction error
rmse = np.sqrt(mse)                        # square root of the MSE

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
 




In [None]:
# Experiment above: financial (close) + sentiment

In [None]:

# Prepare data
X, y, scaler = prepare_data(data_w_sentiment)

# In this variant X is used exactly as prepare_data returns it (no reshape).
# X = X.reshape(X.shape[0], 1, X.shape[-1])

# Split data into training and testing sets (first 80% / last 20%, order preserved)
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Train model
model = train_model(X_train, y_train, X_test, y_test)

# Evaluate model
# No test loss is printed: the evaluate_model() call is disabled in this cell, so
# `loss` would be undefined here (or left over from another experiment). The
# metrics computed below are the reported results.

# Predict next day's closing price
last_data_point = X_test[-1]
next_day_price = predict_next_day_price(model, last_data_point, scaler)
print("Predicted Next Day's Closing Price:", next_day_price)

# Calculate additional evaluation metrics (lower is better for all three)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)  # average magnitude of the prediction errors
mse = mean_squared_error(y_test, y_pred)   # average squared prediction error
rmse = np.sqrt(mse)                        # square root of the MSE

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Visualize model predictions
plt.figure(figsize=(10, 6))
plt.plot(y_test, label='Actual Stock Prices')
plt.plot(y_pred, label='Predicted Stock Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()







In [None]:
# transformers


# financial (close) ; using only close feature, output only next day price
Mean Absolute Error (MAE): 4.334137613123113
Mean Squared Error (MSE): 29.039432252151695
Root Mean Squared Error (RMSE): 5.388824756118137
    
    
    
# financial (close) + news data ; close price output is only next day price
Mean Absolute Error (MAE): 2.743355014107444
Mean Squared Error (MSE): 12.688472780434411
Root Mean Squared Error (RMSE): 3.562088261179727

    
# all financial
Mean Absolute Error (MAE): 16.880988359451294
Mean Squared Error (MSE): 309.14230541645105
Root Mean Squared Error (RMSE): 17.5824431014706
    
    
#  all financial + news
Mean Absolute Error (MAE): 8.653478340669112
Mean Squared Error (MSE): 101.70793979023455
Root Mean Squared Error (RMSE): 10.085035438224029
    
    
    
#  financial close + sentiment
Mean Absolute Error (MAE): 2.257353359181098
Mean Squared Error (MSE): 8.602969036914159
Root Mean Squared Error (RMSE): 2.9330818326317045

    

#  all financial  + sentiment
Mean Absolute Error (MAE): 5.52126697857027
Mean Squared Error (MSE): 45.17546689611988
Root Mean Squared Error (RMSE): 6.721269738384249