In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

def preprocess_data(file_path_real_time, file_path_historical):
    # Load the real-time data
    df_real_time = pd.read_csv(file_path_real_time, parse_dates=['Datetime'])
    df_real_time.set_index('Datetime', inplace=True)
    df_real_time.sort_index(inplace=True)

    # Load the historical data
    df_historical = pd.read_csv(file_path_historical, parse_dates=['Date'])
    df_historical.rename(columns={'Date': 'Datetime'}, inplace=True)
    df_historical.set_index('Datetime', inplace=True)
    df_historical.sort_index(inplace=True)

    # Combine historical and real-time data
    df = pd.concat([df_historical, df_real_time])

    # Select features and target
    features = df[['Open', 'High', 'Low', 'Close', 'Volume']]
    target = df['Close']

    # Scale features
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_features = scaler.fit_transform(features)

    # Create sequences
    sequence_length = 60
    X, y = [], []
    for i in range(sequence_length, len(scaled_features)):
        X.append(scaled_features[i-sequence_length:i])
        y.append(scaled_features[i, 3])  # 'Close' price

    X, y = np.array(X), np.array(y)
    return X, y, scaler

def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))  # Predicting the 'Close' price
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def main():
    file_path_real_time = '../2.Data Cleaning/cleaned_real_time.csv'
    file_path_historical = '../2.Data Cleaning/cleaned_history.csv'

    X, y, scaler = preprocess_data(file_path_real_time, file_path_historical)
    
    # Split data into training and testing sets
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    model = build_lstm_model((X_train.shape[1], X_train.shape[2]))

    checkpoint = ModelCheckpoint('stock_model.keras', save_best_only=True, monitor='val_loss', mode='min')
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[checkpoint])

    # Evaluate the model
    loss = model.evaluate(X_test, y_test)
    print(f'Test Loss: {loss}')

if __name__ == "__main__":
    main()


  super().__init__(**kwargs)


Epoch 1/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 93ms/step - loss: 0.1092 - val_loss: 7.7080e-05
Epoch 2/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 65ms/step - loss: 0.0063 - val_loss: 1.9390e-05
Epoch 3/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 66ms/step - loss: 0.0046 - val_loss: 1.4913e-04
Epoch 4/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 67ms/step - loss: 0.0043 - val_loss: 4.0786e-04
Epoch 5/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 57ms/step - loss: 0.0042 - val_loss: 2.7262e-05
Epoch 6/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 58ms/step - loss: 0.0038 - val_loss: 1.5731e-05
Epoch 7/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 66ms/step - loss: 0.0037 - val_loss: 1.5843e-05
Epoch 8/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 61ms/step - loss: 0.0036 - val_loss: 1.6004e-05
Epoch 9/50
[1m

In [6]:
# file_path_real_time = '../2.Data Cleaning/cleaned_real_time.csv'
#     file_path_historical = '../2.Data Cleaning/cleaned_history.csv'


In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

def preprocess_data_for_prediction(file_path_real_time, file_path_historical):
    # Load the real-time data
    df_real_time = pd.read_csv(file_path_real_time, parse_dates=['Datetime'])
    df_real_time.set_index('Datetime', inplace=True)
    df_real_time.sort_index(inplace=True)

    # Load the historical data
    df_historical = pd.read_csv(file_path_historical, parse_dates=['Date'])
    df_historical.rename(columns={'Date': 'Datetime'}, inplace=True)
    df_historical.set_index('Datetime', inplace=True)
    df_historical.sort_index(inplace=True)

    # Combine historical and real-time data
    df = pd.concat([df_historical, df_real_time])

    # Select features and target
    features = df[['Open', 'High', 'Low', 'Close', 'Volume']]

    # Scale features
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_features = scaler.fit_transform(features)

    # Create the input sequence for prediction
    sequence_length = 60
    X_input = scaled_features[-sequence_length:]  # Last 60 data points

    X_input = np.array([X_input])
    return X_input, scaler, df

def predict_next_day_close(file_path_real_time, file_path_historical, model_path):
    # Preprocess data for prediction
    X_input, scaler, df = preprocess_data_for_prediction(file_path_real_time, file_path_historical)
    
    # Load the trained model
    model = load_model(model_path)
    
    # Make the prediction
    predicted_scaled_close = model.predict(X_input)
    
    # Inverse scale the prediction
    last_sequence = df[['Open', 'High', 'Low', 'Close', 'Volume']].values[-60:]
    
    # Prepare a dummy array with the same shape as the original features
    dummy_scaled_features = np.zeros((1, last_sequence.shape[1]))
    dummy_scaled_features[0, :-1] = last_sequence[-1, :-1]
    dummy_scaled_features[0, -1] = predicted_scaled_close[0, 0]  # Extract scalar value
    
    predicted_close = scaler.inverse_transform(dummy_scaled_features)[:, -1]
    
    return predicted_close[0]

def main():
    file_path_real_time = '../2.Data Cleaning/cleaned_real_time.csv'
    file_path_historical = '../2.Data Cleaning/cleaned_history.csv'
    model_path = 'stock_model.keras'
    
    predicted_close = predict_next_day_close(file_path_real_time, file_path_historical, model_path)
    print(f'Predicted Close Price for Next Day: {predicted_close}')

if __name__ == "__main__":
    main()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Predicted Close Price for Next Day: 325052407.6064222
