### Data retrieval:

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [2]:
# Function to get historical data for a ticker
# Date range for the history request, as ISO `YYYY-MM-DD` strings.
start = "2023-01-01"
# `end` may be set to None to get data up to the current date.
end = "2024-01-01"

def get_ticker_data(tickerSymbol, start, end=None, interval='1d'):
    """Fetch price history for one ticker and add calendar helper columns.

    Args:
        tickerSymbol: Ticker symbol handed to `yf.Ticker`.
        start: Start of the requested date range.
        end: End of the requested date range; None leaves it unspecified.
        interval: Bar interval forwarded to `history` (default '1d').

    Returns:
        The DataFrame returned by `Ticker.history(..., auto_adjust=False)`
        with three extra columns: 'Date' (the index converted with
        `pd.to_datetime`), 'Month' and 'Year' (taken from 'Date').
    """
    history = yf.Ticker(tickerSymbol).history(
        start=start,
        end=end,
        interval=interval,
        auto_adjust=False,
    )

    # Mirror the index into a regular column so the `.dt` accessor can be used.
    history['Date'] = pd.to_datetime(history.index)
    history['Month'] = history['Date'].dt.month
    history['Year'] = history['Date'].dt.year

    return history

### data reading:

In [3]:
import os
import re

directory = 'data'

# Maps ticker symbol -> DataFrame read from that symbol's CSV file.
data = dict()

# Column names as stored in the CSV files -> lower-case names used by later cells.
column_renames = {'Date': 'date', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}

# sorted() makes the load order deterministic; os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(directory)):
    path = os.path.join(directory, file)
    # The symbol is everything before the first underscore in the file name.
    symbol_pattern = re.match(r'([^_]+)_', file)
    # Skip entries that cannot be a "<SYMBOL>_..." data file instead of crashing:
    # names without an underscore (re.match returns None) and sub-directories
    # such as .ipynb_checkpoints, which match the pattern but cannot be read as CSV.
    if symbol_pattern is None or not os.path.isfile(path):
        continue
    symbol = symbol_pattern.group(1)
    data[symbol] = pd.read_csv(path).rename(columns=column_renames)

In [4]:
# Display the `data` dict (symbol -> DataFrame) to inspect what was loaded from the CSV files.
data

{'VRTX':                            date        open        high         low  \
 0     2004-01-02 00:00:00-05:00   10.350000   10.750000   10.230000   
 1     2004-01-05 00:00:00-05:00   10.550000   10.680000   10.100000   
 2     2004-01-06 00:00:00-05:00   10.090000   10.360000   10.040000   
 3     2004-01-07 00:00:00-05:00   10.130000   10.430000    9.900000   
 4     2004-01-08 00:00:00-05:00   10.400000   10.560000   10.290000   
 ...                         ...         ...         ...         ...   
 5028  2023-12-22 00:00:00-05:00  403.589996  406.910004  402.350006   
 5029  2023-12-26 00:00:00-05:00  406.970001  408.519989  404.480011   
 5030  2023-12-27 00:00:00-05:00  408.649994  409.000000  405.589996   
 5031  2023-12-28 00:00:00-05:00  411.260010  412.570007  409.000000   
 5032  2023-12-29 00:00:00-05:00  409.000000  409.619995  406.070007   
 
            close  volume  
 0      10.500000  544900  
 1      10.120000  910300  
 2      10.080000  618700  
 3      10.400

### Load the pickled models:

In [5]:
import pickle

# Load the three pickled models from the working directory.
# The c / h / l suffixes presumably correspond to the close / high / low
# feature sets (best_features_c / _h / _l) -- confirm against the training code.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# model files that come from a trusted source.
def _load_pickled_model(path):
    """Return the object unpickled from the file at `path`."""
    # A dedicated handle name avoids rebinding `file`, which the data-reading
    # cell uses as its filename loop variable.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


model_c = _load_pickled_model('model_c.pkl')
model_h = _load_pickled_model('model_h.pkl')
model_l = _load_pickled_model('model_l.pkl')

2024-04-16 07:39:18.951094: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-16 07:39:19.026924: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-16 07:39:19.027004: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-16 07:39:19.029804: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-16 07:39:19.043321: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


### prepare data for prediction:

In [9]:
import numpy as np

# Build sliding-window inputs and their next-step targets
def create_sequences(features, target, seq_length):
    """Slice `features` into overlapping windows paired with the following target.

    Args:
        features: Sliceable sequence of feature rows.
        target: Indexable sequence of target values, aligned with `features`.
        seq_length: Number of consecutive rows in each window.

    Returns:
        Tuple `(X, y)` of numpy arrays. `X[k]` is `features[k:k + seq_length]`
        and `y[k]` is `target[k + seq_length]`, for every `k` in
        `range(len(features) - seq_length)`.
    """
    window_starts = range(len(features) - seq_length)
    windows = [features[first:first + seq_length] for first in window_starts]
    # The label is the value immediately after the window's last row.
    labels = [target[first + seq_length] for first in window_starts]
    return np.array(windows), np.array(labels)

In [10]:
# Indicator columns common to all three feature sets; only the leading price column differs.
_shared_indicator_columns = (
    'ma_9', 'sma_9', 'macd', 'signal', 'hist',
    'adx', 'atr', 'sar', 'tema', 'roc',
)

best_features_c = ('close',) + _shared_indicator_columns
best_features_h = ('high',) + _shared_indicator_columns
best_features_l = ('low',) + _shared_indicator_columns

In [8]:
# Import talib once instead of re-executing the import statements on every loop
# iteration. EMA is not used in this cell; it stays in the import list in case
# other cells rely on the name.
from talib import ADX, ATR, EMA, MA, MACD, ROC, RSI, SAR, SMA, TEMA, WMA


def _add_lagged_indicators(frame):
    """Add technical-indicator columns to `frame` in place, lagged by one row.

    Every indicator is computed from the frame's 'high' / 'low' / 'close'
    columns and then shifted down by one row, so the value stored on a row was
    computed from data ending at the previous row. Rows containing NaN in any
    column are then dropped in place.

    NOTE: not idempotent -- calling this again on an already-processed frame
    recomputes the indicators on the trimmed data and drops further rows.

    Args:
        frame: DataFrame with 'high', 'low' and 'close' columns; mutated in place.
    """
    high, low, close = frame['high'], frame['low'], frame['close']
    macd, signal, hist = MACD(close)

    # Insertion order is the resulting column order.
    # NOTE(review): talib's MA presumably defaults to a simple moving average,
    # which would make 'ma_9' a duplicate of 'sma_9'; both are kept because the
    # best_features_* tuples list both names.
    indicators = {
        'rsi_14': RSI(close, timeperiod=14),
        'ma_9': MA(close, timeperiod=9),
        'sma_9': SMA(close, timeperiod=9),
        'wma_9': WMA(close, timeperiod=9),
        'macd': macd,
        'signal': signal,
        'hist': hist,
        'adx': ADX(high, low, close),
        'atr': ATR(high=high, low=low, close=close, timeperiod=14),
        'sar': SAR(high=high, low=low, acceleration=0.02, maximum=0.2),
        'tema': TEMA(close, timeperiod=14),
        'roc': ROC(close, timeperiod=14),
    }

    for column, values in indicators.items():
        # Store first, then shift the stored column: this works whether talib
        # returned a Series or a plain array.
        frame[column] = values
        frame[column] = frame[column].shift(1)

    # Remove every row that contains a NaN (indicator warm-up rows and the shifted first row).
    frame.dropna(axis=0, inplace=True)


for symbol in data:
    _add_lagged_indicators(data[symbol])

In [None]:
def _scale_and_window(frame, feature_names, target_name, feature_scaler, target_scaler, seq_length, num_features):
    """Scale one feature set of `frame` and cut it into model input windows.

    Args:
        frame: DataFrame holding the feature columns and the target column.
        feature_names: Iterable of column names used as model inputs.
        target_name: Name of the column to predict.
        feature_scaler: Object whose `transform` scales the feature columns.
        target_scaler: Object whose `transform` scales the target column.
        seq_length: Number of rows per input window.
        num_features: Size of the last axis after reshaping.

    Returns:
        Tuple `(X, y)` as produced by `create_sequences`, with `X` reshaped to
        `(X.shape[0], seq_length, num_features)`.
    """
    features_scaled = feature_scaler.transform(frame[list(feature_names)])
    target_scaled = target_scaler.transform(frame[[target_name]])
    X, y = create_sequences(features_scaled, target_scaled, seq_length)
    return X.reshape(X.shape[0], seq_length, num_features), y


# NOTE(review): seq_length and the scaler_* / scaler_target_* / num_features_*
# objects are not defined in the cells visible here and must exist before this
# cell runs. The *_c names are inferred from the _h / _l naming -- confirm.
# The original first stanza was a mis-pasted copy of the "high" stanza (it read
# data_backup_h_scaled before assignment); it now prepares the "close" inputs.
# The original also read an undefined `data_backup` on every iteration instead
# of the current symbol's frame.

# Per-symbol results: symbol -> {'c' | 'h' | 'l': (X, y)}.
sequences_by_symbol = {}

for symbol in data:
    frame = data[symbol]

    X_b_c, y_b_c = _scale_and_window(
        frame, best_features_c, 'close', scaler_c, scaler_target_c, seq_length, num_features_c)
    X_b_h, y_b_h = _scale_and_window(
        frame, best_features_h, 'high', scaler_h, scaler_target_h, seq_length, num_features_h)
    X_b_l, y_b_l = _scale_and_window(
        frame, best_features_l, 'low', scaler_l, scaler_target_l, seq_length, num_features_l)

    # The flat X_b_* / y_b_* names keep holding the most recent symbol's arrays;
    # the dict keeps every symbol's result.
    sequences_by_symbol[symbol] = {
        'c': (X_b_c, y_b_c),
        'h': (X_b_h, y_b_h),
        'l': (X_b_l, y_b_l),
    }

In [6]:
# Display the `data` dict again for inspection.
# NOTE(review): the saved output below still shows only the raw price columns and this
# cell's execution count is lower than the indicator cell's, so the output looks stale --
# re-run after the indicator cell to refresh it.
data

{'VRTX':                            date        open        high         low  \
 0     2004-01-02 00:00:00-05:00   10.350000   10.750000   10.230000   
 1     2004-01-05 00:00:00-05:00   10.550000   10.680000   10.100000   
 2     2004-01-06 00:00:00-05:00   10.090000   10.360000   10.040000   
 3     2004-01-07 00:00:00-05:00   10.130000   10.430000    9.900000   
 4     2004-01-08 00:00:00-05:00   10.400000   10.560000   10.290000   
 ...                         ...         ...         ...         ...   
 5028  2023-12-22 00:00:00-05:00  403.589996  406.910004  402.350006   
 5029  2023-12-26 00:00:00-05:00  406.970001  408.519989  404.480011   
 5030  2023-12-27 00:00:00-05:00  408.649994  409.000000  405.589996   
 5031  2023-12-28 00:00:00-05:00  411.260010  412.570007  409.000000   
 5032  2023-12-29 00:00:00-05:00  409.000000  409.619995  406.070007   
 
            close  volume  
 0      10.500000  544900  
 1      10.120000  910300  
 2      10.080000  618700  
 3      10.400