In [1]:
import pandas as pd
import numpy as np

class model6_4:

    def __init__(self, *args, ohlc='close', csv='data/^NDX_raw_data.csv'):
        self.ohlc = ohlc
        
        self.period = 60
        self.trend_period = 14
        self.rsi_period = 14
        self.num_features = 1
        self.input_period = 60
        self.output_step = 7
        self.units = 512

        self.components = {'savgol'}
        
        if not args:
            print('Enter model components')
            exit(0)
        else:
            self.model_dict = dict()
            for arg in args:
                assert arg.lower() in self.components
                self.model_dict[arg] = self.create_model(component=arg)

    # Logistic transformation
    @staticmethod
    def logistic_transform(r2):
        return round(1 / (1 + np.exp(-r2)),2)
        
    def report_score(self, outputs):
        from sklearn.metrics import r2_score
        
        pred, y = outputs
        
        try:
            assert pred.shape[2] == self.output_step
            assert y.shape[1] == self.output_step

            _pred = pred.reshape(-1, self.output_step)
            _y = y.reshape(-1, self.output_step)
        
            res = list()
            for i in range(_pred.shape[0]):
                res.append(r2_score(_y[i], _pred[i]))

            res = np.array(res)
        
            return {
                'mean_score': np.mean(res),
                'max_score': res.max(),
                'min_score': res.min(),
                'logistic_score': model6_4.logistic_transform(np.mean(res))
            }
        except:
            print("Array shapes don't match")

    def load_data(self, split=None, csv='data/^NDX_raw_data.csv'):
        _data = pd.read_csv(csv)
        _data.rename(columns={'Date': 'date', 'Open':'open', 'High':'high', 'Low':'low', 'Close':'close', 'Volume':'volume'}, inplace=True)

        if not split:
            return _data, pd.DataFrame()
        else:
            _data_backup = _data.iloc[split:]
            _data = _data.iloc[:split]
    
            return _data, _data_backup

    @staticmethod
    ## Creating sequences
    def create_dataset(dataset, time_step=1, output_step=1):
        dataX, dataY = [], []
        for i in range(len(dataset)-time_step-output_step):
            a = dataset[i:(i+time_step), 0]
            b = dataset[(i+time_step):(i+time_step)+output_step, 0]
            dataX.append(a)
            dataY.append(b)
    
        return np.array(dataX), np.array(dataY)

    def savgol_smoothing(self, data_input):
        from scipy.signal import savgol_filter

        # Apply Savitzky-Golay filter
        window_length = 11  # Window length (must be odd)
        polyorder = 2      # Polynomial order
        
        return np.array([ savgol_filter(data_input[i], window_length, polyorder) for i in range(data_input.shape[0]) ])

    def preprocess_data(self, data):
        data_cropped = list()

        # NO NEED TO CROP
        for _data in data:
            data_cropped.append(_data)
        
        data_cropped = np.array(data_cropped)
        
        from sklearn.preprocessing import MinMaxScaler
        
        scaler_data = list(MinMaxScaler() for i in range(data_cropped.shape[0]))
        data_scaled = list()
        
        for i in range(data_cropped.shape[0]):
            data_scaled.append(scaler_data[i].fit_transform(data_cropped[i].reshape(-1,1)))
        
        data_scaled = np.array(data_scaled)
        
        X_input = list()
        y_input = list()
        
        for _data in data_scaled:
            X_input.append(_data[:-self.y.shape[1]])
            y_input.append(_data[-self.y.shape[1]:])
        
        X_input = np.array(X_input)
        y_input = np.array(y_input)

        return X_input, y_input, scaler_data

    def create_model(self, checkpoint_path='model_weights_6_4_OOP', component=''):
        import numpy as np
        from sklearn.preprocessing import MinMaxScaler, StandardScaler
        from tensorflow.keras import Input
        from tensorflow.keras.models import Sequential, Model
        from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Conv1D, AveragePooling1D, Flatten, Reshape, SimpleRNN, GRU, MaxPooling1D, concatenate
        from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
        from tensorflow.keras.initializers import Zeros
        
        inputs = Input(shape=(self.input_period, self.num_features))
        
        model_cnn = Sequential([
            Conv1D(filters=352, kernel_size=1, activation='relu'),
            Conv1D(filters=352, kernel_size=1, activation='relu'),
            MaxPooling1D(pool_size=3),
            Dense(units=128),
            Flatten()
            #Dense(units=output_step),
            #Reshape((output_step,1))
        ])
        
        model_bilstm = Sequential([
            Bidirectional(LSTM(units=384, return_sequences=True, activation='tanh', recurrent_activation='sigmoid')),
            Dropout(0.2),
            Flatten()
            #Dense(units=output_step),
            #Reshape((output_step,1))
        ])
        
        model_bigru = Sequential([
            Bidirectional(GRU(units=128, activation='tanh', return_sequences=True)),
            Dropout(0.4),
            Flatten()
        ])
        
        model_multilayer_lstm = Sequential([
            LSTM(units=64, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'),
            Dropout(0.2),
            LSTM(units=64, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'),
            Flatten()
        ])

        output_cnn = model_cnn(inputs)
        output_bilstm = model_bilstm(inputs)
        output_bigru = model_bigru(inputs)
        output_multilayer_lstm = model_multilayer_lstm(inputs)
        
        concatenated_outputs = concatenate([output_cnn, output_bilstm, output_bigru, output_multilayer_lstm])
        
        main_model = Sequential([
            Input(shape=(concatenated_outputs.shape[1],)),
            #Dense(units=output_step),
            Dense(units=self.output_step, kernel_initializer=Zeros(), use_bias=False),
            Reshape((self.output_step,1))
        ])
        
        final_output = main_model(concatenated_outputs)
        
        functional_pipeline = Model(inputs=inputs, outputs=final_output)

        # Compile the pipeline model
        functional_pipeline.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
        
        self.checkpoint = ModelCheckpoint(filepath=f'{checkpoint_path}_{self.ohlc}-{component}' + '/model_weights_epoch_{epoch:02d}.h5', 
                                    save_best_only=True, save_weights_only=True)
        self.early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        return functional_pipeline

    def generate_input_data(self, component, split=None, csv='data/^NDX_raw_data.csv'):
        self.data, self.data_backup = self.load_data(split=split, csv=csv)
        self.X, self.y = model6_4.create_dataset(self.data[[self.ohlc]].to_numpy(), time_step=self.period, output_step=self.output_step)
        self.data_input = np.array([np.concatenate((self.X[i], self.y[i]), axis=0) for i in range(self.X.shape[0])])

        if 'savgol' == component.lower():
            self.feat_eng = self.savgol_smoothing(self.data_input)

        self.X_input, self.y_input, self.scalers = self.preprocess_data(self.feat_eng)

        return self.X_input, self.y_input, self.scalers
    
    def fit_model(self, component, split, checkpoint_path='model_weights_6_4_OOP'):
        X_input, y_input, _ = self.generate_input_data(component=component, split=split)

        # Train the pipeline model
        history = self.model_dict[component].fit(X_input, y_input, epochs=150, batch_size=64, validation_split=0.2, callbacks=[self.checkpoint, self.early_stopping])

        # Pickle the object and save it to a file
        #import pickle
        with open(f'{checkpoint_path}_{self.ohlc}-{component}/best_epoch.txt', 'w') as f:
            best_epoch = np.argmin(history.history['val_loss']) + 1
            f.write(f'best epoch: {best_epoch}')
        
        return history

    def load_epoch_weights(self, component, best_epoch=None, checkpoint_path='model_weights_6_4_OOP'):
        # Load the weights of the model at the chosen epoch
        self.model_dict[component].load_weights(f'{checkpoint_path}_{self.ohlc}-{component}/model_weights_epoch_{best_epoch:02d}.h5')
        print('Backup: Weigths for the best epoch has been loaded.')
    
    def predict_model(self, component, split=None, csv='data/^NDX_raw_data.csv'):
        X_input, y_input, scalers = self.generate_input_data(component, split=split, csv=csv)
        
        predictions = list()
        test_inputs = list()

        for j in range(X_input.shape[0]):
            if (j+1) % 100 == 0:
                print(j+1)

            predictions.append(
                scalers[j].inverse_transform(
                    self.model_dict[component].predict(X_input[j].reshape(1, self.input_period, self.num_features), verbose=0)[0].reshape(1,self.output_step)
                )
            )

            test_inputs.append(
                scalers[j].inverse_transform(
                    y_input[j]
                )
            )

        return np.array(predictions), np.array(test_inputs)

In [2]:
model_high = model6_4('savgol', ohlc='high')
model_low = model6_4('savgol', ohlc='low')

2024-06-12 13:05:03.553635: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-12 13:05:04.160447: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-12 13:05:04.160643: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-12 13:05:04.245221: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-12 13:05:04.430801: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [3]:
model_high.load_epoch_weights('savgol', best_epoch=17)
model_low.load_epoch_weights('savgol', best_epoch=15)

Backup: Weigths for the best epoch has been loaded.
Backup: Weigths for the best epoch has been loaded.


In [4]:
predictions_high, y_high = model_high.predict_model('savgol', csv='data-china-from2018-test/000001.SS_raw_data.csv')
predictions_low, y_low = model_low.predict_model('savgol', csv='data-china-from2018-test/000001.SS_raw_data.csv')

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500


In [7]:
model_high.report_score((predictions_high, y_high))

{'mean_score': -37.931656809103245,
 'max_score': 0.9839730443351258,
 'min_score': -4627.562665964143,
 'logistic_score': 0.0}

In [8]:
model_low.report_score((predictions_low, y_low))

{'mean_score': -34.665983441306274,
 'max_score': 0.9980883133138815,
 'min_score': -11001.616064098589,
 'logistic_score': 0.0}

In [17]:
predictions_high_df = pd.DataFrame(predictions_high.reshape(-1,7), columns=[f'pred_high_{i}' for i in range(predictions_high.shape[2])], index=model_high.data.index[60:-7])
predictions_low_df = pd.DataFrame(predictions_low.reshape(-1,7), columns=[f'pred_low_{i}' for i in range(predictions_low.shape[2])], index=model_low.data.index[60:-7])

In [22]:
output = pd.DataFrame()
output = pd.concat([model_high.data, predictions_high_df, predictions_low_df], axis=1, join='outer')

In [23]:
output.iloc[30:70]

Unnamed: 0,date,open,high,low,close,volume,pred_high_0,pred_high_1,pred_high_2,pred_high_3,pred_high_4,pred_high_5,pred_high_6,pred_low_0,pred_low_1,pred_low_2,pred_low_3,pred_low_4,pred_low_5,pred_low_6
30,2017-10-20 00:00:00+08:00,3363.513916,3379.764893,3360.100098,3378.647949,127200,,,,,,,,,,,,,,
31,2017-10-23 00:00:00+08:00,3382.280029,3385.284912,3374.705078,3380.698975,130800,,,,,,,,,,,,,,
32,2017-10-24 00:00:00+08:00,3376.599121,3388.688965,3374.125,3388.248047,139900,,,,,,,,,,,,,,
33,2017-10-25 00:00:00+08:00,3384.85791,3398.303955,3382.033936,3396.897949,123100,,,,,,,,,,,,,,
34,2017-10-26 00:00:00+08:00,3397.519043,3414.241943,3391.455078,3407.566895,183800,,,,,,,,,,,,,,
35,2017-10-27 00:00:00+08:00,3404.498047,3421.103027,3402.114014,3416.812012,170300,,,,,,,,,,,,,,
36,2017-10-30 00:00:00+08:00,3413.86792,3419.731934,3357.275879,3390.336914,208300,,,,,,,,,,,,,,
37,2017-10-31 00:00:00+08:00,3380.999023,3397.099121,3376.124023,3393.342041,153500,,,,,,,,,,,,,,
38,2017-11-01 00:00:00+08:00,3393.968018,3410.352051,3388.5979,3395.913086,180600,,,,,,,,,,,,,,
39,2017-11-02 00:00:00+08:00,3391.6521,3391.6521,3372.212891,3383.310059,166900,,,,,,,,,,,,,,


In [24]:
output.to_excel(f'data_w_pred_m6_4-oop/data_w_pred-china.xlsx')