In [17]:
import pandas as pd
import numpy as np

class model6_5:

    def __init__(self, *args, ohlc='close', csv='data/^NDX_raw_data.csv'):
        self.ohlc = ohlc

        self.period = 60
        self.trend_period = 14
        self.rsi_period = 14
        self.num_features = 1
        self.input_period = 60
        self.output_step = 7
        self.units = 512

        self.components = {'savgol', 'seasons'}
        
        if not args:
            print('Enter model components')
            exit(0)
        else:
            self.model_dict = dict()
            for arg in args:
                assert arg.lower() in self.components
                self.model_dict[arg] = self.create_model(component=arg)

    # Logistic transformation
    @staticmethod
    def logistic_transform(r2):
        return round(1 / (1 + np.exp(-r2)),2)
    
    def report_score(self, outputs):
        from sklearn.metrics import r2_score
        
        pred, y = outputs
        
        try:
            assert pred.shape[2] == self.output_step
            assert y.shape[1] == self.output_step

            _pred = pred.reshape(-1, self.output_step)
            _y = y.reshape(-1, self.output_step)
        
            res = list()
            for i in range(_pred.shape[0]):
                res.append(r2_score(_y[i], _pred[i]))

            res = np.array(res)
        
            return {
                'mean_score': np.mean(res),
                'max_score': res.max(),
                'min_score': res.min(),
                'logistic_score': model6_5.logistic_transform(np.mean(res))
            }
        except:
            print("Array shapes don't match")

    def load_data(self, split=None, csv='data/^NDX_raw_data.csv'):
        _data = pd.read_csv(csv)
        _data.rename(columns={'Date': 'date', 'Open':'open', 'High':'high', 'Low':'low', 'Close':'close', 'Volume':'volume'}, inplace=True)

        if not split:
            return _data, pd.DataFrame()
        else:
            _data_backup = _data.iloc[split:]
            _data = _data.iloc[:split]
    
            return _data, _data_backup

    @staticmethod
    ## Creating sequences
    def create_dataset(dataset, time_step=1, output_step=1):
        dataX, dataY = [], []
        for i in range(len(dataset)-time_step-output_step):
            a = dataset[i:(i+time_step), 0]
            b = dataset[(i+time_step):(i+time_step)+output_step, 0]
            dataX.append(a)
            dataY.append(b)
    
        return np.array(dataX), np.array(dataY)

    def savgol_smoothing(self, data_input):
        from scipy.signal import savgol_filter

        # Apply Savitzky-Golay filter
        window_length = 17  # Window length (must be odd)
        polyorder = 1      # Polynomial order
        
        return np.array([ savgol_filter(data_input[i], window_length, polyorder) for i in range(data_input.shape[0]) ])

    def decompose_seasons(self, data_input):
        from statsmodels.tsa.seasonal import seasonal_decompose

        decompositions = np.array([seasonal_decompose(data_input[i], model='additive', period=14) for i in range(data_input.shape[0])])
        
        return np.array([decompositions[i].seasonal for i in range(decompositions.shape[0])])

    def preprocess_data(self, data):
        data_cropped = list()

        # NO NEED TO CROP
        for _data in data:
            data_cropped.append(_data)
        
        data_cropped = np.array(data_cropped)
        
        from sklearn.preprocessing import MinMaxScaler
        
        scaler_data = list(MinMaxScaler() for i in range(data_cropped.shape[0]))
        data_scaled = list()
        
        for i in range(data_cropped.shape[0]):
            data_scaled.append(scaler_data[i].fit_transform(data_cropped[i].reshape(-1,1)))
        
        data_scaled = np.array(data_scaled)
        
        X_input = list()
        y_input = list()
        
        for _data in data_scaled:
            X_input.append(_data[:-self.y.shape[1]])
            y_input.append(_data[-self.y.shape[1]:])
        
        X_input = np.array(X_input)
        y_input = np.array(y_input)

        return X_input, y_input, scaler_data

    def create_model(self, checkpoint_path='model_weights_6_5_OOP', component=''):
        import numpy as np
        from sklearn.preprocessing import MinMaxScaler, StandardScaler
        from tensorflow.keras import Input
        from tensorflow.keras.models import Sequential, Model
        from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Conv1D, AveragePooling1D, Flatten, Reshape, SimpleRNN, GRU, MaxPooling1D, concatenate
        from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
        from tensorflow.keras.initializers import Zeros
        
        inputs = Input(shape=(self.input_period, self.num_features))
        
        model_cnn = Sequential([
            Conv1D(filters=352, kernel_size=1, activation='relu'),
            Conv1D(filters=352, kernel_size=1, activation='relu'),
            MaxPooling1D(pool_size=3),
            Dense(units=128),
            Flatten()
            #Dense(units=output_step),
            #Reshape((output_step,1))
        ])
        
        model_bilstm = Sequential([
            Bidirectional(LSTM(units=384, return_sequences=True, activation='tanh', recurrent_activation='sigmoid')),
            Dropout(0.2),
            Flatten()
            #Dense(units=output_step),
            #Reshape((output_step,1))
        ])
        
        model_bigru = Sequential([
            Bidirectional(GRU(units=128, activation='tanh', return_sequences=True)),
            Dropout(0.4),
            Flatten()
        ])
        
        model_multilayer_lstm = Sequential([
            LSTM(units=64, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'),
            Dropout(0.2),
            LSTM(units=64, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'),
            Flatten()
        ])

        output_cnn = model_cnn(inputs)
        output_bilstm = model_bilstm(inputs)
        output_bigru = model_bigru(inputs)
        output_multilayer_lstm = model_multilayer_lstm(inputs)
        
        concatenated_outputs = concatenate([output_cnn, output_bilstm, output_bigru, output_multilayer_lstm])
        
        main_model = Sequential([
            Input(shape=(concatenated_outputs.shape[1],)),
            #Dense(units=output_step),
            Dense(units=self.output_step, kernel_initializer=Zeros(), use_bias=False),
            Reshape((self.output_step,1))
        ])
        
        final_output = main_model(concatenated_outputs)
        
        functional_pipeline = Model(inputs=inputs, outputs=final_output)

        # Compile the pipeline model
        functional_pipeline.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
        
        self.checkpoint = ModelCheckpoint(filepath=f'{checkpoint_path}_{self.ohlc}-{component}' + '/model_weights_epoch_{epoch:02d}.h5', 
                                    save_best_only=True, save_weights_only=True)
        self.early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        return functional_pipeline

    def generate_input_data(self, component, split=None, csv='data/^NDX_raw_data.csv'):
        self.data, self.data_backup = self.load_data(split=split, csv=csv)
        self.X, self.y = model6_5.create_dataset(self.data[[self.ohlc]].to_numpy(), time_step=self.period, output_step=self.output_step)
        self.data_input = np.array([np.concatenate((self.X[i], self.y[i]), axis=0) for i in range(self.X.shape[0])])

        if 'savgol' == component.lower():
            self.feat_eng = self.savgol_smoothing(self.data_input)
        if 'seasons' == component.lower():
            self.feat_eng = self.decompose_seasons(self.data_input)

        self.X_input, self.y_input, self.scalers = self.preprocess_data(self.feat_eng)

        return self.X_input, self.y_input, self.scalers

    def fit_model(self, component, split, checkpoint_path='model_weights_6_5_OOP'):
        X_input, y_input, _ = self.generate_input_data(component=component, split=split)

        # Train the pipeline model
        history = self.model_dict[component].fit(X_input, y_input, epochs=150, batch_size=64, validation_split=0.2, callbacks=[self.checkpoint, self.early_stopping])

        # Pickle the object and save it to a file
        #import pickle
        with open(f'{checkpoint_path}_{self.ohlc}-{component}/best_epoch.txt', 'w') as f:
            best_epoch = np.argmin(history.history['val_loss']) + 1
            f.write(f'best epoch: {best_epoch}')
        
        return history

    def load_epoch_weights(self, component, best_epoch=None, checkpoint_path='model_weights_6_5_OOP'):
        # Load the weights of the model at the chosen epoch
        self.model_dict[component].load_weights(f'{checkpoint_path}_{self.ohlc}-{component}/model_weights_epoch_{best_epoch:02d}.h5')
        print('Backup: Weigths for the best epoch has been loaded.')

    def predict_model(self, component, split=None, csv='data/^NDX_raw_data.csv'):
        X_input, y_input, scalers = self.generate_input_data(component, split=split, csv=csv)
        
        predictions = list()
        test_inputs = list()

        for j in range(X_input.shape[0]):
            if (j+1) % 100 == 0:
                print(j+1)

            predictions.append(
                scalers[j].inverse_transform(
                    self.model_dict[component].predict(X_input[j].reshape(1, self.input_period, self.num_features), verbose=0)[0].reshape(1,self.output_step)
                )
            )

            test_inputs.append(
                scalers[j].inverse_transform(
                    y_input[j]
                )
            )

        return np.array(predictions), np.array(test_inputs)

In [18]:
model_savgol_high = model6_5("savgol", ohlc='high')
model_seasons_high = model6_5("seasons", ohlc='high')

model_savgol_low = model6_5("savgol", ohlc='low')
model_seasons_low = model6_5("seasons", ohlc='low')

In [19]:
model_savgol_high.load_epoch_weights("savgol", best_epoch=45)
model_seasons_high.load_epoch_weights("seasons", best_epoch=16)

model_savgol_low.load_epoch_weights("savgol", best_epoch=32)
model_seasons_low.load_epoch_weights("seasons", best_epoch=19)

Backup: Weigths for the best epoch has been loaded.
Backup: Weigths for the best epoch has been loaded.
Backup: Weigths for the best epoch has been loaded.
Backup: Weigths for the best epoch has been loaded.


In [4]:
pred_savgol_high, y_savgol_high = model_savgol_high.predict_model("savgol", csv='data-china-from2018-test/000001.SS_raw_data.csv')
print("done")
pred_seasons_high, y_seasons_high = model_seasons_high.predict_model("seasons", csv='data-china-from2018-test/000001.SS_raw_data.csv')
print("done")

pred_savgol_low, y_savgol_low = model_savgol_low.predict_model("savgol", csv='data-china-from2018-test/000001.SS_raw_data.csv')
print("done")
pred_seasons_low, y_seasons_low = model_seasons_low.predict_model("seasons", csv='data-china-from2018-test/000001.SS_raw_data.csv')
print("done")

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
done
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
done
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
done
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
done


In [20]:
from sklearn.metrics import r2_score

score_savgol_high = model_savgol_high.report_score((pred_savgol_high, y_savgol_high))
score_seasons_high = model_seasons_high.report_score((pred_seasons_high, y_seasons_high))

score_savgol_low = model_savgol_low.report_score((pred_savgol_low, y_savgol_low))
score_seasons_low = model_seasons_low.report_score((pred_seasons_low, y_seasons_low))

print(score_savgol_high)
print(score_seasons_high)
print(score_savgol_low)
print(score_seasons_low)

  return round(1 / (1 + np.exp(-r2)),2)


{'mean_score': -108.99353763865534, 'max_score': 0.9998553703092787, 'min_score': -105620.24572780391, 'logistic_score': 0.0}
{'mean_score': 0.9997689064592966, 'max_score': 0.9999885967724471, 'min_score': 0.9973684142241515, 'logistic_score': 0.73}
{'mean_score': -23261.468928716884, 'max_score': 0.9997198372720412, 'min_score': -30034704.984813962, 'logistic_score': 0.0}
{'mean_score': 0.9998392844467581, 'max_score': 0.9999886117228687, 'min_score': 0.9972401469350262, 'logistic_score': 0.73}


In [5]:
observation_high_df = pd.DataFrame([
    pred_savgol_high.reshape(-1, 7)[i] + 3*pred_seasons_high.reshape(-1, 7)[i] for i in range(pred_savgol_high.shape[0])
], columns=[f'pred_high_{i}' for i in range(model_savgol_high.output_step)],
index=model_savgol_high.data.index[model_savgol_high.period:-model_savgol_high.output_step])

observation_low_df = pd.DataFrame([
    pred_savgol_low.reshape(-1, 7)[i] + 3*pred_seasons_low.reshape(-1, 7)[i] for i in range(pred_savgol_low.shape[0])
], columns=[f'pred_low_{i}' for i in range(model_savgol_low.output_step)],
index=model_savgol_low.data.index[model_savgol_low.period:-model_savgol_low.output_step])

In [6]:
data_output = pd.concat([ model_savgol_high.data, observation_high_df, observation_low_df ], axis=1, join='outer')

In [19]:
data_output.to_excel(f'data_w_pred_m6_5-oop/test_w_pred.xlsx')

In [7]:
data_output

Unnamed: 0,date,open,high,low,close,volume,pred_high_0,pred_high_1,pred_high_2,pred_high_3,pred_high_4,pred_high_5,pred_high_6,pred_low_0,pred_low_1,pred_low_2,pred_low_3,pred_low_4,pred_low_5,pred_low_6
0,2004-01-02 00:00:00-05:00,1474.160034,1479.589966,1458.510010,1463.569946,1666780000,,,,,,,,,,,,,,
1,2004-01-05 00:00:00-05:00,1474.550049,1496.579956,1474.189941,1496.579956,2362910000,,,,,,,,,,,,,,
2,2004-01-06 00:00:00-05:00,1492.410034,1504.469971,1486.589966,1501.260010,2273220000,,,,,,,,,,,,,,
3,2004-01-07 00:00:00-05:00,1498.380005,1514.449951,1491.199951,1514.260010,2294280000,,,,,,,,,,,,,,
4,2004-01-08 00:00:00-05:00,1524.060059,1530.650024,1513.339966,1530.650024,2683950000,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5028,2023-12-22 00:00:00-05:00,16799.019531,16839.250000,16703.570312,16777.400391,4796600000,,,,,,,,,,,,,,
5029,2023-12-26 00:00:00-05:00,16816.779297,16907.509766,16813.570312,16878.460938,6120600000,,,,,,,,,,,,,,
5030,2023-12-27 00:00:00-05:00,16896.140625,16922.009766,16859.550781,16906.800781,7480170000,,,,,,,,,,,,,,
5031,2023-12-28 00:00:00-05:00,16963.519531,16969.169922,16891.320312,16898.470703,5090570000,,,,,,,,,,,,,,
