# Parameter Optimizing & Main Engine Setup

> *The rest is in private mode.*

In [13]:
import random
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from datetime import date, datetime, timedelta
import pytz

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso

from skopt import BayesSearchCV
from sklearn.pipeline import Pipeline
from lazypredict.Supervised import LazyClassifier  # import regression if needed 
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler, PolynomialFeatures
from sklearn.metrics import make_scorer, accuracy_score, fbeta_score, recall_score, f1_score, fbeta_score, mean_squared_error, r2_score, classification_report
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV

import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
import tensorflow_datasets as tfds
import tensorflow_addons as tfa

# Turn off tensorflow_datasets progress bars.
tfds.disable_progress_bar()
import logging
# Raise TensorFlow's logger threshold so only errors are emitted.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

# Fix the global random seed (0) so runs are as repeatable as possible.
tf.keras.utils.set_random_seed(0)


def day_check():
    """Return the full weekday name of the current local date, e.g. ``'Monday'``."""
    right_now = datetime.today()
    return right_now.strftime('%A')


def timechecknow():
    """Return the minutes elapsed since 09:30 in the America/New_York time zone.

    Seconds contribute fractionally (``seconds / 60``); the value is negative
    before 09:30. 09:30 is presumably the US market open - confirm with the
    callers.
    """
    ny_now = datetime.now(pytz.timezone('America/New_York'))
    minutes_since_0930 = (ny_now.hour - 9) * 60 + (ny_now.minute - 30)
    return np.round(minutes_since_0930 + ny_now.second / 60, 30)


def tensorflow_model_processing(days, h, o, l, tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val, tf_y_val, pred_features):
    """Train a dense classifier for one activation/loss combination and score it.

    Args:
        days: Moving-average window the features were built with. It is only
            echoed in the printed summary and in the returned row.
        h: Activation used by every hidden ``Dense`` layer.
        o: Activation of the single-unit output layer.
        l: Loss handed to ``compile``.
        tf_X_train, tf_y_train: Training features and labels.
        tf_X_test, tf_y_test: Hold-out data used for the final ``evaluate``.
        tf_X_val, tf_y_val: Validation data watched by the callbacks.
        pred_features: Feature row(s) to predict; only ``prediction[0][0]``
            is returned.

    Returns:
        list: ``[days, h, o, l, loss, fbeta, prediction]`` where ``loss`` and
        ``fbeta`` are the values returned by ``evaluate`` on the test split.
    """
    # start from a clean Keras state so earlier runs do not leak into this one
    tf.keras.backend.clear_session()

    # build model
    layer_neurons = [256, 128, 64, 32, 16, 8]
    input_layers_features = tf_X_train.shape[1]
    output_layers_features = 1

    # model design: each hidden Dense layer is followed by 20% dropout
    tf_model = tf.keras.Sequential()
    tf_model.add(tf.keras.layers.Flatten(input_shape=(input_layers_features, 1)))
    for neurons in layer_neurons:
        tf_model.add(tf.keras.layers.Dense(neurons, activation = h))
        tf_model.add(tf.keras.layers.Dropout(0.2))
    tf_model.add(tf.keras.layers.Dense(output_layers_features, activation = o))

    # compile
    tf_model.compile(optimizer='adam',
                     loss = l,
                     metrics=[tfa.metrics.FBetaScore(num_classes=1, beta=0.5, threshold=0.5)])

    # F-beta is a higher-is-better metric. The callbacks' default mode='auto'
    # infers the direction from the metric name and does not treat
    # 'val_fbeta_score' as one to maximise, so the direction is set explicitly.

    # Stop training when validation F-beta has not improved for 10 consecutive epochs
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_fbeta_score',
                                                      mode = 'max',
                                                      patience = 10)

    # Save the model with the highest validation F-beta
    # NOTE(review): the checkpoint is written but never reloaded here; the
    # evaluation and prediction below use the weights of the last epoch.
    save_best = tf.keras.callbacks.ModelCheckpoint('./best_model.h5',
                                                   monitor = 'val_fbeta_score',
                                                   mode = 'max',
                                                   save_best_only=True)

    # train the model
    EPOCHS = 500
    tf_model.fit(tf_X_train, tf_y_train,
                 epochs = EPOCHS,
                 validation_data = (tf_X_val, tf_y_val),
                 batch_size = 8,
                 verbose = 0,
                 callbacks = [early_stopping, save_best])

    # evaluate loss and fbeta after training
    loss, fbeta = tf_model.evaluate(tf_X_test, tf_y_test, verbose = 0)
    print(f'\nTensorFlow Model Evalution after training\n• Loss: {loss}\n• fbeta: {fbeta}\n\n\n\n\n')

    # predict up or down
    prediction = tf_model.predict(pred_features)
    print(prediction)
    print(f'{days} days moving average applied\n\n\n')
    return [days, h, o, l, loss, fbeta, prediction[0][0]]


def opt_data_processing(df):
    """Build normalized train/test/validation splits plus the row to predict.

    A row is labelled 1 when the next row's ``Stock_price`` is higher than its
    own, otherwise 0. The last row has no following price, so it is excluded
    from training and returned as the prediction input instead.

    All returned feature sets get the same preprocessing: the raw values are
    passed through ``tf.keras.utils.normalize``. Previously the test and
    validation halves were MinMax-scaled first while the training and
    prediction features were not, so the model was evaluated on data prepared
    differently from what it was trained on.

    Args:
        df: Feature frame that contains a ``Stock_price`` column. The frame is
            read only; no helper columns are added to it.

    Returns:
        tuple: ``(tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val,
        tf_y_val, tf_pred_features)``. The 20% hold-out part of the stratified
        split is cut in half: first half is the test set, second half the
        validation set.
    """
    # features to predict: the most recent row
    pred_features = df[-1:]

    # outcome: does tomorrow's price exceed today's? (last row dropped)
    price = df['Stock_price']
    outcomes = (price.shift(-1) > price).astype(int).values[:-1]

    # features to train & test: every row except the one being predicted
    features = df.values[:-1]

    tf_X_train, X_holdout, tf_y_train, y_holdout = train_test_split(
        features, outcomes, test_size=0.2, random_state = 0, stratify = outcomes)

    # split the hold-out part into test (first half) and validation (second half)
    half = int(len(X_holdout) / 2)

    tf_X_train = tf.keras.utils.normalize(tf_X_train)
    tf_X_test = tf.keras.utils.normalize(X_holdout[:half])
    tf_X_val = tf.keras.utils.normalize(X_holdout[half:])
    tf_y_test = y_holdout[:half]
    tf_y_val = y_holdout[half:]
    tf_pred_features = tf.keras.utils.normalize(pred_features)

    return tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val, tf_y_val, tf_pred_features


def param_optimizer(df):
    """Try every activation/loss combination at the stored moving-average window.

    The window length is the integer on the first line of
    ``moving_avg_value.txt``. Every column of ``df`` except the first is
    replaced by its moving average over that window, the result is split by
    ``opt_data_processing`` and one model per combination is trained by
    ``tensorflow_model_processing``.

    Returns:
        list: one result row per combination, exactly as returned by
        ``tensorflow_model_processing``.
    """
    with open("moving_avg_value.txt", 'r') as handle:
        stored_lines = [row.rstrip('\n') for row in handle]
        days = int(stored_lines[0])

    # Candidate settings. Wider grids used earlier for the hidden-layer model:
    #   hidden: ['relu', 'tanh'], output: ['softmax', 'sigmoid']
    hidden_choices = ['relu']   # n/a for the LSTM model
    output_choices = ['sigmoid']
    loss_choices = ['BinaryCrossentropy', 'Hinge', 'MeanSquaredError']

    print(f'\n\n########## Initial setting: {days} days moving average ##########\n\n')

    # Moving average of every column but the first
    averaged = pd.DataFrame()
    for name in df.columns[1:]:
        series = np.array(df[name])
        window_count = len(series) - days + 1
        averaged[name] = pd.DataFrame(
            [np.average(series[start:start + days]) for start in range(window_count)]
        )

    splits = opt_data_processing(averaged)
    tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val, tf_y_val, pred_features = splits

    param_tune = []
    for hidden_fn in hidden_choices:
        print(f'>>> Hidden Function: {hidden_fn}')
        for output_fn in output_choices:
            print(f'>> Output Function: {output_fn}')
            for loss_fn in loss_choices:
                print(f'> Loss Function: {loss_fn}')
                outcome = tensorflow_model_processing(
                    days, hidden_fn, output_fn, loss_fn,
                    tf_X_train, tf_y_train, tf_X_test, tf_y_test,
                    tf_X_val, tf_y_val, pred_features,
                )
                param_tune.append(outcome)
                print('----------------------------------------------------')
    return param_tune


def mvg_optimizer(df, hidden_activation, output_activation, loss):
    """Search moving-average windows around the stored value and rank them by loss.

    The stored window is the integer on the first line of
    ``moving_avg_value.txt``. Windows from ``stored - 3`` (but never below 1)
    up to ``stored + 4`` are tried in ascending order. The search stops early
    as soon as a run's F-beta exceeds the module-level
    ``moving_avg_searchbreak``.

    A failing window is reported, together with the error that caused it, and
    skipped; the search continues with the next window.

    Args:
        df: Source frame; every column except the first is averaged.
        hidden_activation: Hidden-layer activation passed to the model.
        output_activation: Output-layer activation passed to the model.
        loss: Loss passed to the model.

    Returns:
        pandas.DataFrame: one row per completed window, sorted by ``loss``.
        The same frame is written to ``para_mvg_tuned.csv``. It is empty (but
        has the expected columns) when no window completed.
    """
    with open("moving_avg_value.txt", 'r') as f:
        moving_avg_value = [line.rstrip('\n') for line in f]
        moving_avg_value = int(moving_avg_value[0])

    if moving_avg_value == 3:
        moving_avg_value = 4

    result_columns = ['moving_avg', 'hidden_activation', 'output_activation', 'loss_compile', 'loss', 'fbeta', 'result']

    # Start at 1 at the lowest: clamping smaller values to 1 inside the loop
    # would train the identical 1-day configuration several times.
    first_window = max(1, moving_avg_value - 3)

    mvg_tune = []
    for days in range(first_window, moving_avg_value + 5):
        try:
            print(f'\n\n\n##### {days} days moving average #####')
            # Moving average of every column but the first
            mavg = pd.DataFrame()
            for column in df.columns[1:]:
                mv_change = np.array(df[column])
                mv = [np.average(mv_change[i:days + i])
                      for i in range(len(mv_change) - days + 1)]
                mavg[column] = pd.DataFrame(mv)
            tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val, tf_y_val, pred_features = opt_data_processing(mavg)
            tensorflow_model_processing_result = tensorflow_model_processing(days, hidden_activation, output_activation, loss, tf_X_train, tf_y_train, tf_X_test, tf_y_test, tf_X_val, tf_y_val, pred_features)
            mvg_tune.append(tensorflow_model_processing_result)
            print('====================================================')
            print('Loop breaker:', tensorflow_model_processing_result[5])
            if tensorflow_model_processing_result[5] > moving_avg_searchbreak:
                print('\n\n\n\n\n########## Moving average searching: Break ##########\n\n\n\n\n')
                break
        except Exception as exc:
            # Best effort: keep searching, but say why this window failed.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print(f'Except >>> {days} moving average')
            print(f'{type(exc).__name__}: {exc}')

    # Passing the column names to the constructor also works when no run succeeded.
    tuned = pd.DataFrame(mvg_tune, columns = result_columns)

    tuned = tuned.sort_values('loss').reset_index(drop = True)
    tuned.to_csv('para_mvg_tuned.csv', index = False)
    return tuned


def main_param():
    """Run the activation/loss search, then the moving-average search, and plot.

    Source data comes from the dated monitoring CSV when that file exists,
    otherwise from ``predict_aim_sourcedata_snp.csv``. The rows tied for the
    lowest loss are written to ``param_top_ten.csv``; the first of them
    supplies the settings handed to ``mvg_optimizer``. Finally F-beta and loss
    are plotted against the moving-average window on twin y-axes.
    """
    voice_message('    TensorFlow optimizing starts')
    from os.path import exists

    monitoring_csv = 'predict_aim_sourcedata_monitoring_^GSPC_%s.csv' % currentdate
    if exists(monitoring_csv):
        df_source = pd.read_csv(monitoring_csv)
    else:
        df_source = pd.read_csv('predict_aim_sourcedata_snp.csv')

    # parameter optimize
    tuned = pd.DataFrame(param_optimizer(df_source.copy()))
    tuned.columns = ['moving_avg', 'hidden_activation', 'output_activation', 'loss_compile', 'loss', 'fbeta', 'result']
    tuned = tuned.sort_values('loss').reset_index(drop = True)

    # keep the rows that share the lowest loss (at most ten of them)
    lowest_loss = tuned.head(1).loss.values[0]
    param_top_ten = tuned[tuned['loss'] == lowest_loss]
    param_top_ten = param_top_ten.sort_values('loss').reset_index(drop = True).head(10)
    param_top_ten.to_csv('param_top_ten.csv', index = False)
    print(param_top_ten)

    hidden_activation = param_top_ten.loc[0, 'hidden_activation']
    output_activation = param_top_ten.loc[0, 'output_activation']
    loss = param_top_ten.loc[0, 'loss_compile']

    print('\n\n\n##################################################')
    print(f'• Hidden Layer Activator ==> {hidden_activation}')
    print(f'• Output Layer Activator ==> {output_activation}')
    print(f'• Loss Compiler          ==> {loss}')
    print('##################################################\n\n\n\n\n')

    # moving average optimize
    window_results = mvg_optimizer(df_source, hidden_activation, output_activation, loss).copy()
    print(window_results.sort_values('loss').reset_index(drop = True))

    # plot: F-beta on the left axis, loss on the right axis
    by_window = window_results.sort_values('moving_avg', ascending = False)
    fig, fbeta_axis = plt.subplots()
    fig.set_size_inches(16, 9)

    fbeta_axis.plot(by_window.moving_avg, by_window.fbeta, color = 'blue', marker = 'X')
    fbeta_axis.set_xlabel('Moving Average (days)')
    fbeta_axis.set_ylabel('fbeta', color = 'blue')

    loss_axis = fbeta_axis.twinx()
    loss_axis.plot(by_window.moving_avg, by_window.loss, color = 'red', marker = 'X')
    loss_axis.set_ylabel('Loss', color = 'red')
    loss_axis.grid(axis = 'y')

    plt.show()


def pred_tensorflow(tf_X_train, tf_y_train, tf_X_val, tf_y_val, tf_X_test, tf_y_test, pred_features):
    """Train the dense classifier with the verified settings and predict.

    Activations and loss are the first three items returned by
    ``opt_param_verified()``. Note the argument order: validation data comes
    before test data in this signature.

    Args:
        tf_X_train, tf_y_train: Training features and labels.
        tf_X_val, tf_y_val: Validation data watched by the callbacks.
        tf_X_test, tf_y_test: Hold-out data used for the final ``evaluate``.
        pred_features: Feature row(s) to predict.

    Returns:
        list: ``[fbeta[0], prediction[0][0]]`` - the first element of the
        F-beta value from ``evaluate`` and the first predicted value.
    """
    with tf.device('/GPU:0'):

        # parameter input
        opt_param_verified_result = opt_param_verified()
        hidden_activation = opt_param_verified_result[0]
        output_activation = opt_param_verified_result[1]
        loss_compile = opt_param_verified_result[2]

        # start from a clean Keras state
        tf.keras.backend.clear_session()

        # build model
        layer_neurons = [256, 128, 64, 32, 16, 8]
        input_layers_features = tf_X_train.shape[1]
        output_layers_features = 1

        # model design: each hidden Dense layer is followed by 20% dropout
        tf_model = tf.keras.Sequential()
        tf_model.add(tf.keras.layers.Flatten(input_shape=(input_layers_features, 1)))
        for neurons in layer_neurons:
            tf_model.add(tf.keras.layers.Dense(neurons, activation = hidden_activation))
            tf_model.add(tf.keras.layers.Dropout(0.2))
        tf_model.add(tf.keras.layers.Dense(output_layers_features, activation = output_activation))

        # compile
        tf_model.compile(optimizer='adam',
                         loss = loss_compile,
                         metrics=[tfa.metrics.FBetaScore(num_classes=1, beta=0.5, threshold=0.5)])

        # F-beta is a higher-is-better metric. The callbacks' default
        # mode='auto' infers the direction from the metric name and does not
        # treat 'val_fbeta_score' as one to maximise, so it is set explicitly.

        # Stop training when validation F-beta has not improved for 10 consecutive epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_fbeta_score',
                                                          mode = 'max',
                                                          patience = 10)

        # Save the model with the highest validation F-beta
        # NOTE(review): the checkpoint is never reloaded here; evaluation and
        # prediction below use the weights of the last epoch.
        save_best = tf.keras.callbacks.ModelCheckpoint('./best_model.h5',
                                                       monitor = 'val_fbeta_score',
                                                       mode = 'max',
                                                       save_best_only=True)

        # train the model
        EPOCHS = 500
        tf_model.fit(tf_X_train, tf_y_train,
                     epochs = EPOCHS,
                     validation_data = (tf_X_val, tf_y_val),
                     batch_size = 8,
                     verbose = 0,
                     callbacks = [early_stopping, save_best])

        # evaluate loss and fbeta after training
        loss, fbeta = tf_model.evaluate(tf_X_test, tf_y_test, verbose = 0)

        # predict up or down
        prediction = tf_model.predict(pred_features)

    return [fbeta[0], prediction[0][0]]


def pred_tensorflow_RNN(tf_X_train, tf_y_train, tf_X_val, tf_y_val, tf_X_test, tf_y_test, pred_features):
    """Train the two-layer LSTM classifier with the verified settings and predict.

    The output activation and the loss are items 1 and 2 of
    ``opt_param_verified()``; the hidden activation (item 0) does not apply to
    this architecture and is not read. Note the argument order: validation
    data comes before test data in this signature.

    Args:
        tf_X_train, tf_y_train: Training features and labels.
        tf_X_val, tf_y_val: Validation data watched by the callbacks.
        tf_X_test, tf_y_test: Hold-out data used for the final ``evaluate``.
        pred_features: Feature row(s) to predict.

    Returns:
        list: ``[fbeta[0], prediction[0][0]]`` - the first element of the
        F-beta value from ``evaluate`` and the first predicted value.
    """
    with tf.device('/GPU:0'):

        # parameter input
        opt_param_verified_result = opt_param_verified()
        output_activation = opt_param_verified_result[1]
        loss_compile = opt_param_verified_result[2]

        # start from a clean Keras state
        tf.keras.backend.clear_session()

        # build model
        input_layers_features = tf_X_train.shape[1]
        output_layers_features = 1

        # LSTM model design: every feature is fed as one step of a length-1 vector
        tf_model = tf.keras.Sequential()
        tf_model.add(LSTM(100, return_sequences = True, input_shape = (input_layers_features, 1)))
        tf_model.add(LSTM(100, return_sequences = False))
        tf_model.add(Dense(25))
        tf_model.add(Dense(output_layers_features, activation = output_activation))

        # compile
        tf_model.compile(optimizer='adam',
                         loss = loss_compile,
                         metrics=[tfa.metrics.FBetaScore(num_classes=1, beta=0.5, threshold=0.5)])

        # F-beta is a higher-is-better metric. The callbacks' default
        # mode='auto' infers the direction from the metric name and does not
        # treat 'val_fbeta_score' as one to maximise, so it is set explicitly.

        # Stop training when validation F-beta has not improved for 10 consecutive epochs
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_fbeta_score',
                                                          mode = 'max',
                                                          patience = 10)

        # Save the model with the highest validation F-beta
        # NOTE(review): the checkpoint is never reloaded here; evaluation and
        # prediction below use the weights of the last epoch.
        save_best = tf.keras.callbacks.ModelCheckpoint('./best_model.h5',
                                                       monitor = 'val_fbeta_score',
                                                       mode = 'max',
                                                       save_best_only=True)

        # train the model
        EPOCHS = 500
        tf_model.fit(tf_X_train, tf_y_train,
                     epochs = EPOCHS,
                     validation_data = (tf_X_val, tf_y_val),
                     batch_size = 8,
                     verbose = 0,
                     callbacks = [early_stopping, save_best])

        # evaluate loss and fbeta after training
        loss, fbeta = tf_model.evaluate(tf_X_test, tf_y_test, verbose = 0)

        # predict up or down
        prediction = tf_model.predict(pred_features)

    return [fbeta[0], prediction[0][0]]


def pred_logi(X_train, X_test, y_train, y_test, pred_features):
    """Fit a logistic regression and score it at the configured probability cut-off.

    Test probabilities of the positive class strictly above the module-level
    ``predict_proba_value`` count as 1, all others as 0.

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    model = Pipeline(steps = [('classifier', LogisticRegression(random_state = 0))])
    model.fit(X_train, y_train)

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_svc(X_train, X_test, y_train, y_test, pred_features):
    """Tune an SVC with a randomized search and score the best estimator.

    Ten candidates are drawn and ranked by 2-fold cross-validated F-beta
    (beta = 0.5). Test probabilities of the positive class strictly above the
    module-level ``predict_proba_value`` count as 1, all others as 0.

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    search_space = {"C": [0.1, 0.5, 1, 3, 5],
                    "kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
                    "degree": [1, 4]}

    search = RandomizedSearchCV(
        estimator = SVC(random_state = 0, probability = True),
        param_distributions = search_space,
        scoring = make_scorer(fbeta_score, beta = 0.5),
        cv = 2,
        n_iter = 10,
        n_jobs = -1,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = best_model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = best_model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_rf(X_train, X_test, y_train, y_test, pred_features):
    """Tune a random forest with a randomized search and score the best estimator.

    Ten candidates are drawn and ranked by 2-fold cross-validated F-beta
    (beta = 0.5). ``max_features`` ranges from 1 to the number of columns of
    ``X_test``. Test probabilities of the positive class strictly above the
    module-level ``predict_proba_value`` count as 1, all others as 0.

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    feature_count = X_test.shape[1]
    search_space = {"max_depth": [3, None],
                    "n_estimators": list(range(10, 200)),
                    "max_features": list(range(1, feature_count + 1)),
                    "min_samples_split": list(range(2, 11)),
                    "min_samples_leaf": list(range(1, 11)),
                    "bootstrap": [True, False],
                    "criterion": ["gini", "entropy"]}

    search = RandomizedSearchCV(
        estimator = RandomForestClassifier(random_state = 0),
        param_distributions = search_space,
        scoring = make_scorer(fbeta_score, beta = 0.5),
        cv = 2,
        n_iter = 10,
        n_jobs = -1,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = best_model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = best_model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_ada(X_train, X_test, y_train, y_test, pred_features):
    """Tune AdaBoost with a randomized search and score the best estimator.

    Ten candidates over ``n_estimators`` and ``learning_rate`` are drawn and
    ranked by 2-fold cross-validated F-beta (beta = 0.5). Test probabilities
    of the positive class strictly above the module-level
    ``predict_proba_value`` count as 1, all others as 0.

    Parameter reference:
    http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    search_space = {"n_estimators": [10, 100, 200, 400],
                    "learning_rate": [0.001, 0.005, .01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2, 10, 20]}

    search = RandomizedSearchCV(
        estimator = AdaBoostClassifier(random_state = 0),
        param_distributions = search_space,
        scoring = make_scorer(fbeta_score, beta = 0.5),
        cv = 2,
        n_iter = 10,
        n_jobs = -1,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = best_model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = best_model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_grd(X_train, X_test, y_train, y_test, pred_features):
    """Tune gradient boosting with a randomized search and score the best estimator.

    Ten candidates are drawn and ranked by 2-fold cross-validated F-beta
    (beta = 0.5). Test probabilities of the positive class strictly above the
    module-level ``predict_proba_value`` count as 1, all others as 0.

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    search_space = {'learning_rate': [0.01, 0.02, 0.03],
                    'subsample'    : [0.9, 0.5, 0.2],
                    'n_estimators' : [100, 500, 1000],
                    'max_depth'    : [4, 6, 8]}

    search = RandomizedSearchCV(
        estimator = GradientBoostingClassifier(random_state = 0),
        param_distributions = search_space,
        scoring = make_scorer(fbeta_score, beta = 0.5),
        cv = 2,
        n_iter = 10,
        n_jobs = -1,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = best_model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = best_model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_xgb(X_train, X_test, y_train, y_test, pred_features):
    """Tune an XGBoost classifier with a randomized search and score the best estimator.

    Ten candidates are drawn and ranked by 2-fold cross-validated F-beta
    (beta = 0.5). Test probabilities of the positive class strictly above the
    module-level ``predict_proba_value`` count as 1, all others as 0.

    Returns:
        list: ``[fbeta, prediction]`` - F-beta (beta = 0.5) on the test set and
        the positive-class probability of the first row of ``pred_features``.
    """
    search_space = {'learning_rate'   : [0.05, 0.10, 0.15, 0.20, 0.25, 0.30],
                    'max_depth'       : [3, 4, 5, 6, 8, 10, 12, 15],
                    'min_child_weight': [1, 3, 5, 7],
                    'gamma'           : [0.0, 0.1, 0.2, 0.3, 0.4],
                    'colsample_bytree': [0.3, 0.4, 0.5, 0.7]}

    search = RandomizedSearchCV(
        estimator = xgb.XGBClassifier(random_state = 0),
        param_distributions = search_space,
        scoring = make_scorer(fbeta_score, beta = 0.5),
        cv = 2,
        n_iter = 10,
        n_jobs = -1,
    )
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # turn positive-class probabilities into hard 0/1 labels
    positive_proba = best_model.predict_proba(X_test)[:, 1]
    hard_labels = (positive_proba > predict_proba_value).astype(positive_proba.dtype)

    fbeta = fbeta_score(y_test, hard_labels, beta=0.5)
    prediction = best_model.predict_proba(pred_features)[0, 1]

    return [fbeta, prediction]


def pred_lr_opt(df):
    """Fit polynomial regressions of degree 1-3 and predict the latest row's price.

    ``Date`` is dropped, the last row is set aside as the row to predict, and
    the remaining rows are split 80/20. For each degree a
    ``PolynomialFeatures`` + ``LinearRegression(fit_intercept=False)`` model is
    fitted on the training part; the degree with the lowest test MSE wins (the
    lowest degree on ties). The winning transformer and model are reused for
    the prediction instead of being fitted a second time, and the transformer
    is only applied to (not refitted on) the prediction row.

    Args:
        df: Frame with ``Date`` and ``Stock_price`` columns plus the feature
            columns. The frame itself is not modified.

    Returns:
        list: ``[n_train_test_rows - 1, degree, r2, mse, prediction]`` where
        ``r2`` and ``mse`` are measured on the test part and ``prediction`` is
        a float.
    """
    df = df.drop(columns = ['Date'])

    # features to predict: the most recent row, without its own price
    pred_features = df[-1:].drop(columns = ['Stock_price'])

    # rows to train & test: everything before that row
    history = df[:-1]
    outcomes = history['Stock_price'].values
    features = history.drop(columns = ['Stock_price']).values

    X_train, X_test, y_train, y_test = train_test_split(features, outcomes, test_size=0.2, random_state = 0)

    # find the poly degree having the lowest MSE, keeping its fitted objects
    best = None
    for degree_value in range(1, 4):
        poly_feat = PolynomialFeatures(degree = degree_value)
        X_train_poly = poly_feat.fit_transform(X_train)
        poly_model = LinearRegression(fit_intercept = False).fit(X_train_poly, y_train)
        y_pred = poly_model.predict(poly_feat.transform(X_test))
        mse = mean_squared_error(y_test, y_pred)
        # strict '<' keeps the lowest degree when two degrees tie
        if best is None or mse < best[0]:
            best = (mse, degree_value, poly_feat, poly_model, y_pred)

    mse, degree_opt, poly_feat, poly_model, y_pred = best
    r2 = r2_score(y_test, y_pred)

    # .values: the transformer was fitted on a plain array, so pass one here too
    pred_features_poly = poly_feat.transform(pred_features.values)
    prediction = float(poly_model.predict(pred_features_poly)[0])
    return [int(features.shape[0]-1), degree_opt, r2, mse, prediction]


def pred_lr(df):
    """Pick the recent-history length with the lowest MSE and return its prediction.

    ``pred_lr_opt`` is run on ``df.tail(n)`` for the 30 values of ``n`` from
    ``len(df) - 30`` up to ``len(df) - 1``. The ``n`` with the lowest test MSE
    (the smallest ``n`` on ties) is then run once more, its summary is
    printed, and its predicted price is returned.
    """
    row_count = df.shape[0]
    mse_by_tail = {
        tail: pred_lr_opt(df.tail(tail))[3]
        for tail in range(row_count - 30, row_count)
    }

    # first tail length that reaches the minimum MSE
    tail_value = pd.Series(mse_by_tail).idxmin()

    observations, degree, r_squared, mse, prediction = pred_lr_opt(df.tail(tail_value))
    print('\n\n#################################')
    print(f'• No. Observations: {observations} records')
    print(f'• Most recent {tail_value} days')
    print(f'• Poly degree: {degree}')
    print(f'• R squared: {r_squared}')
    print(f'• MSE: {mse}')
    print(f'• Prediction: {prediction}')
    print('#################################')

    return prediction


def classifer_data_input(df, moving_avg_value):
    """Train every classifier on moving-averaged data and vote on the next move.

    Each column of ``df`` after the first is smoothed with a simple moving
    average. The label is 1 when the next row's ``Stock_price`` is higher than
    the current row's. Every ``pred_*`` model is trained on an 80/20
    stratified split; element 0 of its return value is used as its fbeta and
    element 1 as its result. Models whose fbeta reaches
    ``modelchoice_minimum_fbeta`` vote, and the vote is compared with the
    module-level thresholds ``min_positive_percent``,
    ``min_num_positive_models`` and ``min_average_fbeta``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; the columns after the first must include ``Stock_price``.
    moving_avg_value : int
        NOTE(review): not used -- the window is always re-read from
        ``moving_avg_value.txt``, which overrides this argument. Confirm that
        the override is intended.

    Returns
    -------
    list
        ``[average_fbeta, signal]`` where ``signal`` is 1 for (+) and 0 for
        (-). ``average_fbeta`` is NaN when no model reaches the fbeta cut-off.
    """
    with open("moving_avg_value.txt", 'r') as f:
        moving_avg_value = [line.rstrip('\n') for line in f]
        moving_avg_value = int(moving_avg_value[0])

    # ML-Classifier
    # Simple moving average of every column except the first one.
    mvp = moving_avg_value
    averaged = {}
    for column in df.columns[1:]:
        values = np.array(df[column])
        averaged[column] = [
            np.average(values[start:start + mvp])
            for start in range(len(values) - mvp + 1)
        ]
    df = pd.DataFrame(averaged)

    # The most recent row is what we predict for; it has no label yet.
    pred_features = df[-1:]

    # Label: does tomorrow's price exceed today's? The last row is dropped
    # because it has no "tomorrow".
    tomorrow_price = df['Stock_price'].shift(-1)
    outcomes = (tomorrow_price > df['Stock_price']).astype(int).values[:-1]
    features = df.values[:-1]

    # for non-tf
    X_train, X_test, y_train, y_test = train_test_split(
        features, outcomes, test_size=0.2, random_state=42, stratify=outcomes)
    sc = MinMaxScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    non_tf_pred_features = sc.transform(pred_features)

    # for tf: same split (same seed), with the hold-out part halved into
    # test and validation. The raw hold-out is scaled exactly once here;
    # the previous code re-scaled the already scaled X_test.
    tf_X_train, tf_X_holdout, tf_y_train, tf_y_holdout = train_test_split(
        features, outcomes, test_size=0.2, random_state=42, stratify=outcomes)
    tf_X_train = sc.transform(tf_X_train)
    tf_X_holdout = sc.transform(tf_X_holdout)
    half = len(tf_X_holdout) // 2
    tf_X_test, tf_X_val = tf_X_holdout[:half], tf_X_holdout[half:]
    tf_y_test, tf_y_val = tf_y_holdout[:half], tf_y_holdout[half:]
    tf_pred_features = sc.transform(pred_features)

    print('\n')

    # NOTE(review): validation is passed before test, as in the original call
    # sites -- verify against the signatures of pred_tensorflow(_RNN).
    tf_args = (tf_X_train, tf_y_train, tf_X_val, tf_y_val,
               tf_X_test, tf_y_test, tf_pred_features)
    classic_args = (X_train, X_test, y_train, y_test, non_tf_pred_features)

    # (progress label, result key, trainer, arguments)
    model_runs = [
        ('tf', 'tensorflow', pred_tensorflow, tf_args),
        ('tf RNN', 'tensorflow_RNN', pred_tensorflow_RNN, tf_args),
        ('logi', 'logi', pred_logi, classic_args),
        ('svc', 'svc', pred_svc, classic_args),
        ('rf', 'rf', pred_rf, classic_args),
        ('ada', 'ada', pred_ada, classic_args),
        ('grd', 'grd', pred_grd, classic_args),
        ('xgb', 'xgb', pred_xgb, classic_args),
    ]

    # Each model's scores are stored under its own key, so xgb can no longer
    # overwrite the gradient-boosting result.
    fbeta = {}
    result = {}
    for label, key, trainer, args in model_runs:
        print(label, end = '                               \r')
        scores = trainer(*args)
        print(end = '                                     \r')
        fbeta[key] = scores[0]
        result[key] = scores[1]

    rank = pd.DataFrame([fbeta, result]).T.rename(columns = {0:'fbeta' , 1:'result'})
    rank.sort_values('fbeta', ascending = False, inplace = True)
    print(f'\n\n• Moving Average Days: {mvp}')
    print('====================================\n', rank)
    print('====================================')

    print('\n========[Bullish Conditions]========')
    print(f'• Min. fbeta value:  {np.round(modelchoice_minimum_fbeta, 2)}')
    print(f'• Min. Result value: {np.round(predict_proba_value, 2)}')
    print('------------------------------------')
    print(f'• Average fbeta: {min_average_fbeta}')
    print(f'• Number of (+) Models: {min_num_positive_models}')
    print(f'• Pred. (+) Percent: {min_positive_percent}')
    print('====================================')

    # Only models that clear the fbeta cut-off vote; copy so that adding the
    # 'sign' column does not write into a slice of the full table.
    rank = rank[rank['fbeta'] >= modelchoice_minimum_fbeta].copy()

    # Proba conversion
    proba_threshold = np.round(predict_proba_value, 2)
    rank['sign'] = [
        1 if result_prediction >= proba_threshold else 0
        for result_prediction in rank['result'].values
    ]

    print(rank)

    average_fbeta = np.round(rank.fbeta.mean(), 2)
    print('\n==============[Result]==============')
    print(f'• Average fbeta: {average_fbeta}')

    positive_modes = rank['sign'].sum()
    print(f'• Number of (+) Models: {positive_modes}')

    average_result = np.round(rank['sign'].mean(), 2)
    print(f'• Pred. (+) Percent: {average_result}')
    print('====================================')

    is_bullish = (average_result >= min_positive_percent
                  and positive_modes >= min_num_positive_models
                  and average_fbeta > min_average_fbeta)
    print("• Result: " + '\033[1m' + ('(+)' if is_bullish else '(-)') + '\033[0m')
    print('====================================\n\n\n')
    return [average_fbeta, 1 if is_bullish else 0]
    

def opt_param_verified(): # decide parameters and moving average value
    """Read the tuning table and return the selected parameters.

    The activations and the compile loss are taken from the row of
    ``para_mvg_tuned.csv`` with the smallest ``loss``. The moving-average
    window is the smallest ``moving_avg`` among the rows sharing the highest
    ``fbeta``; it is clamped to ``[moving_avg_value_min, moving_avg_value_max]``,
    stored in the module-level ``moving_avg_value`` and written to
    ``moving_avg_value.txt``.

    Returns
    -------
    list
        ``[hidden_activation, output_activation, loss_compile, moving_avg_value]``
    """
    global moving_avg_value

    tuned = pd.read_csv('para_mvg_tuned.csv')

    # Network settings: best row by 'loss'.
    by_loss = tuned.sort_values('loss').reset_index(drop = True)
    hidden_activation = by_loss.loc[0, 'hidden_activation']
    output_activation = by_loss.loc[0, 'output_activation']
    loss_compile = by_loss.loc[0, 'loss_compile']

    # moving average value based on 'fbeta'
    # (a variant selecting it from the lowest-'loss' row is not in use)
    by_fbeta = by_loss.sort_values('fbeta', ascending = False).reset_index(drop = True)
    top_fbeta = by_fbeta['fbeta'].values[0]
    top_rows = by_fbeta[by_fbeta['fbeta'] == top_fbeta]
    moving_avg_value = top_rows.sort_values('moving_avg')['moving_avg'].values[0]

    if moving_avg_value < 0:
        moving_avg_value = 1

    # Clamp to the configured bounds: upper bound first, then lower bound.
    moving_avg_value = min(moving_avg_value, moving_avg_value_max)
    moving_avg_value = max(moving_avg_value, moving_avg_value_min)

    with open("moving_avg_value.txt", 'w') as f:
        f.write(str(moving_avg_value))

    return [hidden_activation, output_activation, loss_compile, moving_avg_value]
            
 
def main_engine():
    """Check the tuned setup against S&P 500 data and print the chosen parameters.

    Prints the TensorFlow / Keras versions and the device in use, runs
    ``classifer_data_input`` on the S&P 500 source file (the dated monitoring
    file when it exists, otherwise ``predict_aim_sourcedata_snp.csv``) and
    prints the parameters returned by ``opt_param_verified``.

    Raises
    ------
    StopExecution
        When the average fbeta does not reach ``minimum_fbeta`` (including
        when it is NaN because no model qualified). The exception suppresses
        its own traceback through ``_render_traceback_``.
    """
    from os.path import exists

    print('\u2022 TensorFlow version:', tf.__version__)
    print('\u2022 tf.keras version:', tf.keras.__version__)
    # tf.test.is_gpu_available() is deprecated; list the physical devices instead.
    gpu_found = bool(tf.config.list_physical_devices('GPU'))
    print('\u2022 Running on GPU' if gpu_found else '\t\u2022 GPU device not found. Running on CPU')

    print('\n\nAlgorithm Test with S&P500 ...')
    moving_avg_value = opt_param_verified()[3]

    # Prefer today's monitoring file and fall back to the static S&P 500 file.
    monitoring_file = 'predict_aim_sourcedata_monitoring_^GSPC_%s.csv' %currentdate
    if exists(monitoring_file):
        df_source = pd.read_csv(monitoring_file)
    else:
        df_source = pd.read_csv('predict_aim_sourcedata_snp.csv')

    al_test_results = classifer_data_input(df_source, moving_avg_value)
    # Written as "not >=" so that a NaN average (no model passed the cut-off)
    # is treated as a failure instead of slipping through a "<" comparison.
    if not (al_test_results[0] >= minimum_fbeta):
        print(f'\n\nExcept >>> The fbeta average is less than the minimum value:\n{al_test_results[0]} ==(Improve)==> {minimum_fbeta}')
        voice_message("""\
        The highest prediction fbeta is less than the mininum value. Code cell break """)
        # code cell break ###############
        class StopExecution(Exception):
            def _render_traceback_(self):
                pass

        raise StopExecution
        # code cell break ###############

    else:
        print('\n\nPrediction Result: Passed\n\n')
        voice_message("""\
        TensorFlow has been optimized""")

    param_result = opt_param_verified()
    print(f'• Hidden_activation:   {param_result[0]}')
    print(f'• Output_activation:   {param_result[1]}')
    print(f'• Loss_compile:        {param_result[2]}')
    print(f'• Moving Average Days: {param_result[3]}')

    
if __name__ == '__main__':
    # timechecknow() returns minutes relative to 09:30 New York time, so
    # "< -20" means earlier than 09:10; the engine only runs then, on weekdays.
    minutepassed = timechecknow()
    if day_check() not in ('Saturday', 'Sunday') and minutepassed < -20:
        main_engine()
    else:
        # End with the reset code (0), not a second "bold on" (1), so the bold
        # attribute does not leak into whatever is printed afterwards.
        print('\n', '\033[1m' + 'The market is closed or underway.' + '\033[0m', '\n')


 [1mThe market is closed or underway.[1m 



___

*Execution Video*

`Settings`

https://www.youtube.com/watch?v=4pRMAbvSjow

`Realizing Gain`

https://www.youtube.com/watch?v=QapupAMCR7U

`Daily Blue-Chip Finding`

https://www.youtube.com/watch?v=kwU3WXKIdAQ

___