# Optimized vs Non-Optimized

Review charts of optimized vs non-optimized models.

## Generating Optimized Models...
Because Bayesian hyperparameter optimization can take some time, it should be run as a standalone process.
That process should save the optimized hyperparameters for each ticker to a file, so that they can be loaded
as needed when training the model for a given ticker.


In [9]:
from pricepredict import PricePredict
from datetime import datetime, timedelta

# Directory paths handed to PricePredict (models, charts, predictions)
model_dir, chart_dir, preds_dir = '../models/', '../charts/', '../predictions/'

# Root directory for the GUI's files; every path below lives under it
gui_data = '../gui_data/'
# CSV used to save/restore the all_df_symbols DataFrame
guiAllSymbolsCsv = gui_data + 'gui_all_symbols.csv'
# Pickle (dill) files for the PP objects, plus their ".bk" backup copies
dill_sym_dpps_d = gui_data + 'sym_dpps.dil'
dill_sym_dpps_w = gui_data + 'sym_dpps_w.dil'
dillbk_sym_dpps_d = dill_sym_dpps_d + '.bk'
dillbk_sym_dpps_w = dill_sym_dpps_w + '.bk'
# JSON file holding the optimized hyperparameters
opt_hyperparams = gui_data + 'ticker_bopts.json'

def tst_prediction_analysis(in_ticker, pp_obj=None):
    """Cache daily training/prediction data for a ticker and show the report.

    Caches roughly four years (365 * 4 days) of daily data for training and
    roughly three months (30 * 3 days) for prediction, both ending now, then
    calls ``cached_train_predict_report`` with the plot shown but not saved.

    Args:
        in_ticker: Ticker symbol passed to the PricePredict caching calls.
        pp_obj: Optional existing PricePredict instance to reuse. When None,
            a new instance is built from the module-level ``model_dir``,
            ``chart_dir`` and ``preds_dir`` paths (the same ones
            ``do_bayes_opt`` uses).

    Returns:
        The PricePredict instance that was used.
    """
    if pp_obj is None:
        # Create an instance of the price prediction object
        pp = PricePredict(model_dir=model_dir,
                          chart_dir=chart_dir,
                          preds_dir=preds_dir)
    else:
        pp = pp_obj

    ticker = in_ticker
    date_fmt = "%Y-%m-%d"

    # Take a single timestamp so both windows end on the same date, even if
    # the call happens to straddle midnight.
    now = datetime.now()
    end_date = now.strftime(date_fmt)
    train_start_date = (now - timedelta(days=365 * 4)).strftime(date_fmt)
    pred_start_date = (now - timedelta(days=30 * 3)).strftime(date_fmt)

    pp.cache_training_data(ticker, train_start_date, end_date, PricePredict.PeriodDaily)
    pp.cache_prediction_data(ticker, pred_start_date, end_date, PricePredict.PeriodDaily)
    pp.cached_train_predict_report(save_plot=False, show_plot=True)

    return pp

def do_bayes_opt(in_ticker, pp_obj=None, opt_csv=None,
                  only_fetch_opt_data=False, do_optimize=False,
                  cache_train=False, cache_predict=False, train_and_predict=False):
    """Run selected stages of the Bayesian-optimization workflow for a ticker.

    Each stage is enabled by its own flag; with every flag left False the
    function only resolves the PricePredict object and returns it.

    Args:
        in_ticker: Ticker symbol to work on.
        pp_obj: Existing PricePredict instance to reuse; when None a new one
            is created from model_dir, chart_dir and preds_dir.
        opt_csv: Forwarded unchanged to ``pp.bayesian_optimization``.
        only_fetch_opt_data: Fetch the training window via
            ``pp.fetch_data_yahoo``, store the returned features on
            ``pp.features`` and return immediately.
        do_optimize: Augment and scale ``pp.orig_data``, prepare model inputs,
            train once, then call ``pp.bayesian_optimization``.
        cache_train: Call ``pp.cache_training_data`` for the training window.
        cache_predict: Call ``pp.cache_prediction_data`` for the prediction
            window.
        train_and_predict: Call ``pp.cached_train_predict_report`` without
            forcing training, showing (not saving) the plot.

    Returns:
        The PricePredict instance, or None when ``only_fetch_opt_data`` is set
        and the fetch returned no data.
    """
    date_fmt = "%Y-%m-%d"

    # Reuse the caller's price prediction object, or build a fresh one
    pp = pp_obj if pp_obj is not None else PricePredict(model_dir=model_dir,
                                                        chart_dir=chart_dir,
                                                        preds_dir=preds_dir)

    ticker = in_ticker

    # Training window: the last 365 * 4 days
    # (Training uses 20% of the latest data for validation)
    train_end = datetime.now()
    end_date = train_end.strftime(date_fmt)
    start_date = (train_end - timedelta(days=365 * 4)).strftime(date_fmt)

    # Prediction window: the last 30 * 3 days
    predict_end = datetime.now()
    pred_end_date = predict_end.strftime(date_fmt)
    pred_start_date = (predict_end - timedelta(days=30 * 3)).strftime(date_fmt)

    if only_fetch_opt_data:
        # Load data from Yahoo Finance; nothing else runs in this mode
        data, pp.features = pp.fetch_data_yahoo(ticker, start_date, end_date)
        if data is not None:
            return pp
        print(f"'Close' column not found in {ticker}'s data. Skipping...")
        return None

    if do_optimize:
        # Augment the data with additional indicators/features
        aug_data, _features, _targets, _dates_data = pp.augment_data(pp.orig_data, 0)
        # Scale the data so the model can use it more effectively
        scaled_data, _scaler = pp.scale_data(aug_data)
        # Prepare the scaled data for model inputs
        X, y = pp.prep_model_inputs(scaled_data, pp.features)
        # Train the model once before optimizing
        _model, _y_pred, _mse = pp.train_model(X, y)
        # Perform Bayesian optimization
        pp.bayesian_optimization(X, y, opt_csv=opt_csv)

    if cache_train:
        pp.cache_training_data(ticker, start_date, end_date, PricePredict.PeriodDaily)

    if cache_predict:
        pp.cache_prediction_data(ticker, pred_start_date, pred_end_date, PricePredict.PeriodDaily)

    if train_and_predict:
        # Training, will load last saved model which is the optimized model.
        pp.cached_train_predict_report(force_training=False, save_plot=False, show_plot=True)

    return pp
        

## Review Optimized vs Non-Optimized Models Predictions Visually

Generate a Prediction Analysis Chart for one Ticker and Optimize the Model's Hyperparameters


In [10]:

ticker = 'AAPL'

# Baseline: prediction analysis with a fresh PricePredict object.
pp1 = tst_prediction_analysis(ticker)

# Optimized: do_bayes_opt() with every flag left at its default only creates
# a PricePredict object, so the fetch and optimize stages must be requested
# explicitly (fetch first, because the optimize stage reads pp.orig_data).
pp2 = do_bayes_opt(ticker, only_fetch_opt_data=True)
if pp2 is None:
    raise RuntimeError(f"No data could be fetched for {ticker}; cannot optimize.")
pp2 = do_bayes_opt(ticker, pp_obj=pp2, do_optimize=True)
pp2 = tst_prediction_analysis(ticker, pp_obj=pp2)

# NOTE(review): bayes_opt_hypers is presumably populated by
# pp.bayesian_optimization() -- confirm against PricePredict.
print(f"Optimized Model Hyperparameters: {pp2.bayes_opt_hypers}")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Error: Predicting Price: Input 0 of layer "functional_19" is incompatible with the layer: expected shape=(None, 15, 13), found shape=(32, 15, 19)


TypeError: object of type 'NoneType' has no len()

## Generate a CSV file of the optimized hyperparameters for each Ticker

### * * * This code is not run here because it produces too much output * * *

In [None]:
import futureproof as fp
from tqdm.notebook import tqdm_notebook

tickers = [ 'AAPL', 'ABT', 'ACN', 'ADBE', 'ADM', 'ADP', 'AIG', 'ALKS', 'ALL', 'AMGN', 'AMX', 'AMZN', 'ANTM.JK', 'AON', 'APD', 'APTV', 'AVGO', 'AXON', 'AXP', 'BA', 'BAC', 'BAX', 'BKNG', 'BLK', 'BRK-B', 'CAT', 'CB', 'CCI', 'CI', 'CL', 'CLDX', 'CLS', 'CLSK', 'CMCSA', 'CNC', 'CRM', 'CRSP', 'CSCO', 'CSX', 'CVX', 'DHR', 'DIS', 'DUK', 'ECL', 'EGIO', 'EL', 'EMR', 'EOG', 'ERIC', 'EURUSD=X', 'EXAS', 'EXEL', 'FOLD', 'FXI', 'FXP', 'GBPUSD=X', 'GC=F', 'GD', 'GE', 'GILD', 'GLPG', 'GOOG', 'GOOGL', 'HAE', 'HD', 'HON', 'IART', 'IBM', 'IMAX', 'INTC', 'INTU', 'IRTC', 'ISRG', 'ITW', 'JAZZ', 'JD', 'JNJ', 'JPM', 'JPYUSD=X', 'KO', 'LIN', 'LIVN', 'LLY', 'LMT', 'LOW', 'LRCX', 'LSCC', 'MA', 'MASI', 'MCD', 'MDLZ', 'MDT', 'META', 'MMM', 'MO', 'MRK', 'MRVL', 'MSFT', 'MTCH', 'NEE', 'NFLX', 'NKE', 'NNDM', 'NOW', 'NVCR', 'NVDA', 'ORCL', 'PACB', 'PEP', 'PFE', 'PG', 'PLD', 'PNC', 'PSA', 'PTCT', 'PYPL', 'QCOM', 'RTX', 'SBUX', 'SCHW', 'SEDG', 'SO', 'SPGI', 'SYK', 'SYY', 'T', 'TMO', 'TRV', 'TSLA', 'TXN', 'UCTT', 'UNH', 'UPS', 'USB', 'V', 'VRTX', 'VZ', 'WM', 'WMT', 'WOLF', 'XAB=F', 'XAE=F', 'XAF=F', 'XAI=F', 'XAK=F', 'XAU=F', 'XNCR', 'XOM', 'ZM', '^DJI', '^GSPC', '^IXIC', '^N225', '^XAX' ]

import json  # imported here so this cell runs on its own; used to write valid JSON below

executor = fp.ThreadPoolExecutor(max_workers=15)
ticker_pp = {}
bopt_results = {}
with fp.TaskManager(executor) as tm:
    for ticker in tqdm_notebook(tickers, desc="Submitting Models", unit="Ticker"):
        # Sync: Pull in the optimization data for each Ticker
        print(f"Pulling Optimization data for {ticker}...")
        pp = do_bayes_opt(ticker, only_fetch_opt_data=True)
        if pp is None:
            # do_bayes_opt returns None when no data came back; there is
            # nothing to optimize for this ticker, so leave it out.
            continue
        ticker_pp[ticker] = pp
    for ticker in tqdm_notebook(list(ticker_pp), desc="Submitting Models", unit="Ticker"):
        # Async: Optimize the Model's Hyperparameters for each Ticker
        print(f"Optimizing model for {ticker}...")
        tm.submit(do_bayes_opt, ticker, pp_obj=ticker_pp[ticker], do_optimize=True)
    print("Waiting for tasks to complete...")
    # NOTE(review): opt_hyperparams points into the gui_data directory, while
    # this cell writes to the current directory -- confirm which is intended.
    for task in tm.as_completed():
        # futureproof tasks expose the return value as the `result` attribute.
        pp = task.result
        bopt_results[pp.ticker] = pp.bayes_opt_hypers
        # Rewrite the whole file after every completion so that it is always
        # valid JSON and finished results survive an interrupted run.
        # Assumes bayes_opt_hypers is JSON-serializable -- TODO confirm.
        with open("./ticker_bopts.json", 'w') as f:
            json.dump(bopt_results, f, indent=2)
        print(f"Completed Hyperparameters Optimization: {pp.ticker}")


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 605ms/step - loss: 0.0140
[1m30/57[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m13s[0m 485ms/step - loss: 0.0119| [39m6        [39m | [39m-0.01343 [39m | [39m0.01916  [39m | [39m0.2217   [39m | [39m149.5    [39m |
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 582ms/step - loss: 0.0260
[1m53/57[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m2s[0m 552ms/step - loss: 0.0077| [39m6        [39m | [39m-0.02527 [39m | [39m0.01916  [39m | [39m0.2217   [39m | [39m149.5    [39m |
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 579ms/step - loss: 0.0102
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 546ms/step - loss: 0.0075
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 561ms/step - loss: 0.0077
[1m50/57[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m3s[0m 505ms/step - loss: 0.0105| [35m6        [39m | [35m-0.007578[39m | [35m0.

IOStream.flush timed out


[1m 2/57[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m21s[0m 398ms/step - loss: 0.0993