# Evolution Algorithm

In [0]:
# setup Google Colaboratory
!mkdir models
!mkdir data
!mkdir data/stock_prices
!mkdir evolution_model_graphs

## Import dependencies

In [0]:
import numpy as np
from sklearn.model_selection import train_test_split
from os import path

In [0]:
from build_dataset import build_training_dataset, get_stock_data

In [0]:
from models.dnn_regression import DenseNeuralNetwork

In [0]:
import json
from keras.callbacks import TensorBoard
from keras.backend import clear_session
from keras.utils import plot_model
from hashlib import sha256

## Get last run data

In [0]:
# Pick up the state written by a previous run when "last_run.json" exists in
# the working directory; otherwise `last_run` is None and everything below
# starts from scratch.
if path.isfile("last_run.json"):
    with open("last_run.json", "r") as state_file:
        last_run = json.load(state_file)
else:
    last_run = None

## Initialize models

In [0]:
# Models of the current generation; populated by the initialisation cell below.
population = []

In [0]:
# Number of models to create when no previous run is resumed.
POPULATION_SIZE = 10

In [0]:
# Input configuration handed to the models: one `lookback` entry (n=22)
# followed by one `moving_avg` entry per value in _MOVING_AVG_NS, all on the
# GOOGL adjusted close column. Key order is kept stable because the JSON
# serialisation is hashed below.
_MOVING_AVG_NS = (5, 10, 30, 90, 180, 365)
input_options = {
    "config": [
        {"type": "lookback", "n": 22, "stock_code": "GOOGL", "column": "adjusted_close"},
    ] + [
        {"type": "moving_avg", "n": n_value, "stock_code": "GOOGL", "column": "adjusted_close"}
        for n_value in _MOVING_AVG_NS
    ],
    "stock_codes": ["GOOGL"],
    "stock_code": "GOOGL",
    "column": "adjusted_close",
}
# Print the SHA-256 fingerprint of the JSON-serialised configuration.
input_options_json = json.dumps(input_options)
print(sha256(input_options_json.encode()).hexdigest())

In [0]:
# Stock code passed to every model constructed in this notebook run.
stock_code = "GOOGL"

In [0]:
# Build the population: either recreate the models recorded by the previous
# run, or create POPULATION_SIZE models from randomly generated options.
# Models are constructed with build_model=False; build_model() is called
# separately where a model is actually trained.
#
# `population` is assigned (never appended to) in both branches so that
# re-running this cell cannot grow the list beyond the intended size.
if last_run is not None:
    population = [
        DenseNeuralNetwork(
            model["model_options"],
            model["input_options"],
            model["stock_code"],
            build_model=False
        )
        for model in last_run["population"]
    ]
else:
    init_model_options = DenseNeuralNetwork.random_model_options(POPULATION_SIZE, "dense")
    population = [
        DenseNeuralNetwork(
            init_model_options[i],
            input_options,
            stock_code,
            build_model=False
        )
        for i in range(POPULATION_SIZE)
    ]

## Get Stock Data

In [0]:
# Get the data for every stock code listed in the input options once; the
# result is passed to each build_training_dataset() call below.
stock_data = get_stock_data(input_options["stock_codes"])

## Initialize errors

In [0]:
# errors: one error value per model of the current population.
# error_history: one row of population errors per recorded iteration.
errors = []
error_history = []

In [0]:
# Establish the error of every model in the starting population.
# When resuming, the values recorded by the previous run are reused; otherwise
# each model is built, trained and evaluated once.
if last_run is not None:
    errors = last_run["errors"]
    error_history = last_run["error_history"]
else:
    # Collect into a fresh local list: after this cell has run once, `errors`
    # is an ndarray (no .append), so appending to the global would make the
    # cell fail, or accumulate stale entries, on a re-run.
    initial_errors = []
    for model_idx, model in enumerate(population):
        print("Initial model {}".format(model_idx + 1))

        # reset session graph
        clear_session()

        # build the model
        model.build_model()

        # prepare the data
        x, y, other_data = build_training_dataset(model.input_options, model.model_options["predict_n"], stock_data=stock_data)
        # split the data into training set and testing set
        # NOTE(review): the split is unseeded, so every model is evaluated on a
        # different random 10% hold-out; confirm this is intended.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
        # train the model
        model.train(
            x_train,
            y_train,
            verbose=0,
            callbacks=[TensorBoard(log_dir="./evolution_tensorboard_logs/initial_{}".format(model_idx + 1))]
        )
        # calculate the model error; the last entry of the evaluation result is kept
        error = model.model.evaluate(x_test, y_test, verbose=0)
        initial_errors.append(error[-1])
    errors = initial_errors
    # the history starts with a single row: the initial population errors
    error_history = [initial_errors]

# from here on both are NumPy arrays (np.delete / np.append are used on them later)
errors = np.array(errors)
error_history = np.array(error_history)

## Evolution algorithm

In [0]:
# Iteration counter to continue from: 0 on a fresh start, otherwise the count
# stored by the previous run.
last_iterations = 0 if last_run is None else last_run["last_iterations"]

In [0]:
# Number of evolution iterations to perform in this run.
ITERATIONS = 100

In [0]:
# Evolution loop. Each iteration draws two distinct models at random, removes
# the one with the larger error, and replaces it with a mutated child of the
# other one. The population size therefore stays constant.
for i in range(last_iterations, last_iterations + ITERATIONS):
    print("Iteration {}".format(i + 1))

    # randomly choose 2 models
    # Sample over the actual population length instead of POPULATION_SIZE so a
    # resumed population of a different size is indexed correctly.
    first_idx, second_idx = np.random.choice(len(population), size=2, replace=False)

    # the lower error wins; on a tie the first draw is kept
    if errors[first_idx] <= errors[second_idx]:
        better_model_idx, worse_model_idx = first_idx, second_idx
    else:
        better_model_idx, worse_model_idx = second_idx, first_idx

    # take the parent before the removal below shifts list positions
    parent_model = population[better_model_idx]

    # kill and remove the worse model
    population.pop(worse_model_idx)
    errors = np.delete(errors, worse_model_idx)

    # reproduce the child model
    child_model_options, mutation = DenseNeuralNetwork.evolve_model_options(parent_model.model_options)
    print("Mutation: {}".format(mutation))
    child_model = DenseNeuralNetwork(
        child_model_options,
        input_options,
        stock_code,
        build_model=False
    )

    # reset session graph
    clear_session()

    # build the model
    child_model.build_model()

    # train the child model
    # prepare the data
    x, y, other_data = build_training_dataset(child_model.input_options, child_model.model_options["predict_n"], stock_data=stock_data)
    # split the data into training set and testing set
    # NOTE(review): the split is unseeded, so each child is scored on a
    # different random 10% hold-out; confirm this is intended.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)
    # train the child model
    child_model.train(
        x_train,
        y_train,
        verbose=0,
        callbacks=[TensorBoard(log_dir="./evolution_tensorboard_logs/{}".format(i + 1))]
    )
    # calculate the child model error; the last entry of the evaluation result is used
    child_error = child_model.model.evaluate(x_test, y_test, verbose=0)

    # save the child model if it is the best model
    # (strictly better than every surviving model; the removed one is already gone)
    if child_error[-1] < np.min(errors):
        child_model.save_model("./model.h5", DenseNeuralNetwork.KERAS_MODEL)

    # append the child model and its error
    population.append(child_model)
    errors = np.append(errors, child_error[-1])
    # one more history row: the population errors after this iteration
    error_history = np.append(error_history, [errors], axis=0)

    # plot the child model graph
    plot_model(child_model.model, to_file="evolution_model_graphs/{}.png".format(i + 1))

In [0]:
# Select the population member with the smallest recorded error.
best_model_idx = errors.argmin()
best_model = population[best_model_idx]

## Write this run data

In [0]:
# Persist the state needed to resume: total iteration count, the constructor
# arguments of every model in the population, and the error arrays.
run_state = {
    "last_iterations": last_iterations + ITERATIONS,
    "population": [{
        "model_options": model.model_options,
        "input_options": model.input_options,
        "stock_code": model.stock_code
    } for model in population],
    "errors": errors.tolist(),
    "error_history": error_history.tolist()
}
# Serialise BEFORE opening the file: opening with "w" truncates it, so a
# serialisation error raised mid-dump would otherwise leave a broken
# last_run.json and lose the previous checkpoint.
serialized_run_state = json.dumps(run_state, indent=4)
with open("last_run.json", "w") as last_run_file:
    last_run_file.write(serialized_run_state)

In [0]:
# Recursively archive the TensorBoard log directory into a single zip file.
!zip -r evolution_tensorboard_logs.zip evolution_tensorboard_logs

In [0]:
# Recursively archive the model graph images into a single zip file.
!zip -r evolution_model_graphs.zip evolution_model_graphs

## Plot evolution data

In [0]:
import matplotlib.pyplot as plt

In [0]:
# plot evolution error history
# One point per (history row, population member). The x coordinates are derived
# from error_history's own shape instead of POPULATION_SIZE, so x and y always
# have the same length even if the recorded history has a different width.
n_iterations, n_members = error_history.shape
plt.scatter(
    np.repeat(np.arange(n_iterations), n_members),
    error_history.flatten()
)
plt.title("Population Mean Squared Error")
plt.ylabel("Mean Squared Error")
plt.xlabel("Iteration")

In [0]:
# Same scatter as the MSE plot, on the square root of the errors.
# The x coordinates are derived from error_history's own shape instead of
# POPULATION_SIZE, so x and y always have the same length even if the recorded
# history has a different width.
n_iterations, n_members = error_history.shape
plt.scatter(
    np.repeat(np.arange(n_iterations), n_members),
    np.sqrt(error_history).flatten()
)
plt.title("Population Root Mean Squared Error")
plt.ylabel("Root Mean Squared Error")
plt.xlabel("Iteration")

In [0]:
# Lowest error in the population for every row of the error history.
history_rows = np.arange(error_history.shape[0])
lowest_error_per_row = error_history.min(axis=1)
plt.scatter(history_rows, lowest_error_per_row)
plt.title("Population Minimum Mean Squared Error")
plt.ylabel("Mean Squared Error")
plt.xlabel("Iteration")

In [0]:
# Lowest root error in the population for every row of the error history.
history_rows = np.arange(error_history.shape[0])
root_errors = np.sqrt(error_history)
plt.scatter(history_rows, root_errors.min(axis=1))
plt.title("Minimum Root Mean Squared Error")
plt.ylabel("Root Mean Squared Error")
plt.xlabel("Iteration")