### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

import numpy as np

# Bootstrap for hosted runs: when the COLAB_GPU environment variable is present
# (presumably only set on Google Colab — confirm), fetch the project and its
# requirements into the current working directory.
if 'COLAB_GPU' in os.environ:
    !git clone https://github.com/impulsecorp/PickStocks.git
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
# Silence every warning so library chatter does not clutter the notebook output.
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: retry the datablock import once if the
# first attempt fails. Only ordinary exceptions trigger the retry, so
# KeyboardInterrupt / SystemExit are no longer swallowed by the handler.
try:
    from datablock import *
except Exception:
    from datablock import *
from deap import base, creator, tools, algorithms
# Display the seed value (presumably supplied by one of the star imports above
# — confirm) so the run can be reproduced.
seed

In [None]:
# IPython magic: render matplotlib figures inline. %pylab also injects numpy and
# matplotlib names into the namespace; the plotting cells presumably get `plt`
# from here — confirm before replacing this magic.
%pylab inline

### Setup

In [None]:
# Load the SPY price history and run it through the lightweight preprocessing step.
data_timeperiod = 'D'  # bar period handed to get_data (presumably daily bars — TODO confirm)
data = get_data('SPY', period=data_timeperiod, nrows=None)  # nrows=None: presumably "no row limit" — confirm in get_data
data = procdata_lite(data)

In [None]:
# for inspection: print the dataset dimensions and display its first rows
print(data.shape)
data.head()

### ------------------------------------------------------------------------------------------------------------

In [None]:
#### This version of the notebook evolves more complex genomes that use thresholds instead of bins

### Parameters

In [None]:
# Data-split and labelling settings are stored on the `system` module
# (presumably read by its training/backtest helpers — confirm in system.py).
system.train_set_end = 0.5 # fraction of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75    # fraction of the data at which the validation set ends (1.0 means no test set)
system.balance_data = 1  # presumably switches class balancing on — TODO confirm

system.multiclass = 0  # presumably 0 selects binary classification — TODO confirm
system.multiclass_move_threshold = 1.0

combine_method = 'and' # whether to combine the strategies with 'and' or 'or'
multi_objective = 1 # whether to use multiple objectives (NSGA-2) or not

individual_size = 5 # number of strategies to combine for each genome
eval_min_trades = 100 # minimum number of combined trades a genome needs; with fewer, evaluate() returns the worst possible fitness

In [None]:
# EA Parameters
pop_size = 128  # number of genomes in the population
num_generations = 200  # number of generations the main loop runs
crossover_prob = 0.8  # chance that a pair of offspring is mated

mutation_prob = 0.2  # chance that an offspring is handed to the mutation operator
mutation_prob_gene = 0.2  # per-gene chance of being touched inside custom_mutation
mutation_prob_gene_hard = 0.05  # chance that a touched gene is replaced by a brand-new random gene
mutation_prob_gene_c = 0.5  # chance that the gene's threshold value is perturbed
mutation_prob_gene_d = 0.25  # chance that the gene's direction ('above'/'below'/'exact') is re-drawn
mutation_prob_gene_a = 0.1  # chance that the gene's use_abs flag is re-drawn
mutation_power = 0.33  # largest threshold perturbation, as a fraction of the feature's centers span

# Objective weights passed to the DEAP fitness class (only the first one is used
# when multi_objective is off).
avg_profit_fitness_weight = 1.0  # mean profit per trade
winners_fitness_weight = 1.0  # winner percentage
pf_fitness_weight = 1.0  # profit factor

### ------------------------------------------------------------------------------------------------------------

### LGBMClassifier base classifier

In [None]:
# Train the LGBMClassifier base classifier on train data
# (train_classifier returns the fitted model together with its scaler)
clf, scaler = train_classifier(LGBMClassifier, data)

In [None]:
# Test on val data; base_trades holds the unfiltered baseline trades that the
# evolved feature filters are later applied to
equity, pf, base_trades = qbacktest(clf, scaler, data)

In [None]:
# Preview the first baseline trades
base_trades.head()

### ------------------------------------------------------------------------------------------------------------

### Obtain all individual strategies (genes) to combine and their stats

In [None]:
# Presumably limits for pre-selecting individual strategies (profit factor and
# trade-count bounds).
# NOTE(review): none of these three names is referenced in the visible part of
# this notebook — confirm whether they are still needed.
min_pf = 0.1
min_trades = 10
max_trades = 10000

In [None]:
# For every feature, obtain the pair of values whose first and second entries
# (centers[r][0], centers[r][1]) are used as the sampling bounds for gene thresholds.
feature_names, centers = compute_centers(data)
# Display each feature name next to its centers entry
list(zip(feature_names, centers))

### Evolutionary algorithm setup

In [None]:
# Fitness value evaluate() assigns (per objective) to genomes with fewer than
# eval_min_trades combined trades; also the starting point for the best-ever score.
worst_possible_fitness = -999999.0

In [None]:
def evaluate(genome):
    """Score a genome by filtering the baseline trades with each of its genes.

    Every gene ``(r, c, d, a)`` filters ``base_trades`` on feature
    ``feature_names[r]``: direction ``'above'`` passes ``c`` as ``min_value``,
    ``'below'`` as ``max_value`` and anything else as ``exact_value``; ``a`` is
    forwarded as ``use_abs``. A gene that raises is reported (exception, gene
    index, genome) and left out. The per-gene trades are merged with
    ``combined_trades`` using ``combine_method``.

    Returns:
        A fitness tuple. With ``multi_objective`` on it is (mean profit,
        winner percentage, profit factor); otherwise a 1-tuple with the mean
        profit. When fewer than ``eval_min_trades`` combined trades remain,
        every entry is ``worst_possible_fitness``.
    """
    per_gene_trades = []
    for idx, gene in enumerate(genome):
        try:
            feat_idx, threshold, direction, absolute = gene
            # Translate the gene's direction into the matching filter keyword.
            if direction == 'above':
                bound = {'min_value': threshold}
            elif direction == 'below':
                bound = {'max_value': threshold}
            else:
                bound = {'exact_value': threshold}
            filtered = filter_trades_by_feature(base_trades, data,
                                                featformat(feature_names[feat_idx]),
                                                **bound,
                                                use_abs=absolute)
            _, gene_trades = compute_stats(data, filtered)
            per_gene_trades.append(gene_trades)
        except Exception as ex:
            # Best effort: report the failing gene and keep evaluating the rest.
            print(ex)
            print(idx)
            print(genome)

    merged = combined_trades(per_gene_trades, combine_method=combine_method)

    # Too few trades: the genome is not eligible and gets the worst score.
    if len(merged) < eval_min_trades:
        if multi_objective:
            return worst_possible_fitness, worst_possible_fitness, worst_possible_fitness
        return worst_possible_fitness,

    mean_profit = float(np.mean(merged['profit'].values))
    if not multi_objective:
        return mean_profit,
    return mean_profit, float(get_winner_pct(merged)), float(get_profit_factor(merged)),

In [None]:
def rand_gene():
    """Build one random gene.

    Returns:
        A tuple ``(r, c, d, a)``: ``r`` comes from
        ``rnd.randint(0, len(feature_names) - 1)`` and indexes
        ``feature_names``/``centers``, ``c`` is a threshold sampled between
        ``centers[r][0]`` and ``centers[r][1]``, ``d`` is one of ``'above'``,
        ``'below'`` or ``'exact'`` and ``a`` is a random boolean (used as
        ``use_abs`` when the gene is evaluated).
    """
    feature_idx = rnd.randint(0, len(feature_names) - 1)
    bounds = centers[feature_idx]
    threshold = rnd.uniform(bounds[0], bounds[1])
    direction = rnd.choice(['above', 'below', 'exact'])
    absolute = rnd.choice([True, False])
    return feature_idx, threshold, direction, absolute

def create_individual():
    """Return a new genome: a plain list of ``individual_size`` random genes."""
    return [rand_gene() for _ in range(individual_size)]

def create_population(n):
    """Return a list of ``n`` freshly generated ``creator.Individual`` genomes."""
    population = []
    for _ in range(n):
        population.append(creator.Individual(create_individual()))
    return population

def custom_mutation(individual):
    """Mutate a genome in place, gene by gene.

    Each gene is touched with probability ``mutation_prob_gene``. A touched
    gene is either replaced by a brand-new random gene (probability
    ``mutation_prob_gene_hard``) or tweaked: its threshold may be shifted by up
    to ``mutation_power`` times the feature's centers span, and its direction
    and use_abs flag may each be re-drawn. The feature index is kept when
    tweaking.

    Returns:
        A 1-tuple containing the (same, mutated) individual, as DEAP mutation
        operators do.
    """
    for idx in range(len(individual)):
        if rnd.random() >= mutation_prob_gene:
            continue  # this gene stays as it is

        if rnd.random() < mutation_prob_gene_hard:
            # Hard mutation: throw the gene away and draw a new one.
            individual[idx] = rand_gene()
            continue

        feat, threshold, direction, absolute = individual[idx]

        # Largest allowed threshold shift for this feature.
        max_shift = (centers[feat][1] - centers[feat][0]) * mutation_power
        if rnd.random() < mutation_prob_gene_c:
            threshold += rnd.uniform(-max_shift, max_shift)
        if rnd.random() < mutation_prob_gene_d:
            direction = rnd.choice(['above', 'below', 'exact'])
        if rnd.random() < mutation_prob_gene_a:
            absolute = rnd.choice([True, False])
        individual[idx] = (feat, threshold, direction, absolute)
    return individual,

# Fitness class: three maximised objectives (avg profit, winner %, profit factor)
# in multi-objective mode, otherwise the avg-profit objective alone.
creator.create("FitnessMax", base.Fitness, weights=(avg_profit_fitness_weight, winners_fitness_weight, pf_fitness_weight)
                                                   if multi_objective else (avg_profit_fitness_weight,))
# Genomes are lists of genes carrying a FitnessMax instance.
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Register individual and population creation functions
# (note: "individual" yields a plain list; create_population wraps each one in creator.Individual)
toolbox.register("individual", create_individual)
toolbox.register("population", create_population)
# Register genetic operators
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", custom_mutation)
# NSGA-II selection for multiple objectives, plain best-first selection otherwise
toolbox.register("select", tools.selNSGA2 if multi_objective else tools.selBest)
# Register the fitness function
# (the lambda looks `evaluate` up at call time, so re-running the evaluate cell takes effect without re-registering)
toolbox.register("evaluate", lambda x: evaluate(x))

### ------------------------------------------------------------------------------------------------------------

### Evolutionary algorithm

In [None]:
# Evolution loop.
#
# Each generation: clone the current population as parents, apply crossover and
# mutation to the clones, evaluate the genomes that changed, then let
# toolbox.select keep the best len(pop) genomes out of parents + changed
# offspring. Cloning matters: toolbox.select returns the very same objects that
# live in `pop`, so varying them directly would rewrite the population in place
# and no selection would ever happen.


def _fitness_sum(ind):
    """Return the sum of an individual's fitness values (the score used for stats and records)."""
    return np.sum(ind.fitness.values)


# Create initial population
pop = toolbox.population(n=pop_size)
# Evaluate the initial population
fitnesses = list(map(toolbox.evaluate, pop))
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit
# Set up the statistics and logbook
stats = tools.Statistics(_fitness_sum)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)
logbook = tools.Logbook()
logbook.header = "gen", "evals", "std", "min", "avg", "max"
# Record initial population statistics
record = stats.compile(pop)
logbook.record(gen=0, evals=len(pop), **record)
print(logbook.stream)
# Run the genetic algorithm
best_ever = worst_possible_fitness
cbest = None
try:
    for gen in range(1, num_generations + 1):

        # Parents are clones, so variation never touches members of `pop`
        offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if rnd.random() < crossover_prob:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if rnd.random() < mutation_prob:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate only the offspring whose genome actually changed; unchanged
        # clones would merely duplicate their parents, so they are dropped
        changed = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = list(map(toolbox.evaluate, changed))
        for ind, fit in zip(changed, fitnesses):
            ind.fitness.values = fit
        candidates = pop + changed
        # keep the best ever found, judged by the same fitness sum that is
        # compared against best_ever
        ctop = max(candidates, key=_fitness_sum)
        if _fitness_sum(ctop) > best_ever:
            print('NEW RECORD:', _fitness_sum(ctop))
            cbest = toolbox.clone(ctop)
            best_ever = _fitness_sum(ctop)
        # Replace the old population with the best of the old population and the offspring
        pop[:] = toolbox.select(candidates, len(pop))
        # Update the statistics and logbook (evals = genomes evaluated this generation)
        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(changed), **record)
        print(logbook.stream)
except KeyboardInterrupt:
    print('Interrupted.')

# the best individual found (None when no genome ever beat worst_possible_fitness,
# e.g. after an early interrupt or when no genome reached eval_min_trades)
best_ind = cbest
if best_ind is None:
    print("\nNo individual scored above the worst possible fitness; there is no best individual to report.")
else:
    print("\nBest individual: {}\nBest score: {}".format(best_ind, np.sum(best_ind.fitness.values)))

In [None]:
# Description of the best strategy, one line per gene:
# feature name, direction (above/below/exact), threshold value and the use_abs flag
for i in range(len(best_ind)):
    r,c,d,a = best_ind[i]
    print(feature_names[r], d, f'{c:.5f} abs: {a}')

In [None]:
# Name under which the following test cells read the evolved genome
best_genome = best_ind

### ------------------------------------------------------------------------------------------------------------

### Test the best individual on val data

In [None]:
# Apply every gene of the best genome as a filter on the baseline trades,
# combine the per-gene results and plot the cumulative profit.
alltrades = []
for i, gene in enumerate(best_genome):
    try:
        r, c, d, a = gene
        # 'above' -> lower bound, 'below' -> upper bound, anything else -> exact match
        if d == 'above':
            bound = {'min_value': c}
        elif d == 'below':
            bound = {'max_value': c}
        else:
            bound = {'exact_value': c}
        filtered = filter_trades_by_feature(base_trades, data,
                                            featformat(feature_names[r]),
                                            **bound,
                                            use_abs=a)
        _, mtrades = compute_stats(data, filtered)
        alltrades.append(mtrades)
    except Exception as ex:
        # Report the failing gene and carry on with the remaining ones
        print(ex)
        print(i)
        print(best_genome)
alltrades = combined_trades(alltrades, combine_method=combine_method)
plt.plot(alltrades['profit'].cumsum())
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

### Test the best individual on unseen data

In [None]:
# Base test without the filter
# (skip_val=1, skip_test=0 presumably move the backtest from the validation to the
# test segment and quiet=1 presumably suppresses its output — confirm in qbacktest)
equity, _, test_trades = qbacktest(clf, scaler, data, skip_val=1, skip_test=0, quiet=1)

In [None]:
# Test with the filter: apply every gene of the best genome to the test trades,
# combine the per-gene results and plot the cumulative profit.
alltrades = []
for i, gene in enumerate(best_genome):
    try:
        r, c, d, a = gene
        # 'above' -> lower bound, 'below' -> upper bound, anything else -> exact match
        if d == 'above':
            bound = {'min_value': c}
        elif d == 'below':
            bound = {'max_value': c}
        else:
            bound = {'exact_value': c}
        filtered = filter_trades_by_feature(test_trades, data,
                                            featformat(feature_names[r]),
                                            **bound,
                                            use_abs=a)
        _, mtrades = compute_stats(data, filtered)
        alltrades.append(mtrades)
    except Exception as ex:
        # Report the failing gene and carry on with the remaining ones
        print(ex)
        print(i)
        print(best_genome)
alltrades = combined_trades(alltrades, combine_method=combine_method)
plt.plot(alltrades['profit'].cumsum())
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Show the first 20 entries of the combined, filtered test trades
alltrades[0:20]

### ------------------------------------------------------------------------------------------------------------