### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

import numpy as np

# Colab bootstrap: the presence of the COLAB_GPU environment variable is used
# here as the signal that the notebook runs on Google Colab. In that case the
# project repository is cloned and its modules/data are moved next to the
# notebook so the plain `import system` / `from datablock import *` below work.
if 'COLAB_GPU' in os.environ:
    # fetch the project sources
    !git clone https://github.com/impulsecorp/PickStocks.git
    # make the project's .py modules importable from the working directory
    !mv PickStocks/*.py .
    # bring the data directory alongside the notebook
    !mv PickStocks/data .
    # install/upgrade the project's Python dependencies (quiet output)
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
warnings.filterwarnings("ignore")
import system
from system import *
# small hack to prevent Colab error:
# both branches run the same star-import, i.e. a failed first attempt is
# simply retried once.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit;
# which exception Colab actually raises is not visible here — confirm before
# narrowing it.
try:
    from datablock import *
except:
    from datablock import *
from deap import base, creator, tools, algorithms
seed

In [None]:
%pylab inline

### Setup

In [None]:
# Period code handed to get_data ('D' presumably means daily bars — confirm in get_data)
data_timeperiod = 'D'
# Load the SPY series; nrows=None presumably means "no row limit" — TODO confirm
data = get_data('SPY', period=data_timeperiod, nrows=None)
# Post-process the raw data with the project's procdata_lite helper (not visible here)
data = procdata_lite(data)

In [None]:
# for inspection: print the dataset dimensions and display its first rows
print(data.shape)
data.head()

### ------------------------------------------------------------------------------------------------------------

In [None]:
#### This version of the notebook evolves more complex genomes that use thresholds instead of bins

### Parameters

In [None]:
# Data-split and preprocessing settings, stored as attributes on the imported
# `system` module (they are read by code inside that module, not visible here).
system.train_set_end = 0.5 # fraction (0.0-1.0) of the data at which the training set ends (1.0 means all data is training set)
system.val_set_end = 0.75    # fraction (0.0-1.0) of the data at which the validation set ends (1.0 means no test set)
# NOTE(review): 1 presumably switches class balancing / feature scaling on — confirm in system.py
system.balance_data = 1
system.scale_data = 1

# NOTE(review): multiclass settings; their exact meaning is defined in system.py — confirm there
system.multiclass = 0
system.multiclass_move_threshold = 1.0

combine_method = 'and' # whether to combine the strategies with 'and' or 'or'

individual_size = 5 # number of strategies (genes) to combine for each genome
eval_min_trades = 100 # minimum number of trades to be eligible for reproduction

In [None]:
# EA Parameters
# The first five values are passed straight to run_evolution(); their exact
# interpretation is defined there (not visible in this notebook).
pop_size = 128
num_generations = 50
survival_rate = 0.2
crossover_prob = 0.5
mutation_prob = 0.5
# The remaining values are read by custom_mutation():
mutation_prob_gene = 0.1       # per-gene probability that the gene is mutated at all
mutation_prob_gene_hard = 0.1  # given a mutation, probability of replacing the gene with a fresh random one
mutation_prob_gene_c = 0.5     # otherwise: probability of shifting the gene's threshold
mutation_prob_gene_d = 0.25    # otherwise: probability of switching the gene's direction ('above'/'below'/'exact')
mutation_prob_gene_a = 0.0     # otherwise: probability of toggling the gene's abs flag (0.0 = never)
mutation_power = 0.2           # max threshold shift, as a fraction of the feature's range width

In [None]:
def objective_1(a):
    """Fitness objective: arithmetic mean of the 'profit' column of `a`, as a float."""
    profits = a['profit'].values
    mean_profit = np.mean(profits)
    return float(mean_profit)
def objective_2(a):
    """Fitness objective: winner percentage of `a`, as reported by get_winner_pct()."""
    winner_pct = get_winner_pct(a)
    return float(winner_pct)
def objective_3(a):
    """Fitness objective: profit factor (PF) of `a`, as reported by get_profit_factor()."""
    profit_factor = get_profit_factor(a)
    return float(profit_factor)
def objective_4(a):
    """Fitness objective: number of trades, i.e. ``len(a)`` converted to float."""
    trade_count = len(a)
    return float(trade_count)

from empyrical import sortino_ratio, omega_ratio, sharpe_ratio, calmar_ratio, stability_of_timeseries
def obj_sortino(a):
    """Fitness objective: empyrical's Sortino ratio of the 'profit' column of `a`, as a float."""
    ratio = sortino_ratio(a['profit'].values)
    return float(ratio)

# Combine one or more objectives here for the fitness function and set their relative weights.
# Each entry is an (objective_function, weight) pair: the list is handed to
# fitness_function()/run_evolution(), and the weights (second element of each
# pair) become the DEAP fitness weights in the creator setup further below.
# Commented-out entries are alternative objectives that are currently disabled.
objectives = [

      (obj_sortino, 1.0),

    # (objective_1, 0.2),
    # (objective_2, 0.5),
    # (objective_3, 1.0),

              ]

### ------------------------------------------------------------------------------------------------------------

### Base classifier

In [None]:
# Train LogisticRegression classifier on train data
# train_classifier hands back two objects, unpacked here as the classifier and
# its scaler; both are reused by the qbacktest() calls later in the notebook.
clf, scaler = train_classifier(LogisticRegression, data)

In [None]:
# Test on val data
# qbacktest returns three values, unpacked here as equity, pf and base_trades.
# base_trades is the trade table that every genome is evaluated against below.
equity, pf, base_trades = qbacktest(clf, scaler, data)

In [None]:
base_trades.head()

### ------------------------------------------------------------------------------------------------------------

### Obtain all individual strategies (genes) to combine and their stats

In [None]:
# Feature names plus one range per feature; elements [0] and [1] of each range
# are the two bounds used when sampling and mutating gene thresholds
# (see rand_gene / custom_mutation).
feature_names, ranges = compute_ranges(data)
# Cell output: (feature name, range) pairs for inspection
list(zip(feature_names, ranges))

### Evolutionary algorithm setup

In [None]:
# Sentinel fitness value passed to both fitness_function() and run_evolution()
# (presumably assigned to genomes that are not eligible, e.g. too few trades — confirm there)
worst_possible_fitness = -999999.0

In [None]:
def evaluate(genome):
    """Compute the fitness of `genome`.

    The trades selected by the genome are obtained from
    get_genome_alltrades_nonbinned() (using the notebook-level `data`,
    `base_trades`, `feature_names` and `combine_method`), and are then scored
    by fitness_function() with the configured `objectives`,
    `eval_min_trades` and `worst_possible_fitness`.

    Returns:
        Whatever fitness_function() returns for those trades.
    """
    genome_trades = get_genome_alltrades_nonbinned(
        data,
        genome,
        base_trades,
        feature_names,
        combine_method=combine_method,
    )
    return fitness_function(
        genome_trades,
        objectives=objectives,
        eval_min_trades=eval_min_trades,
        worst_possible_fitness=worst_possible_fitness,
    )

def eval_wrapper(ind_list):
    """Thin adapter around evaluate(); this is the callable registered as the toolbox's "evaluate" operator."""
    fitness = evaluate(ind_list)
    return fitness

In [None]:
def rand_gene():
    """Create one random gene.

    Returns:
        A tuple ``(feature_index, threshold, direction, abs_flag)`` where
        the feature index is drawn via ``rnd.randint`` over the positions of
        `feature_names`, the threshold is drawn uniformly between that
        feature's two range bounds, the direction is one of
        'above' / 'below' / 'exact', and the abs flag is always False.
    """
    feature_idx = rnd.randint(0, len(feature_names) - 1)
    bounds = ranges[feature_idx]
    threshold = rnd.uniform(bounds[0], bounds[1])
    direction = rnd.choice(['above', 'below', 'exact'])
    # The abs flag is pinned to False; random selection of it is disabled.
    use_abs = False

    return feature_idx, threshold, direction, use_abs

def create_individual():
    """Return a new genome: a plain list of `individual_size` random genes."""
    return [rand_gene() for _ in range(individual_size)]

def create_population(n):
    """Return a list of `n` freshly created genomes, each wrapped in creator.Individual."""
    population = []
    for _ in range(n):
        genome = create_individual()
        population.append(creator.Individual(genome))
    return population

def custom_mutation(individual):
    """Mutate `individual` in place, gene by gene, and return it as a 1-tuple.

    Each gene is a ``(feature_index, threshold, direction, abs_flag)`` tuple.
    Every gene is selected for mutation with probability
    ``mutation_prob_gene``; a selected gene is then either

    * replaced by a fresh ``rand_gene()`` (probability
      ``mutation_prob_gene_hard``), or
    * perturbed field by field, each field independently:
      the threshold is shifted by a uniform amount of at most
      ``mutation_power`` times the width of the feature's range
      (probability ``mutation_prob_gene_c``), the direction is switched to
      a different one (probability ``mutation_prob_gene_d``), and the abs
      flag is toggled (probability ``mutation_prob_gene_a``).

    The soft mutation never changes the feature index, and the shifted
    threshold is not clamped to the feature's range.

    Returns:
        The one-element tuple ``(individual,)``.
    """
    directions = ['above', 'below', 'exact']

    for pos, gene in enumerate(individual):
        # Gene not selected for mutation: leave it untouched.
        if rnd.random() >= mutation_prob_gene:
            continue

        # Hard mutation: throw the gene away and draw a new one.
        if rnd.random() < mutation_prob_gene_hard:
            individual[pos] = rand_gene()
            continue

        # Soft mutation: tweak the existing gene's fields.
        feature_idx, threshold, direction, use_abs = gene
        upper = ranges[feature_idx][1]
        lower = ranges[feature_idx][0]
        max_shift = np.abs(upper - lower) * mutation_power

        if rnd.random() < mutation_prob_gene_c:
            threshold += rnd.uniform(-max_shift, max_shift)

        if rnd.random() < mutation_prob_gene_d:
            # Redraw until the direction actually differs from the current one.
            while True:
                candidate = rnd.choice(directions)
                if candidate != direction:
                    break
            direction = candidate

        if rnd.random() < mutation_prob_gene_a:
            use_abs = not use_abs

        individual[pos] = (feature_idx, threshold, direction, use_abs)

    return (individual,)

In [None]:
# Fitness type: one weight per configured objective, taken in order from the
# second element of each entry in `objectives`.
creator.create(
    "FitnessMax",
    base.Fitness,
    weights=[entry[1] for entry in objectives],
)
# Genome type: a list of genes that carries a FitnessMax instance.
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Fitness evaluation
toolbox.register("evaluate", eval_wrapper)

# Creation of individuals and populations
toolbox.register("population", create_population)
toolbox.register("individual", create_individual)

# Genetic operators: selection, crossover, mutation
toolbox.register("select", tools.selSPEA2)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", custom_mutation)

### ------------------------------------------------------------------------------------------------------------

### Evolutionary algorithm

In [None]:
# Run the evolutionary search with the toolbox and EA parameters configured above.
# run_evolution returns two values, unpacked here as the best genome found and its score.
best_genome, best_score = run_evolution(pop_size, toolbox, num_generations, survival_rate, crossover_prob, mutation_prob, objectives, worst_possible_fitness)

In [None]:
# Description of the best strategy (feature name : [ condition to trade ])
# One line per gene: feature name, direction, threshold, and "abs" when the
# gene's abs flag is set.
for i, (r, c, d, a) in enumerate(best_genome):
    print(feature_names[r], ':', d, f'{c:.5f} {"abs" if a else ""}')

### ------------------------------------------------------------------------------------------------------------

### Test the best individual on val data

In [None]:
# Apply the best genome to the base (validation) trades
alltrades = get_genome_alltrades_nonbinned(data, best_genome, base_trades, feature_names, combine_method=combine_method)
# Cumulative profit curve of the selected trades
plt.plot(alltrades['profit'].cumsum())
# Summary statistics: profit factor, winner percentage, and trade count
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

### Test the best individual on unseen data

In [None]:
# Base test without the filter
# Only the last value returned by qbacktest (the trades) is kept; the others are discarded.
# NOTE(review): skip_val=1 / skip_test=0 presumably select the test split instead of
# the validation split, and quiet=1 presumably suppresses output — confirm in qbacktest.
*_, test_trades = qbacktest(clf, scaler, data, skip_val=1, skip_test=0, quiet=1)

In [None]:
# Test with the filter
# Same procedure as for the validation trades, but applied to test_trades.
alltrades = get_genome_alltrades_nonbinned(data, best_genome, test_trades, feature_names, combine_method=combine_method)
# Cumulative profit curve of the selected trades
plt.plot(alltrades['profit'].cumsum())
# Summary statistics: profit factor, winner percentage, and trade count
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
alltrades[0:20]

### ------------------------------------------------------------------------------------------------------------