### ------------------------------------------------------------------------------------------------------------

In [None]:
import os

# Colab bootstrap: runs only when the COLAB_GPU environment variable is set.
# It clones the PickStocks repository and moves its Python modules and data
# directory into the working directory (presumably so the imports in the
# following cells resolve -- confirm).
if 'COLAB_GPU' in os.environ:
    !git clone https://github.com/impulsecorp/PickStocks.git
    # Move the modules and the data directory next to the notebook.
    !mv PickStocks/*.py .
    !mv PickStocks/data .
    # Quietly install/upgrade the requirements shipped with the repository.
    !pip install -U -qq -r PickStocks/requirements.txt

In [None]:
import warnings
warnings.filterwarnings("ignore")
import system
from system import *
# Small hack to prevent a Colab error: if the first import of `datablock`
# fails, retry it once. Only ordinary exceptions trigger the retry, so that
# KeyboardInterrupt / SystemExit are not swallowed by the workaround.
try:
    from datablock import *
except Exception:
    # A second failure propagates normally and shows the real error.
    from datablock import *
from deap import base, creator, tools, algorithms
# Display the value of `seed` (presumably exported by one of the star
# imports above -- TODO confirm where it is defined).
seed

In [None]:
%pylab inline

### Setup

In [None]:
# Load the SPY series at the configured period (no row limit) and pass it
# straight through the `procdata_lite` preprocessing step.
data_timeperiod = 'D'
data = procdata_lite(get_data('SPY', period=data_timeperiod, nrows=None))

In [None]:
# For inspection: print the shape of the processed data and show its first rows.
print(data.shape)
data.head()

In [None]:
# Configure the train / validation / test split on the `system` module.
# With these values the first 50% is train, 50%-75% is validation and the
# remainder is left as the test set.
system.train_set_end = 0.5 # percentage point specifying the training set end point (1.0 means all data is training set)
system.val_set_end = 0.75    # percentage point specifying the validation set end point (1.0 means no test set)
# NOTE(review): presumably a flag that enables class balancing -- confirm in system.py
system.balance_data = 1

### ------------------------------------------------------------------------------------------------------------

### LogisticRegression base classifier

In [None]:
# Train LogisticRegression classifier on train data.
# `clf` and `scaler` are passed together to every `qbacktest` call below.
clf, scaler = train_classifier(LogisticRegression, data)

In [None]:
# Test on val data.
# `base_trades` is the trade pool that every feature-bin filter below selects
# from, both when scanning single bins and inside the GA fitness function.
equity, pf, base_trades = qbacktest(clf, scaler, data)

In [None]:
# Peek at the first base trades produced by the backtest above.
base_trades.head()

### ------------------------------------------------------------------------------------------------------------

### Obtain all individual strategies (genes) to combine and their stats

In [None]:
# Thresholds a single feature-bin strategy must meet to be kept as a candidate
# gene: `pf >= min_pf` and `min_trades <= number of trades <= max_trades`.
# (`pf` is presumably the profit factor returned by compute_stats -- confirm.)
min_pf = 0.1
min_trades = 10
max_trades = 10000

nbins = 20 # number of bins for each feature (see FeatureMatrix for reference)
individual_size = 20 # number of strategies to combine for each genome

In [None]:
# Scan every (feature, bin) pair as a candidate single-filter strategy ("gene").
# For each pair the base trades are filtered to the rows whose feature value
# falls inside the bin, and the stats of the surviving trades are recorded.
# Loop variables are deliberately named so that they do not rebind `coords`
# (the list built here) or `pf` (the result of the base backtest).

# Feature names are derived from the columns whose name contains 'X'.
feature_names = [featdeformat(x) for x in data.filter(like='X')]

# Per-feature (min, max) over the whole data frame.
feature_ranges = []
for fn in feature_names:
    d = data[featformat(fn)].values
    feature_ranges.append((np.min(d), np.max(d)))

# `nbins` bins need `nbins + 1` equally spaced edges per feature.
num_bins = nbins + 1
feat_bins = np.array([np.linspace(fmin, fmax, num_bins) for fmin, fmax in feature_ranges])

# Flat, parallel lists: one entry per (feature, bin) pair that produced trades.
pf_matrix = []
nt_matrix = []
wn_matrix = []
coords = []
for row_idx, (fname, bins) in enumerate(zip(tqdm(feature_names), feat_bins)):
    # col_idx is the index of the bin's upper edge, so bin c spans edges [c-1, c].
    for col_idx in range(1, len(bins)):
        # Defensive ordering of the two edges (lower first).
        lo, hi = sorted((bins[col_idx - 1], bins[col_idx]))
        bin_pf, bin_trades = compute_stats(
            data,
            filter_trades_by_feature(base_trades, data, featformat(fname),
                                     min_value=lo, max_value=hi))
        # -1 is presumably compute_stats' "no valid result" marker -- confirm.
        if (bin_pf != -1) and (len(bin_trades) > 0):
            pf_matrix.append(bin_pf)
            nt_matrix.append(len(bin_trades))
            wn_matrix.append(get_winner_pct(bin_trades))
            coords.append((row_idx, col_idx))

# Rank the candidates by winner percentage, best first.
zpd = sorted(list(zip(pf_matrix, nt_matrix, wn_matrix, coords)), key=lambda x: x[2], reverse=True)

# Keep only the candidates that satisfy the trade-count and pf thresholds;
# `all_coords` is the gene pool the genetic algorithm samples from.
top_pfs = []
top_nts = []
top_wns = []
all_coords = []
for cand_pf, cand_nt, cand_wn, cand_coord in zpd:
    if (min_trades <= cand_nt <= max_trades) and (cand_pf >= min_pf):
        top_pfs.append(cand_pf)
        top_nts.append(cand_nt)
        top_wns.append(cand_wn)
        all_coords.append(cand_coord)
pd.DataFrame(data=list(zip(top_pfs, top_nts, top_wns)), columns=['PF', 'Trades', ' % Winners'])

### Evolutionary algorithm setup

In [None]:
def evaluate(coord_list):
    """Fitness of one individual: winner percentage of its combined trades.

    Each gene ``(r, c)`` selects the base trades whose feature
    ``feature_names[r]`` lies between the bin edges ``feat_bins[r][c-1]`` and
    ``feat_bins[r][c]``. The trades selected by all genes are concatenated,
    de-duplicated, sorted by index and scored with ``get_winner_pct``.

    Parameters
    ----------
    coord_list : sequence of (row, col) pairs
        The genes of the individual.

    Returns
    -------
    tuple
        One-element tuple ``(winner_pct,)``, the shape DEAP expects for a
        single-objective fitness. ``(0.0,)`` is returned when no gene yielded
        a trade frame (empty ``coord_list`` or every gene failed), instead of
        letting ``pd.concat`` raise on an empty list.
    """
    alltrades = []
    for i, coord in enumerate(coord_list):
        try:
            r, c = coord
            _, mtrades = compute_stats(data,
                                       filter_trades_by_feature(base_trades, data,
                                                                featformat(feature_names[r]),
                                                                min_value=feat_bins[r][c-1],
                                                                max_value=feat_bins[r][c]))
            alltrades.append(mtrades)
        except Exception as ex:
            # Best effort: report the failing gene and keep scoring the rest.
            print(ex)
            print(i)
            print(coord_list)
    if not alltrades:
        # Nothing to concatenate: score as the worst possible individual.
        return 0.0,
    # NOTE(review): drop_duplicates() compares column values only, not the
    # index -- confirm that two distinct trades can never have identical rows.
    alltrades = pd.concat(alltrades, axis=0).drop_duplicates().sort_index()
    return float(get_winner_pct(alltrades)),

In [None]:
# Single-objective maximisation (weight +1.0); an individual is a plain list
# of (row, col) gene coordinates carrying a FitnessMax.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)


def create_individual():
    """Return `individual_size` genes drawn from `all_coords` with `rnd.sample`."""
    return rnd.sample(all_coords, individual_size)


def create_population(n):
    """Return a list of `n` freshly sampled `creator.Individual` objects."""
    return [creator.Individual(create_individual()) for _ in range(n)]


toolbox = base.Toolbox()
# Register individual and population creation functions.
# `toolbox.individual()` wraps the sampled genes in creator.Individual so the
# result carries a `.fitness`, exactly like the members of a population.
toolbox.register("individual", lambda: creator.Individual(create_individual()))
toolbox.register("population", create_population)
# Register genetic operators
toolbox.register("mate", tools.cxTwoPoint)


def custom_mutation(individual):
    """Replace each gene, with probability `mutation_prob_gene`, by a random gene.

    `mutation_prob_gene` is a module-level name looked up when the function is
    called, so it only has to be defined before the GA runs. The individual
    is modified in place and returned as a one-element tuple, the shape DEAP
    mutation operators use.
    """
    for i in range(len(individual)):
        if rnd.random() < mutation_prob_gene:
            individual[i] = rnd.choice(all_coords)
    return individual,


toolbox.register("mutate", custom_mutation)
toolbox.register("select", tools.selBest)
# Register the fitness function. The lambda looks `evaluate` up at call time,
# so re-running the cell that defines it takes effect without re-registering.
toolbox.register("evaluate", lambda x: evaluate(x))

### Evolutionary algorithm

In [None]:
# Parameters
pop_size = 100  # number of individuals in the population
num_generations = 50  # generations run after the initial evaluation
crossover_prob = 0.8  # chance that a pair of adjacent offspring is mated
mutation_prob = 0.2  # chance that an individual is passed to the mutation operator
mutation_prob_gene = 0.2  # per-gene replacement chance inside custom_mutation

In [None]:
# Create and evaluate the initial population.
pop = toolbox.population(n=pop_size)
fitnesses = list(map(toolbox.evaluate, pop))
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit

# Set up the statistics and logbook.
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)
logbook = tools.Logbook()
logbook.header = "gen", "evals", "std", "min", "avg", "max"

# Record initial population statistics.
record = stats.compile(pop)
logbook.record(gen=0, evals=len(pop), **record)
print(logbook.stream)

# Run the genetic algorithm with (mu + lambda) survivor selection.
# Selecting len(pop) individuals out of pop alone would return the whole
# population unchanged (only reordered), i.e. no selection pressure, and
# varying the parents in place could destroy the best solution found so far.
# Instead, variation is applied to clones and the survivors are chosen from
# parents and changed offspring together.
for gen in range(1, num_generations + 1):
    # Work on deep copies so the parents keep their genes and fitness.
    offspring = [toolbox.clone(ind) for ind in pop]

    # Apply crossover and mutation on the offspring; every touched individual
    # gets its fitness invalidated.
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if rnd.random() < crossover_prob:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values
    for mutant in offspring:
        if rnd.random() < mutation_prob:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Evaluate only the offspring that actually changed; untouched clones are
    # exact copies of their parents and are dropped to avoid duplicates.
    changed = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = list(map(toolbox.evaluate, changed))
    for ind, fit in zip(changed, fitnesses):
        ind.fitness.values = fit

    # Survivors: best len(pop) among parents and changed offspring.
    pop[:] = toolbox.select(pop + changed, len(pop))

    # Update the statistics and logbook (evals = evaluations this generation).
    record = stats.compile(pop)
    logbook.record(gen=gen, evals=len(changed), **record)
    print(logbook.stream)

# Get the best individual from the final population.
best_ind = tools.selBest(pop, 1)[0]
print("\nBest individual: {}\nBest score: {}".format(best_ind, best_ind.fitness.values[0]))

In [None]:
# Alias for the GA winner; the checks below read its (row, col) genes from `best_coords`.
best_coords = best_ind

### Test the best individual on val data (the same base trades the GA was optimized on)

In [None]:
# Apply every gene of the best individual to the base trades, merge the
# selected trades (de-duplicated, index-sorted), then plot the cumulative
# profit and print the summary stats.
alltrades = []
for r, c in best_coords:
    lower_edge = feat_bins[r][c-1]
    upper_edge = feat_bins[r][c]
    selected = filter_trades_by_feature(base_trades, data,
                                        featformat(feature_names[r]),
                                        min_value=lower_edge,
                                        max_value=upper_edge)
    _, mtrades = compute_stats(data, selected)
    alltrades.append(mtrades)
alltrades = pd.concat(alltrades, axis=0)
alltrades = alltrades.drop_duplicates().sort_index()
plt.plot(alltrades['profit'].cumsum())
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

### Test the best individual on unseen data

In [None]:
# Base test without the filter.
# NOTE(review): skip_val=1 / skip_test=0 presumably make qbacktest skip the
# validation span and run on the test span instead -- confirm in qbacktest.
# `test_trades` is the trade pool that the filtered test below selects from.
equity, _, test_trades = qbacktest(clf, scaler, data, skip_val=1, skip_test=0, quiet=1)

In [None]:
# Test with the filter: apply every gene of the best individual to the test
# trades, merge the selected trades (de-duplicated, index-sorted), then plot
# the cumulative profit and print the summary stats.
alltrades = []
for r, c in best_coords:
    lower_edge, upper_edge = feat_bins[r, c-1], feat_bins[r, c]
    selected = filter_trades_by_feature(test_trades, data,
                                        featformat(feature_names[r]),
                                        min_value=lower_edge,
                                        max_value=upper_edge)
    _, mtrades = compute_stats(data, selected)
    alltrades.append(mtrades)
alltrades = pd.concat(alltrades, axis=0)
alltrades = alltrades.drop_duplicates().sort_index()
plt.plot(alltrades['profit'].cumsum())
print(f'Profit factor: {get_profit_factor(alltrades):.5f}, Winners: {get_winner_pct(alltrades):.2f}%, Trades: {len(alltrades)}')

In [None]:
# Show the first 20 rows of the filtered test trades built in the previous cell.
alltrades[0:20]

### ------------------------------------------------------------------------------------------------------------