<a href="https://colab.research.google.com/github/komo135/tradingrl/blob/master/neuro_evolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Google ドライブをマウントするには、このセルを実行してください。
from google.colab import drive
drive.mount('/content/drive')

%cd drive/My Drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive


In [2]:
# Install the `ta` technical-analysis package imported by the next cell.
# NOTE(review): no version is pinned. The preprocessing code calls ta.daily_log_return and
# ta.macd_diff at the package top level -- confirm the installed release still exposes them there.
!pip install ta



In [0]:
import numpy as np
import math
import pandas as pd
import pickle
from numba import jit as njit
from functools import lru_cache
import time
import random
import ta
from net import *
from memory import *
from reward import *

In [0]:
class neuralnetwork:
    """Container for the weights of a two-layer dense network with three outputs.

    Attributes:
        W1: array of shape (window_size, hidden_size), standard-normal draws
            divided by sqrt(window_size).
        W2: array of shape (hidden_size, 3), standard-normal draws divided by
            sqrt(hidden_size).
        fitness: score slot, initialised to 0.
        id: identifier supplied by the caller.
    """

    def __init__(self, id_, hidden_size = 128*3, window_size = 100*2):
        # W1 is drawn before W2 so the global NumPy random stream is consumed in a fixed order.
        input_scale = np.sqrt(window_size)
        self.W1 = np.random.standard_normal(size=(window_size, hidden_size)) / input_scale

        hidden_scale = np.sqrt(hidden_size)
        self.W2 = np.random.standard_normal(size=(hidden_size, 3)) / hidden_scale

        self.id = id_
        self.fitness = 0

@njit
def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
  decay = np.exp(-x)
  return 1 / (1 + decay)

def swish(x):
  """Swish activation: ``x * logistic(x)``, element-wise.

  Returns a new value and leaves ``x`` untouched. The previous in-place
  ``x *= ...`` overwrote the caller's array and raised a casting error on
  integer arrays.

  The logistic term is the same formula as ``sigmoid`` and is evaluated
  directly with NumPy here.
  """
  x = np.asanyarray(x)
  return x * (1 / (1 + np.exp(-x)))
 
def relu(X):
    """Rectified linear unit: element-wise maximum of ``X`` and zero."""
    floor = 0
    return np.maximum(X, floor)
  
def softmax(X):
    """Numerically stable softmax over the last axis.

    The row maximum is subtracted before exponentiation so large logits do not
    overflow. Each slice along the last axis of the result sums to 1.

    This function is intentionally not jitted. ``njit`` in this file is an alias
    for ``numba.jit``, and the ``axis``/``keepdims`` arguments used here are not
    supported by Numba's nopython mode. The decorator therefore either fell back
    to object mode (no speed-up, deprecated) or failed to compile, depending on
    the installed Numba version.
    """
    shifted = X - np.max(X, axis=-1, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis=-1, keepdims=True)
  
def feed_forward(X, nets):
    """Run one forward pass and return class probabilities of shape (1, n_out).

    Args:
        X: input window; it is flattened into a single row vector.
        nets: object exposing weight matrices ``W1`` and ``W2``.

    Returns:
        The softmax of ``swish(X @ W1) @ W2``.

    This function is intentionally not jitted. ``nets`` is a plain Python object
    and ``swish`` is an ordinary Python function, so Numba cannot compile the
    body in nopython mode. The former ``@njit`` (an alias for ``numba.jit``)
    only worked through the deprecated object-mode fallback.
    """
    row = np.asarray(X).reshape(1, -1)
    hidden = swish(np.dot(row, nets.W1))
    logits = np.dot(hidden, nets.W2)
    return softmax(logits)

In [0]:
class NeuroEvolution:
    """Genetic-algorithm search over the weights of small feed-forward trading policies.

    Each individual is an object created by ``model_generator`` that exposes
    ``W1``, ``W2``, ``fitness`` and ``id``. Fitness is the accumulated reward
    (``total_pip * pip_cost``) of replaying the individual over the current
    evaluation window, so a higher fitness is better.
    """

    def __init__(self, population_size, mutation_rate, model_generator, state_size, window_size, path, step_size, spread=10, pip_cost=1000, los_cut=100):
        """Store the hyper-parameters and load/preprocess the price data.

        Args:
            population_size: number of individuals per generation.
            mutation_rate: per-weight probability of receiving Gaussian noise.
            model_generator: callable ``(id_, window_size=...)`` returning an individual.
            state_size: stored as-is; not read anywhere else in this class.
            window_size: number of consecutive rows in one input window.
            path: CSV file read by ``preproc``.
            step_size: number of windows in one evaluation episode.
            spread: divided by ``pip_cost`` before being stored.
            pip_cost: multiplier applied to raw rewards.
            los_cut: lower clip (as ``-los_cut``) applied to reported pips.
        """
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.model_generator = model_generator
        self.state_size = state_size
        self.window_size = window_size
        self.path = path
        self.step_size = step_size
        self.spread = spread / pip_cost
        self.pip_cost = pip_cost
        self.los_cut = los_cut
        self.preproc()
        # reward3 comes from the star-imported project `reward` module; its exact
        # semantics are not visible in this notebook.
        self.rewards = reward3

    @staticmethod
    def _make_windows(features, targets, length):
        """Pair every run of ``length`` consecutive feature rows with the target that follows it.

        For each ``end`` in ``range(length, len(features))`` the sample is
        ``features[end - length:end]`` and its target is ``targets[end]``. This is
        the alignment produced by Keras' ``TimeseriesGenerator`` with stride 1
        and sampling rate 1.

        Returns:
            ``(windows, window_targets)`` where ``windows`` has shape
            ``(n - length, length, ...)`` and ``window_targets`` is ``targets[length:n]``.

        Raises:
            ValueError: if ``length`` is not positive or there are not more than
                ``length`` rows.
        """
        features = np.asanyarray(features)
        targets = np.asanyarray(targets)
        n_rows = len(features)
        if length <= 0 or n_rows <= length:
            raise ValueError(
                "need more than %d rows to build windows of that length, got %d" % (length, n_rows)
            )
        windows = np.stack([features[end - length:end] for end in range(length, n_rows)])
        return windows, targets[length:n_rows]

    def preproc(self):
        """Read the CSV at ``self.path`` and build the windowed inputs and targets.

        Features per row are the daily log return and the MACD difference of
        ``Close``; the target series is ``Open``. ``self.df`` / ``self.trend``
        are initialised to the last ``step_size`` windows.
        """
        self.dat = df = pd.read_csv(self.path)
        # NOTE(review): indicator columns usually start with NaNs unless the
        # library fills them -- confirm the first windows contain valid numbers.
        log_return = np.asanyarray(ta.daily_log_return(df["Close"])).reshape((-1, 1))
        macd = np.asanyarray(ta.macd_diff(df["Close"])).reshape((-1, 1))
        x = np.concatenate([log_return, macd], 1)
        y = np.asanyarray(self.dat[["Open"]])

        self.x, self.y = self._make_windows(x, y, self.window_size)

        self.df = self.x[-self.step_size::]
        self.trend = self.y[-self.step_size::]

    def _initialize_population(self):
        """Create ``population_size`` fresh individuals sized for a flattened input window."""
        input_size = self.window_size * self.df.shape[-1]
        self.population = [
            self.model_generator(i, window_size=input_size)
            for i in range(self.population_size)
        ]

    def mutate(self, individual, scale=1.0):
        """Add N(0, scale) noise in place to a random subset of the weights; return the individual.

        Each weight is selected independently with probability ``mutation_rate``.
        """
        mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W1.shape)
        individual.W1 += np.random.normal(loc=0, scale=scale, size=individual.W1.shape) * mutation_mask
        mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=individual.W2.shape)
        individual.W2 += np.random.normal(loc=0, scale=scale, size=individual.W2.shape) * mutation_mask
        return individual

    def inherit_weights(self, parent, child):
        """Give ``child`` independent copies of ``parent``'s weight matrices and return it."""
        child.W1 = parent.W1.copy()
        child.W2 = parent.W2.copy()
        return child

    def crossover(self, parent1, parent2):
        """Produce two children by swapping trailing weight columns between the parents.

        Each child starts as a copy of one parent. For each weight matrix a
        random column cutoff is drawn, and the columns from the cutoff onwards
        are taken from the other parent.
        """
        child1 = self.model_generator((parent1.id+1)*10)
        child1 = self.inherit_weights(parent1, child1)
        child2 = self.model_generator((parent2.id+1)*10)
        child2 = self.inherit_weights(parent2, child2)
        # first W
        cutoff = np.random.randint(0, child1.W1.shape[1])
        child1.W1[:, cutoff:] = parent2.W1[:, cutoff:]
        child2.W1[:, cutoff:] = parent1.W1[:, cutoff:]
        # second W
        cutoff = np.random.randint(0, child1.W2.shape[1])
        child1.W2[:, cutoff:] = parent2.W2[:, cutoff:]
        child2.W2[:, cutoff:] = parent1.W2[:, cutoff:]
        return child1, child2

    def act(self, p, state):
        """Return the index of the highest-probability output of ``p`` for ``state``."""
        logits = feed_forward(state, p)
        return np.argmax(logits, 1)[0]

    def _rollout(self, individual):
        """Replay ``individual`` over the current ``self.df`` / ``self.trend`` window.

        Returns:
            ``(provisional_pip, total_pip, actions, positions)`` as maintained by
            the reward function, plus the per-step action and position history.
        """
        states = []
        pip = []
        provisional_pip = []
        total_pip = 0.0
        # Initial position code handed to the reward function; its meaning is
        # defined by the reward function, which is not visible here.
        position = 3
        actions = []
        positions = []
        # NOTE(review): the divisor 1000 is hard-coded rather than derived from
        # pip_cost -- confirm this is intended.
        loss_cut = self.los_cut / 2 / 1000
        for t in range(0, len(self.trend) - 1):
            action = self.act(individual, self.df[t])
            actions.append(action)
            states, provisional_pip, position, total_pip = self.rewards(
                self.trend[t], pip, provisional_pip, action, position, states,
                self.pip_cost, self.spread, total_pip, lc=loss_cut)
            positions.append(position)
        return provisional_pip, total_pip, actions, positions

    def test(self, individual, i):
        """Evaluate ``individual`` on a randomly chosen window and print a summary.

        Side effects: replaces ``self.df`` / ``self.trend`` with the sampled
        window (so later fitness evaluations use it too) and sets ``self.pip``,
        ``self.total_pip`` and ``self.trade``.
        """
        h = np.random.randint(self.x.shape[0]-(self.step_size+1))
        self.df = self.x[h:h+self.step_size]
        self.trend = self.y[h:h+self.step_size]

        provisional_pip, _, history, h_p = self._rollout(individual)

        self.pip = np.asanyarray(provisional_pip) * self.pip_cost
        self.pip = [p if p >= -self.los_cut else -self.los_cut for p in self.pip]
        self.total_pip = np.sum(self.pip)
        # Number of simulated steps; the max() guard keeps the division safe
        # when the window is too short to run any step.
        steps = max(len(self.trend) - 1, 1)
        mean_pip = self.total_pip / steps
        trade_accuracy = np.mean(np.asanyarray(self.pip) > 0)
        self.trade = trade_accuracy
        mean_pip *= 24
        prob = self.prob(history)
        position_prob = self.prob(h_p)

        print("")
        print('action probability = ', prob)
        print("buy = ", position_prob[1], " sell = ", position_prob[-1])
        print('trade accuracy = ', trade_accuracy)
        print('epoch: %d, total rewards: %f, mean rewards: %f' % (i+1, float(self.total_pip), float(mean_pip)))

    def calculate_fitness(self):
        """Set ``fitness`` of every individual to its rollout reward times ``pip_cost``."""
        for individual in self.population:
            _, total_pip, _, _ = self._rollout(individual)
            individual.fitness = total_pip * self.pip_cost

    def prob(self,history):
        """Return ``[share of 0s, share of 1s, remainder]`` for the values in ``history``."""
        prob = np.asanyarray(history)
        a = np.mean(prob == 0)
        b = np.mean(prob == 1)
        c = 1 - (a + b)
        prob = [a,b,c]
        return prob

    def _fitness_vector(self):
        """Return the population's fitness values as a flat float array (one per individual)."""
        return np.asarray([ind.fitness for ind in self.population], dtype=float).reshape(-1)

    def _rank_population(self):
        """Sort ``self.population`` from highest to lowest fitness and return the best individual."""
        self.fitnesses = [ind.fitness for ind in self.population]
        scores = self._fitness_vector()
        # Negate so the largest fitness comes first; a stable sort keeps ties
        # in their existing order and pushes NaN scores to the end.
        order = np.argsort(-scores, kind="stable")
        self.sort_fitness = order.reshape(-1, 1)
        self.population = [self.population[int(k)] for k in order]
        return self.population[0]

    def _select_parents(self, n_parents):
        """Draw ``n_parents`` distinct individuals, weighted by absolute fitness.

        Falls back to uniform sampling when the weights cannot form a valid
        distribution (all zero or non-finite) or when fewer individuals have a
        non-zero weight than must be drawn without replacement.
        """
        weights = np.abs(self._fitness_vector())
        total = weights.sum()
        if not np.isfinite(total) or total <= 0 or np.count_nonzero(weights) < n_parents:
            probabilities = None
        else:
            probabilities = weights / total
        picks = np.random.choice(len(self.population), size=n_parents, p=probabilities, replace=False)
        return [self.population[int(k)] for k in picks]

    def _save_checkpoint(self, individual):
        """Pickle ``[W1, W2]`` of ``individual`` to ``neuro_w.txt``."""
        save = [individual.W1, individual.W2]
        with open('neuro_w.txt', 'wb') as f:
            pickle.dump(save, f)

    def evolve(self, generations=20, checkpoint= 1):
        """Run the evolutionary loop.

        Per generation: evaluate fitness, rank best-first, optionally test and
        checkpoint the best individual, keep the top ~40% unchanged, and refill
        the rest with mutated crossover children of fitness-weighted parents.

        Args:
            generations: number of generations to run.
            checkpoint: test and save the best individual every this many generations.

        Returns:
            The best individual of the last evaluated generation, or ``None``
            when ``generations`` is 0.
        """
        self._initialize_population()
        n_winners = int(self.population_size * 0.4)
        n_parents = self.population_size - n_winners
        # Parents are consumed in pairs; keep one more winner when the count is
        # odd so the population size stays constant.
        if n_parents % 2:
            n_winners += 1
            n_parents -= 1

        fittest_individual = None
        for epoch in range(generations):
            self.calculate_fitness()
            fittest_individual = self._rank_population()
            if (epoch+1) % checkpoint == 0:
                self.test(fittest_individual, epoch)
                self._save_checkpoint(fittest_individual)
            next_population = self.population[:n_winners]
            parents = self._select_parents(n_parents)
            for k in range(0, len(parents), 2):
                child1, child2 = self.crossover(parents[k], parents[k+1])
                next_population += [self.mutate(child1), self.mutate(child2)]
            self.population = next_population
        return fittest_individual


In [0]:
# Data source and episode geometry.
path = "audpred15.csv"
window_size = 5
step_size = 96

# Evolution hyper-parameters.
population_size = 100
mutation_rate = 0.1
generations = 100

# Both state_size and window_size are supplied from `window_size`.
neural_evolve = NeuroEvolution(
    population_size=population_size,
    mutation_rate=mutation_rate,
    model_generator=neuralnetwork,
    state_size=window_size,
    window_size=window_size,
    path=path,
    step_size=step_size,
)

In [0]:
# Run the evolutionary search for 50000 generations. The `generations` variable
# defined in the configuration cell is not passed here.
fittest_nets = neural_evolve.evolve(50000)


action probability =  [0.3263157894736842, 0.37894736842105264, 0.2947368421052632]
buy =  0.43157894736842106  sell =  0.5684210526315789
trade accuracy =  0.38181818181818183
epoch: 1, total rewards: 9349.000000, mean rewards: 2361.852632

action probability =  [0.3157894736842105, 0.3684210526315789, 0.3157894736842106]
buy =  0.3473684210526316  sell =  0.6526315789473685
trade accuracy =  0.2807017543859649
epoch: 2, total rewards: -2026.000000, mean rewards: -511.831579

action probability =  [0.30526315789473685, 0.10526315789473684, 0.5894736842105264]
buy =  0.6842105263157895  sell =  0.3157894736842105
trade accuracy =  0.2982456140350877
epoch: 3, total rewards: -2300.000000, mean rewards: -581.052632

action probability =  [0.3894736842105263, 0.21052631578947367, 0.4]
buy =  0.7473684210526316  sell =  0.25263157894736843
trade accuracy =  0.3114754098360656
epoch: 4, total rewards: 1056.000000, mean rewards: 266.778947

action probability =  [0.4, 0.22105263157894736, 0

In [0]:
# Display the pip values stored on the instance by the most recent
# NeuroEvolution.test() call (already clipped from below at -los_cut).
neural_evolve.pip