In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import pandas as pd

In [2]:
config = {
        'training':{
            'steps': 2000,
            'batch_size': 16,
            'buffer_bias': 5e-5
        },
        'coin_no': 11, 
        'window_size': 50, 
        'feature_no': 3,
        "test_portion": 0.08,
        "global_period": 1800,
        "trading_consumption": 0.0025
        }


In [3]:
# input size is 11x50x3
# Remember: Channels last

class CNN(tf.keras.Model):
    
    def __init__(self, rows = 11, cols = 50, features = 3, batch_size=None):
        super(CNN, self).__init__()
        
        self.tensor_shape = (rows, cols, features)
        self.batch_size = batch_size
        
        self.conv1 = tf.keras.layers.Conv2D(
                            filters = 2, 
                            kernel_size = (1,3), 
                            padding='valid', 
                            activation='relu',
                            name = 'conv1'
                        )    
        
        self.conv2 =  keras.layers.Conv2D(
                            filters = 20, 
                            kernel_size = (1, cols-2), 
                            activation="relu", 
                            name = 'conv2'
                        )
        self.votes = keras.layers.Conv2D(1, (1,1), name = 'votes')
        self.b = tf.Variable(tf.zeros((1, 1), dtype=tf.float32), trainable=True)
        self.softmax = tf.keras.layers.Activation('softmax')

    def call(self, inputs):
        x = self.conv1(inputs[0])
        x = self.conv2(x)
        x = tf.concat((x, inputs[1]), axis=3)
        #x = keras.layers.Concatenate(axis=3)([x, inputs[1]])
        x = self.votes(x)
        x = tf.squeeze(x)
        cash_bias = tf.tile(self.b, [tf.shape(x)[0], 1])
        x = tf.concat((cash_bias, x), axis = -1)
        x = self.softmax(x)
        return x


model = CNN()

X = np.random.randn(100, 11,50,3)
w = np.random.randn(100, 11, 1, 1)
with tf.GradientTape() as tape:
    y = model([X,w])

print([var.name for var in tape.watched_variables()])
grads = tape.gradient(y, model.trainable_variables)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

['Variable:0', 'cnn/conv1/kernel:0', 'cnn/conv1/bias:0', 'cnn/conv2/kernel:0', 'cnn/conv2/bias:0', 'cnn/votes/kernel:0', 'cnn/votes/bias:0']


In [4]:
from __future__ import division,absolute_import,print_function
import logging

class ReplayBuffer:
    def __init__(self, start_index, end_index, batch_size, coin_no, sample_bias=1.0):
        """
        :param start_index: start index of the training set on the global data matrices
        :param end_index: end index of the training set on the global data matrices
        """
        self.__coin_no = coin_no
        self.__experiences = [Experience(i) for i in range(start_index, end_index)]
        # NOTE: in order to achieve the previous w feature
        self.__batch_size = batch_size
        self.__sample_bias = sample_bias
        logging.debug("buffer_bias is %f" % sample_bias)

    def append_experience(self, state_index):
        self.__experiences.append(Experience(state_index))
        logging.debug("a new experience, indexed by %d, was appended" % state_index)

    def __sample(self, start, end, bias):
        """
        @:param end: is excluded
        @:param bias: value in (0, 1)
        """
        # TODO: deal with the case when bias is 0
        ran = np.random.geometric(bias)
        while ran > end - start:
            ran = np.random.geometric(bias)
        result = end - ran
        return result

    def next_experience_batch(self):
        # First get a start point randomly
        batch_start = self.__sample(0, len(self.__experiences) - self.__batch_size,
                                    self.__sample_bias)
        batch = self.__experiences[batch_start:batch_start+self.__batch_size]
        return batch


class Experience:
    def __init__(self, state_index):
        self.state_index = int(state_index)


In [8]:
class Agent:
    
    def __init__(self, config):
        
        self.train_config = config['training']        
        self.batch_size = self.train_config['batch_size']
        self.buffer_bias = self.train_config['buffer_bias']
        
        self.coin_no = config['coin_no']
        self.window_size = config['window_size']
        self.global_period = config["global_period"]
        self.feature_no = config['feature_no']
        
        self.no_periods = 150
        
        self.commission_ratio = config["trading_consumption"]
        
        #Just make something random
        self.global_data = tf.random.uniform(shape = (self.feature_no, self.coin_no, self.no_periods))
        
        PVM = np.ones((self.global_data.shape[2], self.global_data.shape[1]), dtype='float32')/self.coin_no
        self.PVM = pd.DataFrame(PVM)
        
        # Notice this part is made with pandas.panel
#          # portfolio vector memory, [time, assets]
#         self.__PVM = pd.DataFrame(index=self.__global_data.minor_axis,
#                                   columns=self.__global_data.major_axis)
#         self.__PVM = self.__PVM.fillna(1.0 / self.__coin_no)

        
        self.pv_vector = None
        
        
        self.model = CNN(
            config['coin_no'], 
            config['window_size'], 
            config['feature_no'], 
            config['training']['batch_size']
        )
        
        self.divide_data(config['test_portion']) # This gives the indekses of the training and test data
        
        # This needs to be written such that it gets arguments from config, like sample bias (geo dist)
        end_index = self._train_ind[-1]
        self.__replay_buffer = ReplayBuffer(start_index=self._train_ind[0],
                                               end_index=end_index,
                                               sample_bias=self.buffer_bias,
                                               batch_size=self.batch_size,
                                               coin_no=self.coin_no)
    #@tf.function
    def train_step(self, batch):
        
        w = batch['last_w']
        w = tf.reshape(w, [w.shape[0], w.shape[1], 1, 1] )
        X = tf.transpose(batch['X'], [0, 2, 3, 1])
        y = batch['y']
                
        with tf.GradientTape() as tape:
                output = self.model([X, w])
                                
                # Compute negative reward
                loss = self.loss(y, output)

        grads = tape.gradient(loss, self.model.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))

        # Save the model output in PVM
        #batch['setw'](w[:, 1:])
        self.PVM.iloc[self.indexs, :] = output[:, 1:].numpy()
        
        return loss
    
    def train(self):
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        #loss_metric = -tf.keras.metrics.Mean()
        
        for step in range(self.train_config['steps']):
            
            batch = self.next_batch()
    
            # Do a train step
            loss_value = self.train_step(batch)
            
            # You can write a custom metric here. See tf.org Keras -> Train and Evaluate
            #loss_metric(loss)
            portfolio_value = tf.reduce_prod(self.pv_vector)
            mean = tf.reduce_mean(self.pv_vector)
            standard_deviation = tf.math.sqrt(tf.reduce_mean((self.pv_vector - mean) ** 2))
            sharp_ratio = (mean - 1) / standard_deviation
            
            
            if step % 200 == 0:
                print('Step %2d: loss=%2.5f, cumval=%.1f' %
                        (step, -loss_value, portfolio_value))
                
                # You can add a log between steps here
                # Both manually and with tensorboard
            
            
    def next_batch(self):
        """
        @:return: the next batch of training sample. The sample is a dictionary
        with key "X"(input data); "y"(future relative price); "last_w" a numpy array
        with shape [batch_size, assets]; "w" a list of numpy arrays list length is
        batch_size
        """
        batch = self.pack_samples([exp.state_index for exp in self.__replay_buffer.next_experience_batch()])
        return batch

    
    def pack_samples(self, indexs):
        self.indexs = indexs
        indexs = np.array(indexs)
        last_w = self.PVM.values[indexs-1, :]

        def setw(w):                      # Notice that this function is defined in terms of the specifik indexs
            self.PVM.iloc[indexs, :] = w    
        M = [self.get_submatrix(index) for index in indexs]   # For each state_index in the batch, get a input tensor
        M = np.array(M, dtype='float32')
        X = M[:, :, :, :-1]    # X_t tensor
        y = M[:, :, :, -1] / M[:, 0, None, :, -2]     # y_{t+1} obtained by dividing all features by prev close price
        return {"X": X, "y": y, "last_w": last_w, "setw": setw}
    

    # volume in y is the volume in next access period
    def get_submatrix(self, ind):
        return self.global_data[:, :, ind-(self.window_size):ind+1]
    
    
    def divide_data(self, test_portion, portion_reversed = False):
        train_portion = 1 - test_portion
        s = float(train_portion + test_portion)
        if portion_reversed:
            portions = np.array([test_portion]) / s
            portion_split = (portions * self.no_periods).astype(int)
            indices = np.arange(self.no_periods)
            self._test_ind, self._train_ind = np.split(indices, portion_split)
        else:
            portions = np.array([train_portion]) / s
            portion_split = (portions * self.no_periods).astype(int)
            indices = np.arange(self.no_periods)
            self._train_ind, self._test_ind = np.split(indices, portion_split)

        self._train_ind = self._train_ind[(self.window_size):-1]
        # NOTE(zhengyao): change the logic here in order to fit both
        # reversed and normal version
        self._train_ind = list(self._train_ind)
        self._num_train_samples = len(self._train_ind)
        self._num_test_samples = len(self._test_ind)

        
    #get a loss function, which is minus the reward function
    def loss(self, y, output):
        #r_t = log(mu_t * y_t dot w_{t-1})
        
        self.future_price = tf.concat([tf.ones([16, 1]), y[:, 0, :]], 1)
        self.future_w = (self.future_price * output) / tf.reduce_sum(self.future_price * output, axis=1)[:, None]
        self.pv_vector = tf.reduce_sum(output * self.future_price, axis=1) *\
                           (tf.concat([tf.ones(1), self.pure_pc(output)], axis=0))
        
        
        return -tf.reduce_mean(tf.math.log(self.pv_vector))
        
        
        
    # consumption vector (on each periods)
    def pure_pc(self, output):
        c = self.commission_ratio
        w_t = self.future_w[:self.batch_size-1]  # rebalanced
        w_t1 = output[1:self.batch_size]
        mu = 1 - tf.reduce_sum(tf.math.abs(w_t1[:, 1:]-w_t[:, 1:]), axis=1)*c
        """
        mu = 1-3*c+c**2

        def recurse(mu0):
            factor1 = 1/(1 - c*w_t1[:, 0])
            if isinstance(mu0, float):
                mu0 = mu0
            else:
                mu0 = mu0[:, None]
            factor2 = 1 - c*w_t[:, 0] - (2*c - c**2)*tf.reduce_sum(
                tf.nn.relu(w_t[:, 1:] - mu0 * w_t1[:, 1:]), axis=1)
            return factor1*factor2

        for i in range(20):
            mu = recurse(mu)
        """
        return mu

In [9]:
agent = Agent(config)

In [10]:
agent.train()

Step  0: loss=0.60621, cumval=16307.5
Step 200: loss=1.22533, cumval=326918176.0
Step 400: loss=2.09840, cumval=381250187558912.0
Step 600: loss=1.88675, cumval=12896217071616.0
Step 800: loss=1.86873, cumval=9666566291456.0
Step 1000: loss=2.03957, cumval=148735354994688.0
Step 1200: loss=1.96895, cumval=48050126979072.0


KeyboardInterrupt: 

In [None]:
batch = agent.next_batch()
w = batch['last_w']
w = tf.reshape(w, [w.shape[0], w.shape[1], 1, 1] )
X = tf.transpose(batch['X'], [0, 2, 3, 1])
y = batch['y']
output=agent.model([X,w])


In [None]:
PVM = np.ones((agent.global_data.shape[2], agent.global_data.shape[1]))/agent.coin_no
PVM = pd.DataFrame(PVM)

In [None]:
PVM.iloc[agent.indexs, :] = output[:, 1:]

In [None]:
w = agent.next_batch()['last_w']
w = tf.reshape(w, [w.shape[0], w.shape[1], 1, 1] )
tensor = tf.transpose(agent.next_batch()['X'], [0, 2, 3, 1])

In [None]:
future = tf.concat([tf.ones([16, 1]), agent.next_batch()['y'][:, 0, :]], 1)

In [None]:
output = agent.model([tensor, w])

In [None]:
output.shape

In [None]:
(output * future) / np.sum(future * output, axis=1)[:,None]

In [None]:
b_init = tf.zeros_initializer()
b = tf.Variable(
                initial_value=b_init(shape=(1, 1), dtype="float32"),
                trainable=True
                )
