In [46]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import pandas as pd

In [47]:
config = {
        'batch_size': 16, 
        'coin_no': 11, 
        'window_size': 50, 
        'feature_no': 3,
        "test_portion": 0.08,
        "global_period": 1800
        }


In [48]:
# input size is 11x50x3
# Remember: Channels last

def CNN(rows, cols, features, batch_size):
    input_shape = (rows, cols, features)
    x = keras.Input(shape= input_shape, batch_size=batch_size)
    w = keras.Input(shape = (11, 1, 1), batch_size=batch_size)

    y = tf.keras.layers.Conv2D(
    filters = 2, kernel_size = (1,3), padding='valid', activation='relu')(x)
    y = keras.layers.Conv2D(20, (1, y.shape[2]), activation="relu", name = 'conv2')(y)    

    con = keras.layers.Concatenate(axis=3)([y, w])

    y = keras.layers.Conv2D(1, (1,1), name = 'votes')(con)
    y = y[:,:,0,0]

    with_bias = CashBias()(y)
    outputs = keras.layers.Activation('softmax')(with_bias)
    return keras.Model(inputs = [x, w], outputs = outputs, name = "Policy")

class CashBias(keras.layers.Layer):
    def __init__(self):
        super(CashBias, self).__init__()
    
    def build(self, input_shape):
        b_init = tf.zeros_initializer()
        b = tf.Variable(
                initial_value=b_init(shape=(1, 1), dtype="float32"),
                trainable=True
                )
        self.b = tf.tile(b, [input_shape[0], 1])

    def call(self, inputs):
        return keras.layers.Concatenate(axis=1)([self.b, inputs])

model = CNN(config['coin_no'], config['window_size'], config['feature_no'], config['batch_size'])
print(model.summary())

Model: "Policy"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_37 (InputLayer)           [(16, 11, 50, 3)]    0                                            
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (16, 11, 48, 2)      20          input_37[0][0]                   
__________________________________________________________________________________________________
conv2 (Conv2D)                  (16, 11, 1, 20)      1940        conv2d_18[0][0]                  
__________________________________________________________________________________________________
input_38 (InputLayer)           [(16, 11, 1, 1)]     0                                            
_____________________________________________________________________________________________

In [108]:
class Agent:
    
    def __init__(self, config):
        self.net = CNN(CNN(config['input_shape'], config['batch_size']))
        self.batch_size = config['batch_size']
        self.coin_no = config['coin_no']
        self.window_size = config['window_size']
        self.no_periods = config["global_period"]
        
        
        #Just make something random
        self.global_data = np.random.rand(500, 11, 3)
        
        # PVM should be weights for assets and not the cash/btc
        self.PVM = pd.DataFrame(index=self.global_data.shape[0],
                                  columns=self.global_data.shape[1])
        self.PVM = self.PVM.fillna(1.0 / self.coin_no)
        
        self.loss = self.set_loss()
        
        self.divide_data(config['test_portion'])
        
        
        # This needs to be written such that it gets arguments from config
        end_index = self._train_ind[-1]
        self.__replay_buffer = rb.ReplayBuffer(start_index=self._train_ind[0],
                                               end_index=end_index,
                                               sample_bias=5e-5,
                                               batch_size=self.batch_size,
                                               coin_number=self.coin_no)
        
    def train(self):
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        
        loss_metric = -tf.keras.metrics.Mean()
        
        for step in range(config['steps']):
            batch = # get batch
            with tf.GradientTape() as tape:
                w = self.net(batch)
                # Compute negative reward
                loss = mse_loss_fn(x_batch_train, reconstructed)

            grads = tape.gradient(loss, self.net.trainable_weights)
            optimizer.apply_gradients(zip(grads, self.net.trainable_weights))
            
            loss_metric(loss)
            
            if step % 100 == 0:
                print("step %d: mean loss = %.4f" % (step, loss_metric.result()))
            
    def next_batch(self):
        """
        @:return: the next batch of training sample. The sample is a dictionary
        with key "X"(input data); "y"(future relative price); "last_w" a numpy array
        with shape [batch_size, assets]; "w" a list of numpy arrays list length is
        batch_size
        """
        batch = self.__pack_samples([exp.state_index for exp in self.__replay_buffer.next_experience_batch()])
        return batch

    def __pack_samples(self, indexs):
        indexs = np.array(indexs)
        last_w = self.__PVM.values[indexs-1, :]

        def setw(w):
            self.__PVM.iloc[indexs, :] = w
        M = [self.get_submatrix(index) for index in indexs]
        M = np.array(M)
        X = M[:, :, :, :-1]
        y = M[:, :, :, -1] / M[:, 0, None, :, -2]
        return {"X": X, "y": y, "last_w": last_w, "setw": setw}

    # volume in y is the volume in next access period
    def get_submatrix(self, ind):
        return self.global_data[ind:ind+self._window_size+1, :, :]
    
    def divide_data(self, test_portion, portion_reversed = False):
        train_portion = 1 - test_portion
        s = float(train_portion + test_portion)
        if portion_reversed:
            portions = np.array([test_portion]) / s
            portion_split = (portions * self.no_periods).astype(int)
            indices = np.arange(self.no_periods)
            self._test_ind, self._train_ind = np.split(indices, portion_split)
        else:
            portions = np.array([train_portion]) / s
            portion_split = (portions * self.no_periods).astype(int)
            indices = np.arange(self.no_periods)
            self._train_ind, self._test_ind = np.split(indices, portion_split)

        self._train_ind = self._train_ind[:-(self._window_size + 1)]
        # NOTE(zhengyao): change the logic here in order to fit both
        # reversed and normal version
        self._train_ind = list(self._train_ind)
        self._num_train_samples = len(self._train_ind)
        self._num_test_samples = len(self.test_indices)
    
        
    #get a loss function, which is minus the reward function
    def set_loss(self):
        #r_t = log(mu_t * y_t dot w_{t-1})
        
        
        return tf.reduce_mean(tf.log(self.pv_vector))
        
    
        

SyntaxError: unexpected EOF while parsing (<ipython-input-108-f807117a2d2f>, line 4)

In [49]:
from __future__ import division,absolute_import,print_function
import logging


class ReplayBuffer:
    def __init__(self, start_index, end_index, batch_size, coin_number, sample_bias=1.0):
        """
        :param start_index: start index of the training set on the global data matrices
        :param end_index: end index of the training set on the global data matrices
        """
        self.__coin_number = coin_number
        self.__experiences = [Experience(i) for i in range(start_index, end_index)]
        self.__is_permed = is_permed
        # NOTE: in order to achieve the previous w feature
        self.__batch_size = batch_size
        self.__sample_bias = sample_bias
        logging.debug("buffer_bias is %f" % sample_bias)

    def append_experience(self, state_index):
        self.__experiences.append(Experience(state_index))
        logging.debug("a new experience, indexed by %d, was appended" % state_index)

    def __sample(self, start, end, bias):
        """
        @:param end: is excluded
        @:param bias: value in (0, 1)
        """
        # TODO: deal with the case when bias is 0
        ran = np.random.geometric(bias)
        while ran > end - start:
            ran = np.random.geometric(bias)
        result = end - ran
        return result

    def next_experience_batch(self):
        # First get a start point randomly
        batch_start = self.__sample(0, len(self.__experiences) - self.__batch_size,
                                    self.__sample_bias)
        batch = self.__experiences[batch_start:batch_start+self.__batch_size]
        return batch


class Experience:
    def __init__(self, state_index):
        self.state_index = int(state_index)


In [53]:
rb = ReplayBuffer(start_index=0,
                                               end_index=300,
                                               sample_bias=5e-5,
                                               batch_size=30,
                                               coin_number=11,
                                               is_permed=False)
batch = [exp.state_index for exp in rb.next_experience_batch()]

In [59]:
np.random.rand(500, 11, 3).values

AttributeError: 'numpy.ndarray' object has no attribute 'values'