<a href="https://colab.research.google.com/github/csana23/NETvisor-Writer/blob/master/src/DuelingDeepQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi

Tue Aug 25 18:24:20 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    33W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Creating model, agent and memory management classes

In [1]:
# dueling_dqn_lstm.py
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import numpy as np

class DuelingDeepQNetwork(keras.Model):
    """Dueling Q-network built on two stacked LSTM layers.

    The input flows through LeakyReLU -> LSTM(64) -> LeakyReLU -> LSTM(64).
    Both LSTMs are created with ``return_sequences=True``, so the two linear
    heads are applied to every time step: ``V`` has a single unit (state
    value) and ``A`` has ``n_actions`` units (advantages).

    Args:
        n_actions: Number of units of the advantage head ``A``.
        fc1_dims: Unused; kept so existing callers keep working.
        fc2_dims: Unused; kept so existing callers keep working.
    """

    # fc = fully connected, fc_dims: number of units, neurons
    def __init__(self, n_actions, fc1_dims, fc2_dims):
        super().__init__()

        # switches the Keras default float type for the whole process, matching the float64 layers below
        keras.backend.set_floatx('float64')
        self.leaky_relu1 = keras.layers.LeakyReLU()
        self.lstm1 = keras.layers.LSTM(units=64, input_shape=(64,2), return_sequences=True, dtype='float64', activation=None)
        self.leaky_relu2 = keras.layers.LeakyReLU()
        self.lstm2 = keras.layers.LSTM(units=64, input_shape=(64,2), return_sequences=True, dtype='float64', activation=None)
        self.V = keras.layers.Dense(1, activation=None)
        self.A = keras.layers.Dense(n_actions, activation=None)

        print('Network created')

    def _features(self, state):
        """Run the shared LeakyReLU/LSTM stack that feeds both heads."""
        x = self.leaky_relu1(state)
        x = self.lstm1(x)
        x = self.leaky_relu2(x)
        return self.lstm2(x)

    def call(self, state, training=True):
        """Return Q-values when ``training`` is truthy, raw advantages otherwise.

        Args:
            state: Input tensor fed to the first LeakyReLU layer.
            training: Selects the output; the default (True) yields
                ``Q = V + (A - mean(A))``.

        Returns:
            The combined Q tensor during training, else the output of ``A``.
        """
        features = self._features(state)
        A = self.A(features)

        # outside training only the advantage head is needed
        if not training:
            return A

        V = self.V(features)

        # Centre the advantages over the ACTION axis (the last one). The LSTMs
        # return sequences, so axis 1 is the time axis; averaging over it would
        # mix time steps instead of actions. For a 2-D (batch, n_actions)
        # output the last axis is axis 1, so that case is unchanged.
        return V + (A - tf.math.reduce_mean(A, axis=-1, keepdims=True))

    @tf.function
    def advantage(self, state):
        """Return the advantage head output for ``state``."""
        return self.A(self._features(state))

class ReplayBuffer():
    """Fixed-size circular store of (state, action, reward, next state, done) transitions.

    Args:
        max_size: Number of transitions kept before the oldest are overwritten.
        input_shape: Shape of a single state; states are stored as float64.
    """

    def __init__(self, max_size, input_shape):
        self.mem_size = max_size
        # total number of transitions ever stored (not capped at mem_size)
        self.mem_cntr = 0

        state_shape = (self.mem_size, *input_shape)
        self.state_memory = np.zeros(state_shape, dtype=np.float64)
        self.new_state_memory = np.zeros(state_shape, dtype=np.float64)
        self.action_memory = np.zeros(self.mem_size, dtype=np.int64)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float64)
        # np.bool_ instead of the np.bool alias, which NumPy 1.24 removed
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool_)

    def store_transition(self, state, action, reward, state_, done):
        """Write one transition, overwriting the oldest slot once the buffer is full."""
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.new_state_memory[index] = state_
        self.action_memory[index] = action
        self.reward_memory[index] = reward
        self.terminal_memory[index] = done

        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        """Draw ``batch_size`` distinct stored transitions uniformly at random.

        Returns:
            Tuple ``(states, actions, rewards, new_states, dones)`` of arrays
            whose first dimension is ``batch_size``.

        Raises:
            ValueError: From ``np.random.choice`` when ``batch_size`` exceeds
                the number of stored transitions (sampling is without
                replacement).
        """
        # only the filled part of the buffer is eligible
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)

        states = self.state_memory[batch]
        new_states = self.new_state_memory[batch]
        actions = self.action_memory[batch]
        rewards = self.reward_memory[batch]
        dones = self.terminal_memory[batch]

        return states, actions, rewards, new_states, dones

class Agent():
    """Epsilon-greedy agent with an online network and a periodically synced target network.

    Args:
        lr: Learning rate of the Adam optimizers.
        gamma: Discount factor applied to the next-state value in ``learn``.
        n_actions: Size of the advantage head of newly created networks.
        epsilon: Initial probability of picking a random action.
        batch_size: Number of transitions sampled per ``learn`` call.
        input_dims: Shape of a single stored state.
        epsilon_dec: Amount subtracted from ``epsilon`` after each learning step.
        eps_end: Lower bound of ``epsilon``.
        mem_size: Capacity of the replay buffer.
        fname: Path used by ``save_model`` and ``load_model``.
        fc1_dims: Forwarded to ``DuelingDeepQNetwork``.
        fc2_dims: Forwarded to ``DuelingDeepQNetwork``.
        replace: Number of learning steps between target-network syncs.
        testing: When true, wrap the supplied ``model`` instead of building
            new networks.
        model: Pre-built network; required when ``testing`` is true.

    Raises:
        ValueError: If ``testing`` is true and no ``model`` is given.
    """

    def __init__(self, lr, gamma, n_actions, epsilon, batch_size, input_dims,
                 epsilon_dec=1e-3, eps_end=0.01, mem_size=10000, fname='model',
                 fc1_dims=128, fc2_dims=128, replace=100, testing=False, model=None):
        if testing and model is None:
            raise ValueError('testing=True requires a pre-built model')

        # The action space is fixed to [-1, 0, 1] (sell, hold, buy); position
        # 0/1/2 of this array is the index the greedy policy maps back from.
        self.action_space = np.array([-1,0,1])
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_dec = epsilon_dec
        self.eps_min = eps_end
        self.fname = fname
        self.replace = replace
        self.batch_size = batch_size

        self.learn_step_counter = 0
        self.memory = ReplayBuffer(mem_size, input_dims)

        if testing:
            # in testing mode the online and target network are the same object
            self.q_eval = model
            self.q_next = model
        else:
            self.q_eval = DuelingDeepQNetwork(n_actions, fc1_dims, fc2_dims)
            self.q_next = DuelingDeepQNetwork(n_actions, fc1_dims, fc2_dims)

        # set learning rate and optimizer
        self.q_eval.compile(optimizer=Adam(learning_rate=lr),
                            loss='mean_squared_error')
        # just a formality, the target network is never optimized directly
        self.q_next.compile(optimizer=Adam(learning_rate=lr),
                            loss='mean_squared_error')

        # log of chosen actions: one {'Epsilon': a} or {'Model': a} dict per call
        self.chosen_actions = []

        print('Testing agent created' if testing else 'Training agent created')

    def store_transition(self, state, action, reward, new_state, done):
        """Append one transition to the replay buffer."""
        self.memory.store_transition(state, action, reward, new_state, done)

    def _greedy_action(self, observation):
        """Return the action (-1, 0 or 1) the online network selects for ``observation``."""
        advantages = self.q_eval.advantage(observation)
        # NOTE(review): when the network returns one advantage vector per time
        # step, this arg-max runs over axis 1 and `scores` holds positions
        # rather than advantage values -- confirm this is the intended rule.
        scores = tf.math.argmax(advantages, axis=1).numpy()[0]

        # positions holding the largest score
        candidates = np.where(scores == np.amax(scores))[0]

        # several maxima: pick one uniformly at random; otherwise take the only one
        if len(candidates) > 1:
            action_idx = int(np.random.choice(candidates))
        else:
            action_idx = int(candidates[0])

        # map the position (0, 1, 2) back to the action value (-1, 0, 1)
        return int(self.action_space[action_idx])

    def choose_action(self, observation, testing=False):
        """Pick an action epsilon-greedily, or purely greedily when ``testing``.

        Args:
            observation: Batch-shaped state passed unchanged to the network.
            testing: When true the state goes straight to the model and no
                random exploration takes place.

        Returns:
            The chosen action as a Python int: -1, 0 or 1.
        """
        if not testing and np.random.random() < self.epsilon:
            # .item() converts the NumPy scalar to a native Python int
            action = np.random.choice(self.action_space).item()
            print('actionE:', action)
            source = 'Epsilon'
        else:
            action = self._greedy_action(observation)
            print('actionM:', action)
            source = 'Model'

        self.chosen_actions.append({source: action})

        return action

    def learn(self):
        """Run one training step on a sampled batch; no-op until a full batch is stored."""
        if self.memory.mem_cntr < self.batch_size:
            return

        # sync the target network every `replace` learning steps
        if self.learn_step_counter % self.replace == 0:
            self.q_next.set_weights(self.q_eval.get_weights())

        states, actions, rewards, states_, dones = self.memory.sample_buffer(self.batch_size)

        q_pred = self.q_eval(states)
        q_next = tf.math.reduce_max(self.q_next(states_), axis=1, keepdims=True).numpy()
        q_target = np.copy(q_pred)

        # terminal transitions carry no future value
        q_next[dones] = 0.0

        # NOTE(review): `actions` holds whatever the caller stored; if that is
        # the raw action value (-1/0/1) rather than a 0-based position, it is
        # used directly as an index on axis 1 here -- confirm this is intended.
        for idx, action in enumerate(actions):
            q_target[idx, action] = rewards[idx] + self.gamma*q_next[idx]

        self.q_eval.train_on_batch(x=states, y=q_target)

        # linear decay, clamped so epsilon never drops below eps_min
        self.epsilon = max(self.epsilon - self.eps_dec, self.eps_min)

        self.learn_step_counter += 1

    def reward_function(self, a_t1, sigma_tgt, sigma_t1, r_t, bp, p_t1, sigma_t2, a_t2):
        """Compute the reward from two consecutive actions.

        The result is ``a_t1 * (sigma_tgt/sigma_t1) * r_t`` minus a cost term
        ``bp * p_t1 * |(sigma_tgt/sigma_t1) * a_t1 - (sigma_tgt/sigma_t2) * a_t2|``.
        No guard is applied for ``sigma_t1`` or ``sigma_t2`` being zero.
        """
        scale_t1 = sigma_tgt/sigma_t1
        scale_t2 = sigma_tgt/sigma_t2

        gain = a_t1 * scale_t1 * r_t
        cost = bp * p_t1 * abs(scale_t1 * a_t1 - scale_t2 * a_t2)

        return gain - cost

    def save_model(self):
        """Save the online network to ``self.fname`` in TensorFlow SavedModel format."""
        self.q_eval.save(self.fname, save_format='tf', overwrite=True)

    def load_model(self):
        """Replace the online network with the model stored at ``self.fname`` and return it."""
        self.q_eval = load_model(self.fname)
        return self.q_eval

## Ticker and data index settings 

In [2]:
# Switch between the combined multi-ticker dataset and a single-ticker file.
combined = True

if not combined:
    ticker = 'AMD'
    data_index = 3968
    minus_value = 1
else:
    ticker = 'combined'
    data_index = 115840

    # limit - minus_value: locates the row of the chosen ticker dynamically
    idx = 50 # Shell
    batch_size_basic = 64
    minus_value = batch_size_basic - idx # 14 in the case of Shell

# echo the active settings, one per line
print(ticker, data_index, minus_value, sep='\n')

combined
115840
14


## Getting data, preprocessing

In [4]:
# main_keras_dueling_dqn_lstm.py
import pandas as pd
import numpy as np
from sklearn import preprocessing
import math

# connect to Drive
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then build a PyDrive client that uses the
# application-default Google credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive file ids of the available datasets. Exactly one data_link/id pair is
# active; the others are kept commented out so they can be switched in by hand.

# AAPL
# data_link = 'https://drive.google.com/file/d/1YSTQDwmzZzLoUd8O9gAy6_2JOo7R2qRx/view?usp=sharing'
# id = '1YSTQDwmzZzLoUd8O9gAy6_2JOo7R2qRx'

# VOO
# data_link = 'https://drive.google.com/file/d/15xvN9J4iaFPmgzqAbxkrdCqja3zpJnsK/view?usp=sharing'
# id = '15xvN9J4iaFPmgzqAbxkrdCqja3zpJnsK'

# AMZN
# data_link = 'https://drive.google.com/file/d/1o54V2UYiZrKgBwxgQ1_5Y470mboSbhyC/view?usp=sharing'
# id = '1o54V2UYiZrKgBwxgQ1_5Y470mboSbhyC'

# AMD
# data_link = 'https://drive.google.com/file/d/14YifDjDJqF89U6jjVy9S_Sdu-yiy-gdj/view?usp=sharing'
# id = '14YifDjDJqF89U6jjVy9S_Sdu-yiy-gdj'
# print(id)

# combined
# NOTE(review): `id` shadows the Python builtin of the same name for the rest
# of the notebook; the active id is always the combined file, even when
# `combined` is False -- confirm the id is switched by hand in that case.
data_link = 'https://drive.google.com/file/d/1BGLrDsIU2H45mX4dNnffUCte8vzUaLqg/view?usp=sharing'
id = '1BGLrDsIU2H45mX4dNnffUCte8vzUaLqg'
print(id)

# handle for the Drive file with the id above
downloaded = drive.CreateFile({'id':id}) 

# Download the selected CSV, keep its first `data_index` rows as training data
# and sort them; then add normalized and reward-helper columns.
if combined:
    print('...combined data chosen...')
    data_file = 'combined.csv'
    downloaded.GetContentFile(data_file)

    # load the downloaded CSV into a DataFrame
    data = pd.read_csv(data_file)

    # the slice is taken BEFORE sorting, i.e. on the file's original row order
    data_train = pd.DataFrame(data[:data_index])
    data_train = data_train.sort_values(by=['date', 'ticker'], ascending=(True, True))
else:
    print('...conventional individual stock data chosen...')
    data_file = ticker + '_Yahoo.csv'
    downloaded.GetContentFile(data_file)

    # load the downloaded CSV into a DataFrame
    data = pd.read_csv(data_file)

    # data[:1792] for VOO
    data_train = pd.DataFrame(data[:data_index])
    data_train = data_train.sort_values(by=['id'], ascending=True)

# normalize train data by adding normalized columns
# NOTE: the same scaler object is fitted twice (close, then volume), so after
# this cell `data_normalizer` holds the volume scaling only.
data_normalizer = preprocessing.MinMaxScaler()

# the scaler expects a 2-D array, hence the reshape to a single column
close_array = np.array(data_train['close'])
close_reshaped = close_array.reshape(-1, 1)

data_train['close_normalized'] = data_normalizer.fit_transform(close_reshaped)

volume_array = np.array(data_train['volume'])
volume_reshaped = volume_array.reshape(-1, 1)

data_train['volume_normalized'] = data_normalizer.fit_transform(volume_reshaped)

# adding helper columns for usage in reward function - only using the normalized values
# r_t: difference of the normalized close between consecutive rows (first row -> 0)
# sigma_t: exponentially weighted (span=60) standard deviation of r_t (NaN -> 0)
# NOTE(review): in combined mode the rows are ordered by date and then ticker,
# so consecutive rows can belong to different tickers and diff() is taken
# across them -- confirm this is intended rather than a per-ticker difference.
data_train['r_t'] = data_train['close_normalized'].diff().fillna(0)
data_train['sigma_t'] = data_train['r_t'].ewm(span=60).std().fillna(0)

print(data_train.head())
print(data_train.tail())

'''
# normalize train data by adding normalized columns
data_normalizer = preprocessing.MinMaxScaler()

close_array = np.array(data_train['close'])
close_reshaped = close_array.reshape(-1, 1)

data_train['close_normalized'] = data_normalizer.fit_transform(close_reshaped)

volume_array = np.array(data_train['volume'])
volume_reshaped = volume_array.reshape(-1, 1)

data_train['volume_normalized'] = data_normalizer.fit_transform(volume_reshaped)

# adding helper columns for usage in reward function - only using the normalized values
data_train['r_t'] = data_train['close_normalized'].diff().fillna(0)
data_train['sigma_t'] = data_train['r_t'].ewm(span=60).std().fillna(0)
print('head of data_train:')
print(data_train.head())
'''

1BGLrDsIU2H45mX4dNnffUCte8vzUaLqg
...combined data chosen...
         date      close      volume  ... volume_normalized       r_t   sigma_t
0  2010-06-29   9.148929  1133344800  ...          0.602523  0.000000  0.000000
1  2010-06-29  22.382669    17382600  ...          0.009241  0.012589  0.008902
2  2010-06-29  38.650002     6533100  ...          0.003473  0.015475  0.008178
3  2010-06-29  26.900000    22062500  ...          0.011729 -0.011178  0.012433
4  2010-06-29   7.480000    43861400  ...          0.023318 -0.018474  0.014863

[5 rows x 9 columns]
              date       close    volume  ... volume_normalized       r_t   sigma_t
115835  2017-09-05  103.010002   6337500  ...          0.003369 -0.010379  0.174444
115836  2017-09-05   47.360001  10005100  ...          0.005319 -0.052941  0.171815
115837  2017-09-05   50.099998  31251300  ...          0.016614  0.002607  0.168977
115838  2017-09-05   79.800003  12068500  ...          0.006416  0.028254  0.166275
115839  2017-09-0

"\n# normalize train data by adding normalized columns\ndata_normalizer = preprocessing.MinMaxScaler()\n\nclose_array = np.array(data_train['close'])\nclose_reshaped = close_array.reshape(-1, 1)\n\ndata_train['close_normalized'] = data_normalizer.fit_transform(close_reshaped)\n\nvolume_array = np.array(data_train['volume'])\nvolume_reshaped = volume_array.reshape(-1, 1)\n\ndata_train['volume_normalized'] = data_normalizer.fit_transform(volume_reshaped)\n\n# adding helper columns for usage in reward function - only using the normalized values\ndata_train['r_t'] = data_train['close_normalized'].diff().fillna(0)\ndata_train['sigma_t'] = data_train['r_t'].ewm(span=60).std().fillna(0)\nprint('head of data_train:')\nprint(data_train.head())\n"

## Instantiate training agent

In [5]:
# Location the trained model is saved to; change it whenever the dataset changes.
# model_name = '/content/gdrive/My Drive/Colab Notebooks/saved_files/model_' + ticker
model_name = '/content/gdrive/My Drive/Colab Notebooks/saved_files/model_combined'
print(model_name)

# Training agent: builds its own online and target networks.
agent = Agent(
    lr=0.0001,
    gamma=0.3,
    n_actions=3,
    epsilon=1,
    batch_size=64,
    input_dims=(64,2),
    epsilon_dec=1e-3,
    eps_end=0.01,
    mem_size=5000,
    fname=model_name,
    fc1_dims=64,
    fc2_dims=64,
    replace=1000,
)

/content/gdrive/My Drive/Colab Notebooks/saved_files/model_combined
Network created
Network created
Training agent created


## Helper variables and constants

In [6]:
# --- constants ---------------------------------------------------------------
# target sigma handed to Agent.reward_function
sigma_tgt = 0.03

# proportional transaction cost; actual proceeds of a sale: close * (1 - bp)
bp = 0.001

# number of rows in the training data
L = len(data_train)
print('L:', L)

# --- mutable run state ---------------------------------------------------------
eps_history = []
reward = 0
done = False
# NOTE: shadows the builtin round(); the name is kept because the training loop uses it
round = 0

# budget
current_balance = 100000

# one row per executed action
transactions = pd.DataFrame(
    columns=['action', 'close', 'value', 'cost', 'qty', 'crt_blc', 'round']
)

L: 115840


## Testing a single forward pass (disabled scratch cell)

In [None]:
'''
batch_start = 500
batch_end = 564

batch_raw = data_train[batch_start:batch_end]

# filter
batch = batch_raw[['close_normalized', 'volume_normalized']]

# okkay, i got the data - the state
# convert it to numpy array, then it becomes the observation
observation = np.array(batch)
observation_3d = observation.reshape(1, 64, 2)

# berakni a modellbe, várni az outputot
# action = agent.choose_action(observation_3d)
actions = agent.q_eval.advantage(observation_3d)
action = tf.math.argmax(actions, axis=1).numpy()[0]
print('ndarray action:', action)

# get index of max value inside ndarray
max_idx = np.where(action == np.amax(action))

# lets see if max_idx consist of multiple elements or just one
# if it has multiple element than i choose one randomly
action_idx = 0

# max_idx[0] is a 'list' (1D ndarray)

if len(max_idx[0] > 1):
    action_idx = int(np.random.choice(max_idx[0], 1))
else:
    action_idx = int(max_idx[0])

# na nézzük a pozíciókat, indexet (0,1,2)
if action_idx == 0:
    action = -1
elif action_idx == 1:
    action = 0
elif action_idx == 2:
    action = 1

print('Muthafuckin action is:', action)
'''

ndarray action: [ 0 28  0]
Muthafuckin action is: 0


## Training model

In [9]:
batch_start = 0
batch_end = agent.batch_size

while batch_end <= L:
    limit = min(batch_end, L) # bruhhhh this is it

    print('batch_end:', batch_end)
    print('round:', round)
    
    # ha a limit az L-t választja, akkor azt jelenti, hogy végig értünk az adathalmazon, done = True
    # nem tanulunk itten
    if limit == L:
        print('HELLO')
        print('HELLO')
        print('HELLO')
        print('HELLO')
        print('HELLO')
        print('HELLO')
        print('HELLO')
        print('limit:', limit, 'L:', L)
        batch_raw = data_train[batch_start:limit]

        # filter
        batch = batch_raw[['close_normalized', 'volume_normalized']]

        # okkay, i got the data - the state
        # convert it to numpy array, then it becomes the observation
        observation = np.array(batch)
        observation_3d = observation.reshape(1, 64, 2)

        # berakni a modellbe, várni az outputot
        action = agent.choose_action(observation_3d)
        print('action in non-learning:', action)

        # megvan az action, kezeljük le ([-1,0,1])
        # itten kell a rewardokat kiokoskodni

        # sell
        if action == -1:
            # done = True
            transactions = transactions.sort_values(by=['round'])
            last_sell_index = transactions.loc[transactions['action'] == -1].last_valid_index()

            close = 0
            value = 0
            cost = 0
            qty = 0

            if last_sell_index == None:
                valid_records = transactions.loc[transactions['action'] == 1]

                if valid_records.empty:
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0
                    cost = 0
                    qty = 0
                else:
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    # qty = valid_records['qty'].sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    qty = valid_records['qty'].sum()
                    value = qty * close
                    cost = value * bp
                    
                    current_balance = current_balance + (value - cost)

                    # profit += close


            else:
                valid_interval_data = transactions.iloc[last_sell_index:len(transactions)]

                # filter valid_interval_data where action == 1
                valid_records = valid_interval_data.loc[valid_interval_data['action'] == 1]

                if valid_records.empty:
                    close = 0
                    value = 0
                    cost = 0
                    qty = 0
                else:
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    # qty = valid_records['qty'].sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    qty = valid_records['qty'].sum()
                    value = qty * close
                    cost = value * bp
                    
                    current_balance = current_balance + (value - cost)

                    # profit += close

            # okkay do dict
            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': qty, 'crt_blc': current_balance, 'round': round}

              # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)
            #print('transactions')
            #print(transactions)

            # # calculate reward - this is far from finished i think
            # reward = agent.reward_function(a_t1=transactions.iloc[len(transactions)-2] if len(transactions) >= 2 else 0, sigma_tgt=sigma_tgt, sigma_t1=data_train['sigma_t'].iloc[limit-2], r_t=data_train['r_t'].iloc[limit-1], bp=0.0020, p_t1=data_train['close'].iloc[limit-2], sigma_t2=data_train['sigma_t'].iloc[limit-3], a_t2=transactions.iloc[len(transactions)-3] if len(transactions) >= 3 else 0)

            # done = True

            # # run state through model
            # agent.learn()

        # hold
        elif action == 0:
              # get current number of stocks (qty i hold = hold_qty)
            # get index of last sell
            transactions = transactions.sort_values(by=['round'])
            last_sell_index = transactions.loc[transactions['action'] == -1].last_valid_index()
            hold_qty = 0

            # close = value of held stocks
            close = 0
            value = 0
            cost = 0

            # last_sell_index == None: nem volt még eladás, akkor mi legyen
            if last_sell_index == None:
                # ha nincsen sell, akkor minden okés, az egész transactionst filterezzük
                valid_records = transactions.loc[transactions['action'] == 1]
                
                # ha nincsen buy
                if valid_records.empty:
                    hold_qty = 0
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0
                    cost = 0
                else:
                    # hold_qty = valid_records['qty'].sum()
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    hold_qty = valid_records['qty'].sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = valid_records['value'].sum()
                    cost = 0
            else:
                # else: ha volt eladás, akkor mi legyen
                
                # last index in df might be: df.last_valid_index()
                # more logic needed
                # disregard hold actions, only count buys
                # hold_qty = transactions['qty'].iloc[last_sell_index:len(transactions)-1].sum(axis=1)
                valid_interval_data = transactions.iloc[last_sell_index:len(transactions)]

                # filter valid_interval_data where action == 1
                valid_records = valid_interval_data.loc[valid_interval_data['action'] == 1]

                # ha nincsen buy 
                if valid_records.empty:
                    hold_qty = 0
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0
                    cost = 0
                else:
                    # get sum of these records' qty
                    # hold_qty = valid_records['qty'].sum()
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    hold_qty = valid_records['qty'].sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = hold_qty * close
                    cost = 0

            # dict to record action parameters
            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': hold_qty, 'crt_blc': current_balance, 'round': round}

            # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)

        # buy
        elif action == 1:
            close = data_train['close'].iloc[limit-minus_value]
            print(data_train['ticker'].iloc[limit-minus_value])

            buy_qty = math.floor((current_balance / 5) / close)
            value = buy_qty * close
            cost = value * bp

            current_balance = current_balance - (value + cost)

            # dict to record action parameters
            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': buy_qty, 'crt_blc': current_balance, 'round': round}

            # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)

        break
    
    # a limit belefér az adathalmazban itt folytathatjuk a batch-et, done = False
    else:
        batch_raw = data_train[batch_start:limit]

        # filter
        batch = batch_raw[['close_normalized', 'volume_normalized']]

        # okkay, i got the data - the state
        # convert it to numpy array, then it becomes the observation
        observation = np.array(batch)
 
        observation_3d = observation.reshape(1, 64, 2)

        # berakni a modellbe, várni az outputot
        action = agent.choose_action(observation_3d)
        print('action in learning:', action)

        # sell
        if action == -1:
            transactions = transactions.sort_values(by=['round']) # ascending or not?

            last_sell_index = transactions.loc[transactions['action'] == -1].last_valid_index()
            #print('last_sell_index:', last_sell_index)

            close = 0
            value = 0
            cost = 0
            qty = 0

            # még nem volt eladás
            if last_sell_index == None:
                valid_records = transactions.loc[transactions['action'] == 1]
                
                # nincsen vétel sem
                if valid_records.empty:
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0
                    cost = 0
                    qty = 0
                else:
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    qty = valid_records['qty'].sum()
                    value = qty * close
                    cost = value * bp
                    
                    current_balance = current_balance + (value - cost)

                    # profit += close


            else:
                valid_interval_data = transactions.iloc[last_sell_index:len(transactions)] #-1

                # filter valid_interval_data where action == 1
                valid_records = valid_interval_data.loc[valid_interval_data['action'] == 1]

                if valid_records.empty:
                    close = 0
                    value = 0
                    cost = 0
                    qty = 0
                else:
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    qty = valid_records['qty'].sum()
                    value = qty * close
                    cost = value * bp
                    
                    current_balance = current_balance + (value - cost)

                    # profit += close

            # okkay do dict
            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': qty, 'crt_blc': current_balance, 'round': round}

              # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)

            # calculate reward - this is far from finished i think
            reward = agent.reward_function(a_t1=transactions['action'].iloc[len(transactions)-2] if len(transactions) >= 2 else 0, sigma_tgt=sigma_tgt, sigma_t1=data_train['sigma_t'].iloc[limit-2], r_t=data_train['r_t'].iloc[limit-1], bp=0.0020, p_t1=data_train['close'].iloc[limit-2], sigma_t2=data_train['sigma_t'].iloc[limit-3], a_t2=transactions['action'].iloc[len(transactions)-3] if len(transactions) >= 3 else 0)
            print('reward 0')
            print(reward)
            # reward = float(reward)

            done = False

            # false new observation_ for memory management
            false_batch_start = batch_start + 1
            false_batch_end = batch_end + 1

            false_limit = min(false_batch_end, L)

            false_batch_raw = data_train[false_batch_start:false_limit]

            false_batch = false_batch_raw[['close_normalized', 'volume_normalized']]
            
            observation_ = np.array(false_batch)
            observation_3d_ = observation_.reshape(1, 64, 2)

            # store transition
            agent.store_transition(observation_3d, action, reward, observation_3d_, done)

            # run state through model
            agent.learn()

            # reset false_batch_raw
            false_batch_raw = false_batch_raw.drop(false_batch_raw.index, inplace=True)
            false_batch_start = 0
            false_batch_end = 0

        # hold
        elif action == 0:
            transactions = transactions.sort_values(by=['round'])
            last_sell_index = transactions.loc[transactions['action'] == -1].last_valid_index()
            hold_qty = 0

            # close = value of held stocks
            close = 0
            value = 0
            cost = 0

            # last_sell_index == None: nem volt még eladás, akkor mi legyen
            if last_sell_index == None:
                # ha nincsen sell, akkor minden okés, az egész transactionst filterezzük
                valid_records = transactions.loc[transactions['action'] == 1]
                
                # ha nincsen buy
                if valid_records.empty:
                    hold_qty = 0
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0 # hold_qty * close = 0
                    cost = 0 # value * bp = 0
                else:
                    hold_qty = valid_records['qty'].sum()
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = valid_records['value'].sum()
                    cost = 0
            else:
                # hold_qty = transactions['qty'].iloc[last_sell_index:len(transactions)-1].sum(axis=1)
                valid_interval_data = transactions.iloc[last_sell_index:len(transactions)]

                # filter valid_interval_data where action == 1
                valid_records = valid_interval_data.loc[valid_interval_data['action'] == 1]

                # ha nincsen buy 
                if valid_records.empty:
                    hold_qty = 0
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = 0
                    cost = 0
                else:
                    # get sum of these records' qty
                    hold_qty = valid_records['qty'].sum()
                    # close = (valid_records['close'] * valid_records['qty']).sum()
                    close = data_train['close'].iloc[limit-minus_value]
                    print(data_train['ticker'].iloc[limit-minus_value])
                    value = hold_qty * close
                    cost = 0

            # current_balance nem változik

            # dict to record action parameters
            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': hold_qty, 'crt_blc': current_balance, 'round': round}

            # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)

            # calculate reward - this is far from finished i think
            reward = agent.reward_function(a_t1=transactions['action'].iloc[len(transactions)-2] if len(transactions) >= 2 else 0, sigma_tgt=sigma_tgt, sigma_t1=data_train['sigma_t'].iloc[limit-2], r_t=data_train['r_t'].iloc[limit-1], bp=0.0020, p_t1=data_train['close'].iloc[limit-2], sigma_t2=data_train['sigma_t'].iloc[limit-3], a_t2=transactions['action'].iloc[len(transactions)-3] if len(transactions) >= 3 else 0)
            print('reward 0')
            print(reward)
            # reward = float(reward)
            
            done = False

            # false new observation_ for memory management
            false_batch_start = batch_start + 1
            false_batch_end = batch_end + 1

            false_limit = min(false_batch_end, L)

            false_batch_raw = data_train[false_batch_start:false_limit]

            false_batch = false_batch_raw[['close_normalized', 'volume_normalized']]

            observation_ = np.array(false_batch)
            observation_3d_ = observation.reshape(1, 64, 2)

            # store transition
            agent.store_transition(observation_3d, action, reward, observation_3d_, done)

            # run state through model
            agent.learn()

            # reset false_batch
            false_batch_raw = false_batch_raw.drop(false_batch_raw.index, inplace=True)
            false_batch_start = 0
            false_batch_end = 0

        # buy
        elif action == 1:
            # dict to record action parameters
            close = data_train['close'].iloc[limit-minus_value]
            print(data_train['ticker'].iloc[limit-minus_value])

            buy_qty = math.floor((current_balance / 5) / close)
            value = buy_qty * close
            cost = value * bp

            current_balance = current_balance - (value + cost)

            dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': buy_qty, 'crt_blc': current_balance, 'round': round}

            # convert dict to pandas Series
            s = pd.Series(dicti)

            # add Series to transactions df - df = df.append
            transactions = transactions.append(s, ignore_index=True)

            # calculate reward - this is far from finished i think
            reward = agent.reward_function(a_t1=transactions['action'].iloc[len(transactions)-2] if len(transactions) >= 2 else 0, sigma_tgt=sigma_tgt, sigma_t1=data_train['sigma_t'].iloc[limit-2], r_t=data_train['r_t'].iloc[limit-1], bp=0.0020, p_t1=data_train['close'].iloc[limit-2], sigma_t2=data_train['sigma_t'].iloc[limit-3], a_t2=transactions['action'].iloc[len(transactions)-3] if len(transactions) >= 3 else 0)
            print('reward 0')
            print(reward)
            # reward = float(reward)

            done = False

            # false new observation_ for memory management
            false_batch_start = batch_start + 1
            false_batch_end = batch_end + 1

            false_limit = min(false_batch_end, L)

            false_batch_raw = data_train[false_batch_start:false_limit]

            false_batch = false_batch_raw[['close_normalized', 'volume_normalized']]

            observation_ = np.array(false_batch)
            observation_3d_ = observation.reshape(1, 64, 2)

            # store transition
            agent.store_transition(observation_3d, action, reward, observation_3d_, done)

            # run state through model
            agent.learn()
            
            # reset false_batch
            false_batch_raw = false_batch_raw.drop(false_batch_raw.index, inplace=True)
            false_batch_start = 0
            false_batch_end = 0

        # at the end get new batch (state), ez az eltolásos módszer
        batch_start += 1  
        batch_end += 1

        # record epsilon value 
        eps_history.append(agent.epsilon)
    round = round + 1

    print('current_balance:', current_balance)

print('Training done!')


batch_end: 64
round: 246
actionE: -1
action in learning: -1


KeyError: ignored

## Debug training module

In [None]:
# %debug
# Print agent.chosen_actions to inspect the actions the agent has recorded so far.
print(agent.chosen_actions)

[{'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': -1}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': -1}, {'Epsilon': -1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 1}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': 1}, {'Epsilon': -1}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': -1}, {'Epsilon': 0}, {'Epsilon': 0}, {'Epsilon': 0}, {'Ep

## Save trained model and transactions log

In [None]:
# Persist the trained model and the training transaction log to Google Drive.
from google.colab import drive

drive.mount('/content/gdrive')

agent.save_model()

# Target CSV path; the ticker symbol is embedded in the file name.
transactions_file = ''.join([
    '/content/gdrive/My Drive/Colab Notebooks/saved_files/transactions_',
    ticker,
    '.csv',
])
print('transactions_file:', transactions_file)

# Write the transaction log as CSV without the index column.
transactions.to_csv(transactions_file, index=False)

# Echo the final balance, then append it as one extra trailing line.
# NOTE(review): the appended line is not a CSV record, so anything re-reading
# this file as CSV has to skip the last line.
print(current_balance)

text = '\n' + 'Current balance:' + str(current_balance)
with open(transactions_file, 'a') as trans_file:
    trans_file.write(text)
print('Write OK')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/Colab Notebooks/saved_files/model_VOO/assets
transactions_file: /content/gdrive/My Drive/Colab Notebooks/saved_files/transactions_VOO.csv
6293.091272817989
Write OK


## Testing model

In [None]:
# Mount Google Drive so the saved model files are reachable.
from google.colab import drive
drive.mount('/content/gdrive')

# Load the previously saved model through the training agent.
model = agent.load_model()

# This is needed: print the layer/parameter summary of the loaded model.
model.summary()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Model: "dueling_deep_q_network_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
leaky_re_lu_4 (LeakyReLU)    multiple                  0         
_________________________________________________________________
lstm_4 (LSTM)                multiple                  17152     
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    multiple                  0         
_________________________________________________________________
lstm_5 (LSTM)                multiple                  33024     
_________________________________________________________________
dense_4 (Dense)              multiple                  65        
_________________________________________________________________
dense_5 (Dense)              multiple      

In [None]:
# Echo the loaded model's `advantage` attribute (the notebook displays its repr).
model.advantage

<tensorflow.python.saved_model.function_deserialization.RestoredFunction at 0x7f6c8a04fda0>

## Prepare test data

In [None]:
# Build the test frame from every row of `data` at position data_index and
# later, renumber its index from 0 and order the rows by ascending 'id'.
data_test = (
    pd.DataFrame(data[data_index:])
    .reset_index(drop=True)
    .sort_values(by=['id'], ascending=True)
)

# NOTE(review): fit_transform is called on the test split itself, which
# presumably re-fits data_normalizer on test data - confirm this is intended.

# Normalized closing price (values reshaped to a single column first).
close_array_test = np.array(data_test['close'])
close_reshaped_test = close_array_test.reshape(-1, 1)
data_test['close_normalized'] = data_normalizer.fit_transform(close_reshaped_test)

# Normalized volume, same procedure.
volume_array_test = np.array(data_test['volume'])
volume_reshaped_test = volume_array_test.reshape(-1, 1)
data_test['volume_normalized'] = data_normalizer.fit_transform(volume_reshaped_test)

# Show both ends of the prepared frame.
print('data_test head')
print(data_test.head())
print(data_test.tail())

data_test head
         date  close    volume    id  close_normalized  volume_normalized
0  2013-10-10   3.79  24019400  3968          0.023962           0.073893
1  2013-10-11   3.83  17384600  3969          0.024404           0.053481
2  2013-10-14   3.97  63648400  3970          0.025950           0.195806
3  2013-10-15   4.02  52027500  3971          0.026502           0.160056
4  2013-10-16   4.09  34138300  3972          0.027275           0.105022
            date      close  ...  close_normalized  volume_normalized
1750  2020-09-23  74.730003  ...          0.807310           0.130215
1751  2020-09-24  75.820000  ...          0.819346           0.177017
1752  2020-09-25  78.059998  ...          0.844081           0.148300
1753  2020-09-28  79.480003  ...          0.859762           0.147683
1754  2020-09-29  81.769997  ...          0.885049           0.189409

[5 rows x 6 columns]


## Helper variables and methods

In [None]:
# Fresh bookkeeping for the test run.

# reset budget
current_balance = 100000

# number of test rounds completed so far
round_test = 0

# number of rows in the test set
L_test = len(data_test)

# transactions made on test dataset (one row per executed action)
transactions_test = pd.DataFrame(
    columns=[
        'action',
        'close',
        'value',
        'cost',
        'qty',
        'crt_blc',
        'round',
    ]
)

## Test advantage, call and other methods

In [None]:
# Build the agent used for testing: it wraps the already loaded `model`
# and is constructed with testing=True.
agent_test = Agent(
    lr=0.0001,
    gamma=0.3,
    n_actions=3,
    epsilon=1,
    batch_size=64,
    input_dims=(64, 2),
    epsilon_dec=1e-3,
    eps_end=0.01,
    mem_size=5000,
    fname=model_name,
    fc1_dims=64,
    fc2_dims=64,
    replace=1000,
    testing=True,
    model=model,
)

Testing agent created


Smoke test: run the testing agent on the first window of the test data

In [None]:
# Smoke test: feed the first 64-row window of the test set through the testing
# agent and print the action it picks.
start = 0
end = 64
batch_raw = data_test[start:end]

# keep only the two normalized feature columns
batch = batch_raw[['close_normalized', 'volume_normalized']]
observation = np.array(batch)
observation_3d = observation.reshape(1, 64, 2)  # (1, rows in window, features)

# the agent's own method is used here (testing=True)
action = agent_test.choose_action(observation_3d, testing=True)
print(action)

actionM: -1
-1


## Run pre-trained model on test data

In [None]:
# Run the pre-trained agent over the test set with a sliding window.
#
# Each round takes the rows [batch_start, limit) of data_test, asks agent_test
# for an action (-1 = sell, 0 = hold, 1 = buy), books the trade at the close of
# the window's last row and appends one row to transactions_test. No learning
# happens here. The window is shifted by one row per round; the round whose
# window ends on the last row of data_test is the final one.
#
# Reads from earlier cells: data_test, L_test, agent_test, bp (fee rate), math,
# np, pd. Updates: transactions_test, current_balance, round_test.


def _last_sell_position(log):
    """Return the row position of the most recent sell (-1) in *log*, or None.

    A position (not an index label) is returned so it can be used with .iloc
    even if the labels of *log* are not 0..n-1.
    """
    sell_positions = np.flatnonzero((log['action'] == -1).to_numpy())
    return int(sell_positions[-1]) if sell_positions.size else None


def _buys_since(log, last_sell_pos):
    """Return the buy (1) rows of *log* from *last_sell_pos* on (all buys if None)."""
    window = log if last_sell_pos is None else log.iloc[last_sell_pos:]
    return window.loc[window['action'] == 1]


def _buy(close_now, balance, fee_rate):
    """Buy with a fifth of *balance*; return (close, value, cost, qty, new_balance)."""
    qty = math.floor((balance / 5) / close_now)
    value = qty * close_now
    cost = value * fee_rate
    return close_now, value, cost, qty, balance - (value + cost)


def _sell(open_buys, had_prior_sell, close_now, balance, fee_rate):
    """Sell everything in *open_buys*; return (close, value, cost, qty, new_balance)."""
    if open_buys.empty:
        # Nothing to sell, balance untouched.
        # NOTE(review): the recorded close is 0 when a sell already happened but
        # the current close otherwise - kept as in the original bookkeeping.
        recorded_close = 0 if had_prior_sell else close_now
        return recorded_close, 0, 0, 0, balance
    qty = open_buys['qty'].sum()
    value = qty * close_now
    cost = value * fee_rate
    return close_now, value, cost, qty, balance + (value - cost)


def _hold(open_buys, had_prior_sell, close_now):
    """Describe the held position; return (close, value, cost, qty). No fee, no balance change."""
    if open_buys.empty:
        return close_now, 0, 0, 0
    qty = open_buys['qty'].sum()
    # NOTE(review): before the first sell the purchase value is recorded, after
    # it the market value (qty * close) - kept as in the original bookkeeping.
    value = qty * close_now if had_prior_sell else open_buys['value'].sum()
    return close_now, value, 0, qty


def _append_row(log, row):
    """Return *log* with the dict *row* appended as a new last row.

    Uses pd.concat because DataFrame.append no longer exists in pandas >= 2.0.
    """
    new_row = pd.Series(row).to_frame().T.infer_objects()
    if log.empty:
        # Skip concatenating with an empty frame; just keep its column order.
        columns = list(log.columns) + [c for c in new_row.columns if c not in log.columns]
        return new_row.reindex(columns=columns)
    return pd.concat([log, new_row], ignore_index=True)


batch_start = 0
batch_end = agent_test.batch_size

while batch_end <= L_test:
    limit = min(batch_end, L_test)
    # limit reaching L_test means the window ends on the last row of the data set
    is_last_window = limit == L_test

    print('batch_end:', batch_end)
    print('round_test:', round_test)

    if is_last_window:
        for _ in range(7):
            print('HELLO')
        print('limit:', limit, 'L:', L_test)

    # current state: the two normalized feature columns of the window
    batch_raw = data_test[batch_start:limit]
    batch = batch_raw[['close_normalized', 'volume_normalized']]
    observation = np.array(batch)
    observation_3d = observation.reshape(1, 64, 2)

    # feed the state to the model and take its action
    action = agent_test.choose_action(observation_3d, testing=True)
    if is_last_window:
        print('action in non-learning:', action)
    else:
        print('action in learning:', action)

    # trades are booked at the close of the window's last row
    close_now = data_test['close'].iloc[limit - 1]
    booked = None  # (close, value, cost, qty) of this round, if any

    if action == 1:
        # buy
        close, value, cost, qty, current_balance = _buy(close_now, current_balance, bp)
        booked = (close, value, cost, qty)

    elif action == -1 or action == 0:
        # sell and hold both need the buys made since the last sell
        transactions_test = transactions_test.sort_values(by=['round'])
        last_sell_index = _last_sell_position(transactions_test)

        if action == -1 and not is_last_window:
            print('transactions_test df in -1')
            print(transactions_test)
            print('last_sell_index:', last_sell_index)

        had_prior_sell = last_sell_index is not None
        valid_records = _buys_since(transactions_test, last_sell_index)

        if action == -1:
            # sell everything bought since the last sell
            close, value, cost, qty, current_balance = _sell(
                valid_records, had_prior_sell, close_now, current_balance, bp
            )
            booked = (close, value, cost, qty)
        else:
            # hold: current_balance does not change
            booked = _hold(valid_records, had_prior_sell, close_now)

    if booked is not None:
        close, value, cost, qty = booked
        dicti = {'action': action, 'close': close, 'value': value, 'cost': cost, 'qty': qty, 'crt_blc': current_balance, 'round': round_test}
        transactions_test = _append_row(transactions_test, dicti)

    if is_last_window:
        break

    # shift the window by one row for the next round (sliding-window method)
    batch_start += 1
    batch_end += 1

    round_test = round_test + 1

    print('current_balance:', current_balance)

print('Testing done!')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
round_test: 1180
actionM: -1
action in learning: -1
transactions_test df in -1
      action      close          value  ...     qty        crt_blc   round
0        1.0   4.170000   19999.320000  ...  4796.0   79980.680680     0.0
1        0.0   4.130000   19999.320000  ...  4796.0   79980.680680     1.0
2        1.0   4.300000   15996.000000  ...  3720.0   63968.684680     2.0
3       -1.0   4.470000   38066.520000  ...  8516.0  101997.138160     3.0
4       -1.0   0.000000       0.000000  ...     0.0  101997.138160     4.0
...      ...        ...            ...  ...     ...            ...     ...
1175     1.0  30.100000    1745.800000  ...    58.0    7106.319179  1175.0
1176     1.0  32.209999    1417.239956  ...    44.0    5687.661983  1176.0
1177     1.0  30.480000    1127.760000  ...    37.0    4558.774223  1177.0
1178    -1.0  32.720001  163436.404995  ...  4995.0  167831.742813  1178.0
1179    -1.0   0.000000       0

## Save test results and transactions log

In [None]:
# Persist the test transaction log to Google Drive (the model is not re-saved).
from google.colab import drive

drive.mount('/content/gdrive')

# agent.save_model()

# NOTE(review): the ticker-based path below is immediately replaced by the
# hard-coded one that follows - confirm which of the two is wanted.
transactions_test_file = ''.join([
    '/content/gdrive/My Drive/Colab Notebooks/saved_files/transactions_test_',
    ticker,
    '.csv',
])
transactions_test_file = '/content/gdrive/My Drive/Colab Notebooks/saved_files/transactions_test_AMD_data_VOO_model.csv'

# Write the test transaction log as CSV without the index column.
transactions_test.to_csv(transactions_test_file, index=False)

# Echo the final balance, then append it as one extra trailing line.
# NOTE(review): the appended line is not a CSV record, so anything re-reading
# this file as CSV has to skip the last line.
print(current_balance)

text = '\n' + 'Current balance:' + str(current_balance)
with open(transactions_test_file, 'a') as trans_file:
    trans_file.write(text)
print('Write OK')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
27542.04949144795
Write OK
