In [1]:
import pandas as pd
import random
import numpy as np
from random import shuffle

BUY_ACTION_TOP = 0.4
SELL_ACTION_LOW = 0.6
T1_start = 10
T2_start = 1000
MARKET = 'EURUSD_4H'
dataset = pd.read_csv('./Data/{}_Cleaned.csv'.format(MARKET))
MAX_DAYS_IN_POSITION = 20


class Environment():
  def __init__(self, config):
    self.dataset = dataset
    self.config = config
    self.time = 0
    self.maxtime = len(self.dataset)
    self.orders = []

  # actions : [sell witch token, percent, period]
  # 1 : token A (€)
  # 2 : token B ($)
  # 0 : do nothing
  def take_action(self, action, budget):
    # print('Start budget at day #{} : {}'.format(self.time, budget))
    price = self.dataset.loc[self.time, 'open']
    token_a_change, token_b_change = self.rebalance()
    budget[0] += token_a_change
    budget[1] += token_b_change
    budget_update = budget
    terminal = False
    future_networth = 0
    current_networth = budget[0] * price + budget[1]
    reward = 0
    # print('Budget after rebalance at day #{} : {}'.format(self.time, budget))

    if 0 <= action[0] < BUY_ACTION_TOP: # euro -> dollar
      percent = action[0] / BUY_ACTION_TOP
      percent = 1 - percent
      token_a = budget[0]
      token_b = budget[1]
      token_a_to_sell = token_a * percent # we have number of 'token a' that agent wants to sell
      token_b_to_buy = token_a_to_sell * price
      budget_update = [token_a - token_a_to_sell, token_b + token_b_to_buy]
      duration = int(action[1] * MAX_DAYS_IN_POSITION)
      self.orders.append([action[0], token_b_to_buy, duration])
      future_networth = self.future(action[0], token_b_to_buy, self.time+duration, budget)
      reward = future_networth - current_networth
      # print('Budget after take action(1) at day #{} : {} with {} percent'.format(self.time, budget_update, percent))

    elif SELL_ACTION_LOW < action[0] <= 1: # dollar -> euro
      percent = (action[0] - SELL_ACTION_LOW) / BUY_ACTION_TOP
      token_a = budget[0]
      token_b = budget[1]
      token_b_to_sell = token_b * percent # we have number of 'token b' that agent wants to sell
      token_a_to_buy = token_b_to_sell / price
      budget_update = [token_a + token_a_to_buy, token_b - token_b_to_sell]
      duration = int(action[1] * MAX_DAYS_IN_POSITION)
      self.orders.append([action[0], token_a_to_buy, duration])
      future_networth = self.future(action[0], token_a_to_buy, self.time+duration, budget)
      reward = future_networth - current_networth
      # print('Budget after take action(2) at day #{} : {} with {} percent'.format(self.time, budget_update, percent))


    self.time += 1
    next_state = self.next_state(budget_update)

    if self.time == self.maxtime:
      terminal = True
    self
    return budget_update, next_state, (reward), terminal
      

  def reset(self):
    self.time = 0
    self.orders = []
    budget = [T1_start, T2_start]
    s = np.array(self.next_state(budget))
    return s

  def future(self, action, token, time, budget):
    token_a_change = 0
    token_b_change = 0
    price = self.dataset.loc[time, 'open']
    if 0 <= action <= BUY_ACTION_TOP:
      token_a_change = token / price
      token_b_change = -1 * token
    elif SELL_ACTION_LOW <= action <= 1:
      token_b_change = token * price
      token_a_change = -1 * token

    token_a_future = budget[0] + token_a_change
    token_b_future = budget[1] + token_b_change
    future_networth = token_a_future * price + token_b_future
    return future_networth


  def get_networth(self, budget):
    price = self.dataset.loc[self.time, 'open']
    return budget[0] * price + budget[1]
  
  
  def rebalance(self):
    token_a_change = 0
    token_b_change = 0
    for item in self.orders:
      if self.time == item[2]:
        price = self.dataset.loc[self.time, 'open']
        if 0 <= item[0] <= BUY_ACTION_TOP:
          token_a_change = item[1] / price
          token_b_change = -1 * item[1]
        elif SELL_ACTION_LOW <= item[0] <= 1:
          token_b_change = item[1] * price
          token_a_change = -1 * item[1]

    return token_a_change, token_b_change

  def next_state(self, budget):
    open = self.dataset.loc[self.time, 'open']
    close = self.dataset.loc[self.time, 'close']
    high = self.dataset.loc[self.time, 'high']
    low = self.dataset.loc[self.time, 'low']
    body_atr = self.dataset.loc[self.time , 'bodyatr_buck']
    up_atr = self.dataset.loc[self.time , 'upperatr_buck']
    low_atr = self.dataset.loc[self.time , 'loweratr_buck']
    # obs = [open, close, high, low, body_atr, up_atr, low_atr, budget[0], budget[1]]
    obs = [close, body_atr, up_atr, low_atr, budget[0], budget[1]]
    return obs



In [2]:
from scipy import stats


class Agent():
  def __init__(self, init_budget):
    self.budget = init_budget

  def generate_action(self):
    action = [0, 0]
    # rand = random.random()
    mean = 0.5
    std = 0.2
    desired_mean = 0.5
    desired_std = 0.1
    range_min = 0
    range_max = 1
    random_number = self.transform_normal_sample(mean, std, desired_mean, desired_std, range_min, range_max)
    # print('Random: {}'.format(random_number))
    rand = self.unormal(random_number)
    if 0 <= rand <= BUY_ACTION_TOP:
      action[0] = rand
      action[1] = random.random()
    elif SELL_ACTION_LOW <= rand < 1:
      action[0] = rand
      action[1] = random.random()
    else:
      action[0] = rand
      action[1] = 0
    return action
  
  def transform_normal_sample(self, mean, std, desired_mean, desired_std, range_min, range_max):
    # Generate one sample from the original normal distribution
    sample = np.random.normal(mean, std)

    # Apply standardization
    transformed_sample = (sample - mean) / std

    # Adjust for desired mean and standard deviation
    transformed_sample = transformed_sample * desired_std + desired_mean

    # Clip the value to the desired range
    transformed_sample = np.clip(transformed_sample, range_min, range_max)

    return transformed_sample



  def unormal(self, num):
    if 0 < num < 0.5:
      return 0.5 - num
    elif 0.5 <= num < 1:
      return 1.5 - num
    elif num < 0:
      return 0
    return 1
    

In [3]:
config = {}
env = Environment(config)
init_budget = [T1_start, T2_start]
agent = Agent(init_budget)

# todo: Loop for iteration and collect 
trajectory_no = 5000
timestep_no = 300
trajectories = []

# random episodes
for i in range(3200):
    starting_point = random.randint(1, len(dataset) - timestep_no - 20)
    env.time = starting_point
    traj_dict = {}
    obs = []
    next_observation = []
    action = []
    reward = []
    terminal = []
    
    for j in range(timestep_no):
        obs.append(env.next_state(agent.budget))
        act = agent.generate_action()
        action.append(act)
        bu, ns, r, t = env.take_action(act, agent.budget)
        reward.append(r)
        terminal.append(t)
        next_observation.append(ns)
        agent.budget = bu

    obs_np = np.array(obs)
    obs_norm = obs_np/obs_np.max(axis=0)
    next_obs_np = np.array(next_observation)
    next_obs_norm = next_obs_np/next_obs_np.max(axis=0)
    reward_np = np.array(reward)
    # reward_norm = reward_np/reward_np.max(axis=0)
    
    traj_dict = {
        'observations' : obs_norm.tolist(),
        'next_observations' : next_obs_norm.tolist(),
        'actions' : action,
        'rewards': reward,
        'terminals' : terminal
    }

    trajectories.append(traj_dict)
    env.reset()
    networth = env.get_networth(agent.budget) 
    agent.budget = init_budget
    print(i)
    print('Final Budget: {}'.format(networth))

# good episodes
for i in range(900):
    starting_point = random.randint(1, len(dataset) - timestep_no - 20)
    env.time = starting_point
    traj_dict = {}
    obs = []
    next_observation = []
    action = []
    reward = []
    terminal = []
    reward_tresh = random.randint(-15, 3)

    done_loop = False
    j = 0
    if_counter = 0
    while not done_loop:
        act = agent.generate_action()
        # print(env.time)
        bu, ns, r, t = env.take_action(act, agent.budget)

        if (r < reward_tresh):
            env.time += -1
            if_counter += 1
            if if_counter < 20:
                continue
            else:
                if_counter = 0
        obs.append(env.next_state(agent.budget))
        action.append(act)
        reward.append(r)
        terminal.append(t)
        next_observation.append(ns)
        agent.budget = bu
        j += 1
        if j == timestep_no :
            done_loop = True
    # for k in range(len(reward)-2, -1, -1):
    #     reward[k] += reward[k+1]
    obs_np = np.array(obs)
    obs_norm = obs_np/obs_np.max(axis=0)
    next_obs_np = np.array(next_observation)
    next_obs_norm = next_obs_np/next_obs_np.max(axis=0)
    reward_np = np.array(reward)
    # reward_norm = reward_np/reward_np.max(axis=0)
    
    traj_dict = {
        'observations' : obs_norm.tolist(),
        'next_observations' : next_obs_norm.tolist(),
        'actions' : action,
        'rewards': reward,
        'terminals' : terminal
    }

    trajectories.append(traj_dict)
    env.reset()
    networth = env.get_networth(agent.budget) 
    agent.budget = init_budget
    print(i)
    print('Final Budget: {}'.format(networth))

# bad episodes
for i in range(900):
    starting_point = random.randint(1, len(dataset) - timestep_no - 20)
    env.time = starting_point
    traj_dict = {}
    obs = []
    next_observation = []
    action = []
    reward = []
    terminal = []
    reward_tresh = random.randint(-3, 15)
    # print(reward_tresh)

    done_loop = False
    j = 0
    if_counter = 0
    while not done_loop:
        act = agent.generate_action()
        bu, ns, r, t = env.take_action(act, agent.budget)
        if (r > reward_tresh):
            env.time += -1
            if_counter += 1
            if if_counter < 20:
                continue
            else:
                if_counter = 0
        obs.append(env.next_state(agent.budget))
        action.append(act)
        reward.append(r)
        terminal.append(t)
        next_observation.append(ns)
        agent.budget = bu
        j += 1
        # print(j)
        if j == timestep_no :
            done_loop = True

    obs_np = np.array(obs)
    # print(obs)
    # print('prev: {}'.format(reward))
    # for k in range(len(reward)-2, -1, -1):
    #     reward[k] += reward[k+1]
    # print('after: {}'.format(reward))
    obs_norm = obs_np/obs_np.max(axis=0)
    next_obs_np = np.array(next_observation)
    next_obs_norm = next_obs_np/next_obs_np.max(axis=0)
    reward_np = np.array(reward)
    # reward_norm = reward_np/reward_np.max(axis=0)
    
    traj_dict = {
        'observations' : obs_norm.tolist(),
        'next_observations' : next_obs_norm.tolist(),
        'actions' : action,
        'rewards': reward,
        'terminals' : terminal
    }

    trajectories.append(traj_dict)
    env.reset()
    networth = env.get_networth(agent.budget) 
    agent.budget = init_budget
    print(i)
    print('Final Budget: {}'.format(networth))


random.shuffle(trajectories)
# shuffle(trajectories)
# for ii, sublist in enumerate(trajectories): shuffle(trajectories[ii])

print(trajectories[0])

import csv
filename = './Data/{}_trajectories_5.csv'.format(MARKET)

myFile = open(filename, 'w')
writer = csv.writer(myFile)
writer.writerow(['observations', 'next_observations', 'actions', 'rewards', 'dones'])
for dictionary in trajectories:
    writer.writerow(dictionary.values())
myFile.close()

0
Final Budget: 2077.791955321202
1
Final Budget: 2470.2269128035214
2
Final Budget: 2316.794987277519
3
Final Budget: 2107.5415878353156
4
Final Budget: 2132.1529453516177
5
Final Budget: 2359.522840887553
6
Final Budget: 2153.9190835532877
7
Final Budget: 2136.2646603178537
8
Final Budget: 2211.969061888828
9
Final Budget: 1986.762740285744
10
Final Budget: 2308.51646838138
11
Final Budget: 2080.855416531046
12
Final Budget: 2361.6126656026963
13
Final Budget: 2137.2214026938664
14
Final Budget: 2049.4697546065013
15
Final Budget: 2040.55449844503
16
Final Budget: 2081.931584755539
17
Final Budget: 2144.1686498610425
18
Final Budget: 2041.779734463041
19
Final Budget: 2118.6084190160427
20
Final Budget: 2203.683965702529
21
Final Budget: 2256.61683370289
22
Final Budget: 2142.0599908414533
23
Final Budget: 2149.579244189622
24
Final Budget: 2134.2655723822313
25
Final Budget: 2364.6077562677947
26
Final Budget: 2177.535578840999
27
Final Budget: 2309.6926263616906
28
Final Budget: 20