In [66]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from macro_agent import MacroAgent, ReplayMemory, QNetwork, Transition
import torch
import torch.optim
import torch.nn as nn
from mpl_finance import candlestick_ohlc
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import datetime as datetime
from datetime import datetime
import matplotlib.dates as mdates
from tqdm import tqdm

In [95]:
# Tunable settings for the experiment; every value below is read by later cells.

# Argument handed to ReplayMemory(...) when the replay buffer is created.
CAPACITY = 500
# Exploration probability assigned to cur_epsilon at the start of each epoch.
INIT_EPSILON = 0.4
# Discount factor multiplied with the network output when building Q targets.
GAMMA_DISCOUNT = 0.8
# Number of iterations of the outer training loop.
EPOCH_COUNT = 500
# Number of entries requested from replay_memory.sample(...) per step.
MINI_BATCH_SIZE = 10
# Look-back length (in rows) used by Environment's rolling statistics.
WINDOW_SIZE = 20
# Integer codes the training loop compares the chosen action against.
ACTIONS={"HOLD":0, "SELL":1, "BUY":2}

In [97]:
class Environment:
    """Walks through a market-data frame row by row and derives indicators.

    The frame must expose ``Close`` and ``Volume`` columns, which are read by
    ``get_price`` and ``get_indicators``. Rolling statistics look back over
    ``window_size`` rows, clamped at the start of the frame.
    """

    def __init__(self, all_data, window_size=None):
        """Start at the first row of ``all_data``.

        Args:
            all_data: pandas DataFrame with at least one row (row 0 is read
                immediately via ``iloc``).
            window_size: look-back length in rows. When omitted, the
                module-level ``WINDOW_SIZE`` constant is used.
        """
        self._all_data = all_data
        self._window_size = WINDOW_SIZE if window_size is None else window_size
        self.current_market = all_data.iloc[0]
        self._pointer = 0
        self.done = False

    def taken_action(self):
        """Advance to the next row, or set ``done`` when no row is left.

        The bounds check happens before the row lookup, so stepping from the
        last row marks the episode finished instead of raising IndexError.
        The pointer and ``current_market`` then stay on the last row.
        """
        next_pointer = self._pointer + 1
        if next_pointer >= len(self._all_data):
            self.done = True
            return
        self._pointer = next_pointer
        self.current_market = self._all_data.iloc[next_pointer]

    def get_price(self):
        """Return the ``Close`` value of the current row."""
        return self.current_market['Close']

    def get_change_zscore(self, column):
        """Z-score of the current relative change of ``column``.

        The relative change of a row is its value divided by the mean of the
        preceding window, minus one. The current row's change is standardised
        against the changes of the rows inside the current window.

        Returns NaN when a row has no preceding history (its window is empty),
        which is the case while the pointer is within ``window_size`` rows of
        the start of the frame.
        """
        window_start = max(0, self._pointer - self._window_size)
        frame = self._all_data.iloc[window_start:self._pointer]
        market_now = self._all_data.iloc[self._pointer]
        current_change = market_now[column] / np.mean(frame[column]) - 1

        # Always a float buffer: inheriting an integer column dtype would
        # truncate the ratios stored below.
        past_changes = np.empty(len(frame), dtype=float)
        for row in range(window_start, self._pointer):
            history_start = max(0, row - self._window_size)
            history = self._all_data.iloc[history_start:row][column]
            past_changes[row - window_start] = (
                self._all_data.iloc[row][column] / np.mean(history) - 1
            )

        return (current_change - np.mean(past_changes)) / np.std(past_changes)

    def get_EMA(self, t):
        """One-step exponential moving average of row ``t``, per column.

        Uses smoothing ``k = 2 / (window_size + 1)`` and the simple mean of
        the ``window_size`` rows preceding ``t`` as the previous average:
        ``k * row_t + (1 - k) * mean(previous rows)``.

        Args:
            t: non-negative row position. Negative values are not rejected;
                pandas would interpret them relative to the end of the frame.

        Returns:
            pandas Series indexed by column name (NaN when ``t`` has no
            preceding rows).
        """
        smoothing = 2 / (self._window_size + 1)
        window_start = max(0, t - self._window_size)
        # Explicit column-wise mean keeps the result a per-column Series
        # regardless of how np.mean dispatches on a DataFrame.
        previous_average = self._all_data.iloc[window_start:t].mean()
        return smoothing * self._all_data.iloc[t] + (1 - smoothing) * previous_average

    def get_indicators(self):
        """Compute the indicator set for the current row.

        Returns:
            dict with keys ``z_score_price``, ``z_score_price_change``,
            ``z_score_volume``, ``z_score_volume_change`` (scalars) and
            ``volatility`` (per-column Series: relative change of the EMA
            between the start of the window and the current row).
            TODO(review): pick the column(s) of ``volatility`` the agent
            should actually consume.
        """
        window_start = max(0, self._pointer - self._window_size)
        market_now = self._all_data.iloc[self._pointer]
        frame = self._all_data.iloc[window_start:self._pointer]

        # price
        z_score_price = (
            market_now['Close'] - np.mean(frame['Close'])
        ) / np.std(frame['Close'])
        # price change
        z_score_price_change = self.get_change_zscore('Close')
        # volume (normalised by volume statistics, not price statistics)
        z_score_volume = (
            market_now['Volume'] - np.mean(frame['Volume'])
        ) / np.std(frame['Volume'])
        # volume change
        z_score_volume_change = self.get_change_zscore('Volume')
        # volatility; the look-back position is clamped so a negative index
        # never wraps around to rows at the end of the frame
        ema_now = self.get_EMA(self._pointer)
        ema_past = self.get_EMA(window_start)
        volatility = (ema_now - ema_past) / ema_past

        return {
            'z_score_price': z_score_price,
            'z_score_price_change': z_score_price_change,
            'z_score_volume': z_score_volume,
            'z_score_volume_change': z_score_volume_change,
            'volatility': volatility,
        }


def decay_epsilon(cur_epsilon, rate=0.9):
    """Return the exploration probability after one multiplicative decay step.

    Args:
        cur_epsilon: current exploration probability.
        rate: multiplicative factor applied per step (defaults to 0.9).

    Returns:
        ``cur_epsilon * rate``.
    """
    return cur_epsilon * rate

In [72]:
def load_data(path='RESULT.json',
              start='2018-11-15 00:00:00',
              end='2018-11-17 17:06:00'):
    """Load candles from a JSON file into a time-indexed frame.

    The file is read with ``pd.read_json``. Column 0 is converted with
    ``datetime.fromtimestamp`` (so it is interpreted in the local timezone)
    and becomes the index; columns 1-5 are renamed to Open, High, Low, Close
    and Volume.

    Args:
        path: location of the JSON file.
        start: inclusive lower bound on the index.
        end: inclusive upper bound on the index.

    Returns:
        DataFrame sorted by time, restricted to ``start <= index <= end``.
    """
    candles = pd.read_json(path).rename(
        columns={1: 'Open', 2: 'High', 3: 'Low', 4: 'Close', 5: 'Volume'}
    )
    candles[0] = candles[0].transform(datetime.fromtimestamp)
    candles = candles.set_index([0]).sort_index()
    in_range = (candles.index >= start) & (candles.index <= end)
    return candles[in_range]

def get_train_data(market_data, split='2018-11-16 00:00:00'):
    """Return the rows whose index is at or before ``split``.

    Args:
        market_data: DataFrame indexed by timestamp.
        split: inclusive upper bound of the training period.
    """
    return market_data[market_data.index <= split]

def get_test_data(market_data, split='2018-11-16 00:00:00'):
    """Return the rows whose index is strictly after ``split``.

    The bound is exclusive so that a row stamped exactly at ``split``, which
    get_train_data keeps, is not also part of the test set.

    Args:
        market_data: DataFrame indexed by timestamp.
        split: exclusive lower bound of the test period.
    """
    return market_data[market_data.index > split]
    

In [5]:
# Objects shared by the training loop; ReplayMemory and MacroAgent come from
# the project-local macro_agent module.
replay_memory = ReplayMemory(CAPACITY)
macro_agent = MacroAgent()
# Adam with its default hyperparameters over the agent's Q-network parameters.
optimizer = torch.optim.Adam(macro_agent.q_network.parameters())
# Mean-squared-error loss, used in the training loop on the Q targets.
criterion = nn.MSELoss()

In [6]:
# Epsilon-greedy training loop: pick an action, step the environment, store the
# transition, then fit the Q-network on a sampled mini-batch.
# NOTE(review): this cell does not run as written; see the notes below.
for epoch in tqdm(range(EPOCH_COUNT)):
    done = False
    # Epsilon is reset here, so the decay at the bottom only lasts one epoch.
    cur_epsilon = INIT_EPSILON
    # NOTE(review): Environment.__init__ reads all_data.iloc[0]; a plain list
    # has no .iloc, so this needs a DataFrame (e.g. the training split).
    environment = Environment([])
    while not done:
        macro_agent.q_network.eval()
        decision = np.random.rand()
        if decision < cur_epsilon:
            # epsilon
            action = np.random.choice(3)
        else:
            # 1-epsilon
            # NOTE(review): the network is called without any input here;
            # presumably it should receive the current state - confirm.
            action = np.argmax(macro_agent.q_network())
        

        environment.taken_action()
        # NOTE(review): the Environment class in this notebook defines
        # get_price() and current_market, but no current_price attribute.
        cur_price = environment.current_price
        if action == ACTIONS['SELL']:
            # `earning` is assigned but not read anywhere in this cell.
            earning = np.sum(np.ones_like(macro_agent.assets) * cur_price - macro_agent.assets)
        elif action == ACTIONS['BUY']:
            # np.append returns a new array; the result is discarded, so
            # macro_agent.assets is left unchanged by this line.
            np.append(macro_agent.assets, cur_price)
            
        # NOTE(review): the Environment class in this notebook does not define
        # current_state.
        s_next = environment.current_state['state']
        r_current = environment.current_state['reward']
        done = environment.current_state['done']

        replay_memory.push(environment.current_state)
        
        macro_agent.q_network.train()
        batch = replay_memory.sample(MINI_BATCH_SIZE)
        
        # Q targets: reward plus discounted network output for non-terminal
        # entries, plain reward for terminal ones.
        q = np.empty(MINI_BATCH_SIZE)
        # `object` shadows the Python builtin of the same name.
        for i, object in enumerate(batch):
            if not object['done']:
                q[i] = object['reward'] + GAMMA_DISCOUNT * macro_agent.q_network.forward()
            else:
                q[i] = object['reward']
        
        # NOTE(review): q is a NumPy array while the loss is a torch module;
        # presumably it must be converted to a tensor first - confirm.
        loss = criterion(q, macro_agent.q_network.forward())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        cur_epsilon = decay_epsilon(cur_epsilon)

TypeError: __init__() missing 1 required positional argument: 'all_data'

1


1.5841396395949376

8.881784197001253e-17

In [86]:
# Scratch inspection: `STD` is not assigned in any cell visible in this notebook.
STD

0.000526356444078581

In [80]:
# Scratch inspection: in the visible cells this name only exists as a local
# inside Environment methods, not as a notebook-level variable.
z_score_price_change

2.207746151691497

In [81]:
# Scratch check of the price z-score formula used in Environment.get_indicators.
# `market_now` and `frame` are not assigned at notebook level in any visible cell.
(market_now['Close'] - 
            np.mean(frame['Close'])) / np.std(frame['Close'])

2.207746151691383

In [None]:
# Candlestick chart of the loaded candles. load_data() moves the timestamp
# column into the index and renames the price columns, so the quotes are built
# from the index and the named columns rather than the raw labels 0..4.
quotes = zip(
    mdates.date2num(market_data.index.to_pydatetime()),
    market_data['Open'], market_data['High'],
    market_data['Low'], market_data['Close'],
)
fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)
# NOTE(review): width is in matplotlib date units (days); 0.6 may be too wide
# for intraday candles - confirm against the data frequency.
candlestick_ohlc(ax, quotes=quotes, width=0.6)
ax.xaxis_date()
ax.set(title='Market candles', xlabel='Time', ylabel='Price')
plt.show()

In [74]:
# Spot check of one closing price; `market_data` is not assigned in any visible
# cell (presumably the result of load_data() - confirm).
market_data.iloc[10]['Close']

6347.99