# **CODE SETUP (importing basic packages, reading data)**

In [1]:
#Installing missing packages
# NOTE(review): none of these installs pins a version, so a fresh runtime may
# resolve different releases than the ones this notebook was last run with.
!pip install yfinance
!pip install poloniex
# NOTE(review): `enviroment` looks like the local module referenced by the
# commented-out `from enviroment import OhlcvEnv` further down -- confirm that a
# PyPI package of this name is really what is wanted here.
!pip install enviroment

!pip install tensorflow
!pip install tf_agents
!pip install keras
!pip install keras-rl2

# NOTE(review): presumably these are also project-local helper modules rather
# than PyPI packages (the notebook defines its own FeatureExtractor and
# NormalizerProcessor) -- verify.
!pip install process_data
!pip install normalizer

# NOTE(review): `random` and `math` are part of the Python standard library, and
# so is `pathlib` on Python 3.4+; these installs are presumably unnecessary.
!pip install random
!pip install gym
!pip install math
!pip install pathlib

Collecting yfinance
  Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 6.0 MB/s 
Collecting requests>=2.26
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 366 kB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests~

In [2]:
# Basic Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import yfinance as yf
import datetime


In [3]:
# Date range of interest: 1 January 2020 through 30 November 2021
start = datetime.datetime(year=2020, month=1, day=1)
end = datetime.datetime(year=2021, month=11, day=30)

# **BTC Data Import and Normalization**
Note: the GitHub reference we are following does not show how to normalize data fetched with yfinance; it uses Poloniex instead, so this section does too. Feel free to change this part if you find a way to normalize the yfinance data.

In [4]:
# Getting BTC Data from Poloniex

import os
from time import time

from poloniex import Poloniex

polo = Poloniex()

# period = 1800 means 30 min intervals; period = 3600 means 1 hr intervals
CANDLE_PERIOD = 1800
# One "month" is approximated as 30 days, expressed in seconds.
SECONDS_PER_MONTH = 60 * 60 * 24 * 30
# Label written to the 'date' column; OhlcvEnv.load_from_csv parses the same layout.
# NOTE(review): the label only has hour resolution, so with 30 min candles the
# two candles of an hour receive the same label.
DATE_LABEL_FORMAT = '%Y-%m-%d %I-%p'
OUTPUT_CSV = '/content/data/test/BTC_USD_30min.csv'

# Read the clock once so every window boundary is derived from the same instant.
now = int(time())

# Test data -- getting 3 months of data from present
candles = polo.returnChartData('USDT_BTC', period=CANDLE_PERIOD,
                               start=now - SECONDS_PER_MONTH * 3)

# Train data -- getting 12 months of data from start of test
# candles = polo.returnChartData('USDT_BTC', period=CANDLE_PERIOD,
#                                start=now - SECONDS_PER_MONTH * 15,
#                                end=now - SECONDS_PER_MONTH * 3)

df = pd.DataFrame.from_dict(candles)
dates = pd.to_datetime(df['date'], unit='s')

results = pd.DataFrame(dates.apply(lambda x: x.strftime(DATE_LABEL_FORMAT)))
for column in ('close', 'open', 'low', 'high', 'volume'):
    results[column] = df[column]

# to_csv does not create missing directories, so make sure the target exists
# (it does not on a fresh runtime).
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
results.to_csv(OUTPUT_CSV)

In [5]:
# Normalizing the Data using sklearn

from rl.core import Processor
from rl.util import WhiteningNormalizer
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Number of trailing columns of every observation row that are passed through
# unscaled (they are treated as agent state rather than market features).
ADDITIONAL_STATE = 4


# class NormalizerProcessor(Processor):
class NormalizerProcessor:
    """Standardise the leading feature columns of each window in a batch.

    The last ``ADDITIONAL_STATE`` columns of every window are left untouched;
    all other columns are re-fitted and scaled window by window.
    """

    def __init__(self):
        self.normalizer = None
        self.scaler = StandardScaler()

    def process_state_batch(self, batch):
        """Return a copy of ``batch`` with the feature columns scaled per window.

        ``batch`` is indexed along its first axis; each ``batch[i]`` is split
        on its last axis into feature columns and the trailing
        ``ADDITIONAL_STATE`` columns, and the two parts are joined again on
        axis 1 after scaling the former.
        """
        def scale_window(window):
            # The scaler is re-fitted on every window, so each window is
            # normalised with its own statistics.
            market = self.scaler.fit_transform(window[..., :-ADDITIONAL_STATE])
            agent_part = window[..., -ADDITIONAL_STATE:]
            merged = np.concatenate((market, agent_part), axis=1)
            # Add a leading axis so the windows can be concatenated back into a batch.
            return merged[np.newaxis]

        scaled = tuple(scale_window(batch[idx]) for idx in range(batch.shape[0]))
        return np.concatenate(scaled)

# **Feature Extraction**

In [6]:
# Extracting features from the data

from sklearn.preprocessing import MinMaxScaler

class FeatureExtractor:
    """Derive candle-shape and rolling-return features from an OHLCV DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'open', 'close', 'high', 'low' and 'volume'.
        Values may be numeric or numeric strings; they are cast to float once
        here. The frame is modified in place by ``add_bar_features``.
    """

    def __init__(self, df):
        self.df = df
        self.open = df['open'].astype('float')
        self.close = df['close'].astype('float')
        self.high = df['high'].astype('float')
        self.low = df['low'].astype('float')
        self.volume = df['volume'].astype('float')

    def add_bar_features(self, window=50):
        """Add the bar and rolling-return columns to the frame and return it.

        Parameters
        ----------
        window : int, default 50
            Length of the rolling windows used for 'ret_mean' and 'ret_std'.

        Returns
        -------
        pandas.DataFrame
            The same DataFrame object that was passed to the constructor, with
            the columns 'bar_hc', 'bar_ho', 'bar_hl', 'bar_cl', 'bar_ol',
            'bar_co', 'ret_mean' and 'ret_std' added. The first rows of the
            rolling columns are NaN until the windows are full.
        """
        # stationary candle: pairwise differences between open/high/low/close
        self.df['bar_hc'] = self.high - self.close
        self.df['bar_ho'] = self.high - self.open
        self.df['bar_hl'] = self.high - self.low
        self.df['bar_cl'] = self.close - self.low
        self.df['bar_ol'] = self.open - self.low
        self.df['bar_co'] = self.close - self.open

        # Use the float-cast close series (not the raw column) so that string
        # typed input works here exactly like it does for the bar features.
        self.df['ret_mean'] = self.close.rolling(window).mean() - self.close
        # Deviation from the rolling mean, scaled by its own rolling std.
        # A zero rolling std yields +/-inf (or NaN); that is not filtered here.
        self.df['ret_std'] = self.df['ret_mean'] / self.df['ret_mean'].rolling(window).std()
        return self.df

# **Creating the model environment (trading rules, helper functions)**

In [7]:
import process_data
import random
import gym
from gym import spaces
from gym.utils import seeding
import math
from pathlib import Path

# Position codes; the environment one-hot encodes the current position into its state.
LONG, FLAT = 0, 2

# Action codes accepted by the environment's step().
BUY, SELL, HOLD = 0, 1, 2

In [8]:
class OhlcvEnv(gym.Env):
    """Long-only trading environment over OHLCV candles loaded from a CSV file.

    On construction one CSV file is loaded from ``path`` (the last file name
    in sorted order) and the bar features are derived with
    ``FeatureExtractor``. Three discrete actions are exposed: BUY, SELL and
    HOLD. At most one long position is open at a time; an action that is not
    valid for the current position is treated as HOLD.

    Every observation returned by ``reset``/``step`` is a 1-D vector holding
    the feature row of the current tick, a one-hot encoding of the current
    position (3 values) and the unrealised profit of the open position, i.e.
    ``n_features + 4`` values. ``self.shape`` additionally carries a leading
    ``window_size`` axis.
    NOTE(review): presumably the agent's memory stacks ``window_size``
    observations to reach that shape -- confirm against the caller.

    Parameters
    ----------
    window_size : int
        Number of ticks per network input; also shortens the episode by that
        many ticks.
    path : str or pathlib.Path
        Directory that contains the candle CSV files.
    show_trade : bool, default True
        When true, portfolio value and trade count are printed every 100 ticks.
    """

    # Layout of the CSV 'date' column (12-hour clock with AM/PM marker).
    DATE_FORMAT = '%Y-%m-%d %I-%p'

    def __init__(self, window_size, path, show_trade=True):
        self.show_trade = show_trade
        self.path = path
        self.actions = ["BUY",  "SELL", "HOLD"]
        self.fee = 0.0005
        self.seed()
        self.file_list = []
        # load_csv
        self.load_from_csv()

        # n_features
        self.window_size = window_size
        self.n_features = self.df.shape[1]
        # +4 = one-hot position (3) + unrealised profit (1), see updateState
        self.shape = (self.window_size, self.n_features+4)

        # defines action space
        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

    # data is taken from https://www.cryptodatadownload.com/data/northamerican/
    def load_from_csv(self):
        """Load the next CSV file and prepare the per-tick arrays.

        Sets ``closingPrices``, ``returnStd``, ``dates`` and replaces
        ``self.df`` with a 2-D NumPy array of the selected feature columns.
        Rows containing NaN (e.g. the warm-up rows of the rolling features)
        are dropped.
        """
        if len(self.file_list) == 0:
            self.file_list = [x.name for x in Path(self.path).iterdir() if x.is_file()]
            self.file_list.sort()
        self.rand_episode = self.file_list.pop()

        # Path join works whether or not `path` ends with a separator.
        raw_df = pd.read_csv(Path(self.path) / self.rand_episode)
        # Parse after loading: `pd.datetime` no longer exists in pandas 2.x and
        # read_csv's `date_parser` argument is deprecated.
        raw_df['date'] = pd.to_datetime(raw_df['date'], format=self.DATE_FORMAT)

        # The timestamps only have hour resolution, so sub-hourly candles share
        # a timestamp; a stable sort keeps such rows in their file order.
        raw_df = raw_df.sort_values(by='date', kind='mergesort').reset_index(drop=True)
        #extractor = process_data.FeatureExtractor(raw_df)
        extractor = FeatureExtractor(raw_df)
        self.df = extractor.add_bar_features() # bar features o, h, l, c ---> C(4,2) = 4*3/2*1 = 6 features

        ## selected manual features
        feature_list = [
            'bar_hc',
            'bar_ho',
            'bar_hl',
            'bar_cl',
            'bar_ol',
            'bar_co', 'ret_std']
        #feature_list = ['ret_std']
        self.df.dropna(inplace=True) # drops Nan rows
        self.closingPrices = self.df['close'].values
        self.returnStd = self.df['ret_std'].values
        self.dates = self.df['date'].values
        self.df = self.df[feature_list].values

    def render(self, mode='human', verbose=False):
        """Rendering is not implemented; always returns None."""
        return None

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _net_return(self, price):
        """Return of the open position at ``price``, with the fee applied twice.

        Computed relative to ``self.entry_price``; the squared ``(1 - fee)``
        factor charges the fee once for the entry and once for the exit.
        """
        return ((price - self.entry_price)/self.entry_price + 1)*(1-self.fee)**2 - 1

    def step(self, action):
        """Advance one tick.

        Parameters
        ----------
        action : int
            One of BUY, SELL, HOLD.

        Returns
        -------
        tuple
            ``(state, reward, done, info)`` where ``info`` is always ``{}``.
            The reward is 0 except on a valid SELL (net return of the closed
            trade). On the final step it is replaced by ``get_profit()``.
        """
        if self.done:
            return self.state, self.reward, self.done, {}
        self.reward = 0

        self.closingPrice = float(self.closingPrices[self.current_tick])
        self.datetime = self.dates[self.current_tick]

        # action comes from the agent
        # 0 buy, 1 sell, 2 hold
        # single position can be opened per trade
        # valid action sequence would be
        # LONG : buy - hold - hold - sell
        # invalid action sequence is just considered hold
        # (e.g.) "buy - buy" would be considered "buy - hold"
        self.action = HOLD  # hold
        if action == BUY: # buy
            if self.position == FLAT: # if previous position was flat
                self.position = LONG # update position to long
                self.action = BUY # record action as buy
                self.entry_price = self.closingPrice # maintain entry price
        elif action == SELL: # close the open long position
            if self.position == LONG:
                self.position = FLAT
                self.action = SELL
                self.exit_price = self.closingPrice
                self.reward += self._net_return(self.exit_price)
                self.krw_balance = self.krw_balance * (1.0 + self.reward)
                self.n_long += 1
                self.entry_price = 0

        # [coin + krw_won] total value evaluated in krw won
        if self.position == LONG:
            new_portfolio = self.krw_balance * (1.0 + self._net_return(self.closingPrice))
        else:
            new_portfolio = self.krw_balance

        self.portfolio = new_portfolio
        self.current_tick += 1
        if(self.show_trade and self.current_tick%100 == 0):
            print("Tick: {0}/ Portfolio (krw-won): {1}".format(self.current_tick, self.portfolio))
            print("Long: {0}".format(self.n_long))
        self.updateState()
        # history records the reward as it was before any end-of-episode override below
        self.history.append((self.action, self.datetime, self.closingPrice, self.portfolio, self.reward))
        if (self.current_tick > (self.df.shape[0]) - self.window_size-1):
            self.done = True
            self.reward = self.get_profit() # return reward at end of the game
        return self.state, self.reward, self.done, {}

    def get_profit(self):
        """Unrealised net return of the open position, or 0 when flat."""
        if self.position == LONG:
            return self._net_return(self.closingPrice)
        return 0

    def reset(self):
        """Start a new episode at tick 0 and return the initial state."""
        # self.current_tick = random.randint(0, self.df.shape[0]-1000)
        self.current_tick = 0
        print("start episode ... {0} at {1}" .format(self.rand_episode, self.current_tick))

        # positions
        self.n_long = 0

        # clear internal variables
        self.history = [] # keep buy, sell, hold action history
        self.krw_balance =  10000 # initial balance, u can change it to whatever u like
        self.portfolio = float(self.krw_balance) # (coin * current_price + current_krw_balance) == portfolio
        self.profit = 0

        self.action = HOLD
        self.position = FLAT
        self.done = False

        self.updateState() # returns observed_features +  opened position(LONG/SHORT/FLAT) + profit_earned(during opened position)
        return self.state

    def updateState(self):
        """Rebuild ``self.state`` for the current tick and return it."""
        # The position code is one-hot encoded over len(self.actions) == 3
        # slots, which is enough for the codes LONG (0) and FLAT (2).
        one_hot_position = np.eye(len(self.actions))[self.position]
        profit = self.get_profit()
        # features of the current tick + one-hot position + unrealised profit
        self.state = np.concatenate((self.df[self.current_tick], one_hot_position, [profit]))
        return self.state

    def save_history(self, name = 'trade_history.csv'):
        """Write the per-step history of the current episode to a CSV file."""
        df = pd.DataFrame(self.history, columns=['action','ts','price','portfolio','reward'])
        df.to_csv(name)

# **Main Code**

In [18]:
# import keras
# from keras.models import Sequential
# from keras.layers import Dense, Activation, Flatten, CuDNNLSTM, LSTM, Conv1D, MaxPooling1D
# from keras.optimizers import Adam

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, LSTM, Conv1D, MaxPooling1D
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.optimizers import Adam

# keras-rl agent
import keras
from rl.agents.dqn import DQNAgent
# from tf_agents.agents.dqn import dqn_agent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory

# Obsolete -- find new
# trader environment
# from enviroment import OhlcvEnv
# custom normalizer
# from normalizer import NormalizerProcessor

def create_model(shape, nb_actions):
    """Build the 1-D convolutional Q-network used by the DQN agent.

    Parameters
    ----------
    shape : tuple
        Input shape passed to the first Conv1D layer as ``input_shape``.
    nb_actions : int
        Number of output units, one Q-value per action.

    Returns
    -------
    Sequential
        The uncompiled Keras model.
    """
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=6, padding="same", activation="tanh",input_shape=shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=32, kernel_size=3, padding="same", activation="tanh"))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    # Q-values are unbounded regression targets (the environment's rewards can
    # be negative), so the head must be linear: a softmax would force the
    # outputs into [0, 1] and make them sum to 1.
    # NOTE(review): with enable_dueling_network=True keras-rl reportedly
    # rebuilds the final layer itself -- confirm; linear is correct either way.
    model.add(Dense(nb_actions, activation='linear'))
    return model

def main():
    """Train a dueling DQN agent on the training data, then run one test episode.

    Builds one environment from ./data/train/ and one from ./data/test/, fits
    the agent on the former, tests it for a single episode on the latter and
    finally writes a trade history CSV.
    """
    # OPTIONS
    TIME_STEP = 20             # window size of the env and window_length of the replay memory
    MAX_EPISODE_STEPS = 17511  # upper bound on the steps of one training episode
    TRAIN_STEPS = MAX_EPISODE_STEPS * 10  # total number of training steps

    # Get the environment and extract the number of actions.
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    # summary() prints by itself and returns None, so it must not be wrapped in print().
    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=1000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
            attr='eps', value_max=1., value_min=.2, value_test=.05, nb_steps=3000)
    # policy = EpsGreedyQPolicy()

    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=200,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy)
                    # processor=NormalizerProcessor())

    # `lr` is a deprecated alias; `learning_rate` is the supported argument name.
    dqn.compile(Adam(learning_rate=1e-3), metrics=['crossentropy'])

    # train
    # dqn.load_weights('model/duel_dqn_weights-a-2.h5f')
    dqn.fit(env, nb_steps=TRAIN_STEPS, nb_max_episode_steps=MAX_EPISODE_STEPS,
            visualize=False, verbose=2)
    # dqn.save_weights('model/duel_dqn_weights-a-2.h5f', overwrite=True)

    # validate
    dqn.test(env_test, nb_episodes=1, visualize=True)

    # NOTE(review): this saves the history of the *training* environment even
    # though it runs right after the test episode -- `env_test.save_history()`
    # may have been intended; confirm before changing.
    env.save_history()

# **Execute**

In [19]:
# Kick off training and the test episode when this cell runs as the main program.
if __name__ == '__main__':
    main()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 20, 64)            4288      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 10, 64)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 10, 32)            6176      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 5, 32)            0         
 1D)                                                             
                                                                 
 flatten (Flatten)           (None, 160)               0         
                                                                 
 dense (Dense)               (None, 3)                 4

  super(Adam, self).__init__(name, **kwargs)


Training for 175110 steps ...
start episode ... BTC_USD_30min.csv at 0


  updates=self.state_updates,


Tick: 100/ Portfolio (krw-won): 9832.875107231877
Long: 18
Tick: 200/ Portfolio (krw-won): 9916.206776231853
Long: 33
Tick: 300/ Portfolio (krw-won): 9626.425751601886
Long: 47
Tick: 400/ Portfolio (krw-won): 9071.808862115195
Long: 68
Tick: 500/ Portfolio (krw-won): 9046.481365319602
Long: 85
Tick: 600/ Portfolio (krw-won): 8988.544879600988
Long: 103
Tick: 700/ Portfolio (krw-won): 8813.473410469693
Long: 120
Tick: 800/ Portfolio (krw-won): 8831.22601052435
Long: 139
Tick: 900/ Portfolio (krw-won): 8706.510619583582
Long: 156
Tick: 1000/ Portfolio (krw-won): 8440.143269340755
Long: 168
Tick: 1100/ Portfolio (krw-won): 8291.578643621286
Long: 186
Tick: 1200/ Portfolio (krw-won): 8338.540817604202
Long: 198
Tick: 1300/ Portfolio (krw-won): 8317.466924316937
Long: 218
Tick: 1400/ Portfolio (krw-won): 8204.078799086354
Long: 238
Tick: 1500/ Portfolio (krw-won): 8173.9634845860855
Long: 253
Tick: 1600/ Portfolio (krw-won): 7935.090335555682
Long: 266
Tick: 1700/ Portfolio (krw-won): 7906.

# **Scratch**

In [None]:
def dateparse(x):
    """Parse a label such as '2021-11-30 05-PM' into a datetime."""
    # `pd.datetime` was removed in pandas 2.0; use the standard library directly.
    return datetime.datetime.strptime(x, '%Y-%m-%d %I-%p')


# Parse the dates after loading instead of relying on read_csv's deprecated
# `date_parser` argument.
raw_df = pd.read_csv('data/test/BTC_USD_1h.csv')
raw_df['date'] = raw_df['date'].map(dateparse)
raw_df = raw_df.sort_values(by='date').reset_index(drop=True)

print(raw_df[0:10])