In [1]:
import random
import itertools

import gym
import numpy as np

from keras.layers.core import Dense
from keras.layers import InputLayer
from keras.models import Sequential
from sklearn import ensemble

import lstm

Using TensorFlow backend.


projects/harambe-6/locations/global/keyRings/harambe-6-dev/cryptoKeys/harambe-6-dev-key


In [2]:
def reward_agent(env, num_episodes=500):
    q_table = np.zeros((5, 2))
    y = 0.95
    eps = 0.5
    lr = 0.8
    decay_factor = 0.999
    for i in range(num_episodes):
        s = env.reset()
        eps *= decay_factor
        done = False
        while not done:
            # print(s)
            # print(q_table)
            # select the action with highest cummulative reward
            if np.random.random() < eps or np.sum(q_table[s, :]) == 0:
                a = np.random.randint(0, 2)
            else:
                a = np.argmax(q_table[s, :])
            # pdb.set_trace()
            # print(a)
            new_s, r, done, _ = env.step(a)
            q_table[s, a] += r + lr * (y * np.max(q_table[new_s, :]) - q_table[s, a])
            s = new_s
    return q_table

In [3]:
def reward_agent_keras(env, num_episodes=500):
    model = Sequential()
    model.add(InputLayer(batch_input_shape=(1, 5)))
    model.add(Dense(10, activation='sigmoid'))
    model.add(Dense(2, activation='linear'))
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])

    y = 0.95
    eps = 0.5
    decay_factor = 0.999
    r_avg_list = []
    for i in range(num_episodes):
        s = env.reset()
        eps *= decay_factor
        done = False
        r_sum = 0
        while not done:
            if np.random.random() < eps:
                a = np.random.randint(0, 2)
            else:
                a = np.argmax(model.predict(np.identity(5)[s:s + 1]))
            new_s, r, done, _ = env.step(a)
            target = r + y * np.max(model.predict(np.identity(5)[new_s:new_s + 1]))
            target_vec = model.predict(np.identity(5)[s:s + 1])[0]
            target_vec[a] = target
            model.fit(np.identity(5)[s:s + 1], target_vec.reshape(-1, 2), epochs=1, verbose=0)
            s = new_s
            r_sum += r
        r_avg_list.append(r_sum / num_episodes)
    return r_avg_list

In [4]:
env = gym.make("NChain-v0")
print(reward_agent(env, 100))

[[55.71752222 55.87102867]
 [55.18226486 57.02198783]
 [67.0745028  56.97468817]
 [73.20648333 59.37902097]
 [80.54734582 62.49020854]]


In [None]:
keras_env = gym.make("NChain-v0")
print(reward_agent_keras(keras_env, 100))

In [6]:
def init_account(cash=100000):
    return {
        "cash": cash,
        "portfolio": {}
    }


def buy_all(account, share_price, symbol):
    num_shares = 0 if account["cash"] <= 0 else account["cash"] // share_price
    account["cash"] -= num_shares * share_price
    account["portfolio"][symbol] = num_shares
    return account


def short_all(account, share_price, symbol):
    num_shares = 0 if account["cash"] <= 0 else account["cash"] // share_price
    account["cash"] += num_shares * share_price
    account["portfolio"][symbol] = -num_shares
    return account


def close(account, share_price, symbol):
    num_shares = account["portfolio"][symbol]

    if num_shares > 0:
        # selling
        account["cash"] += num_shares * share_price
    else:
        # buying back short
        account["cash"] -= -num_shares * share_price
        
    account["portfolio"][symbol] = 0
    return account

def get_account_value(account, current_prices):
    value = account["cash"]
    
    for symbol in account["portfolio"]:
        value += account["portfolio"][symbol] * current_prices[symbol]
        
    return value
    

In [7]:
def calculate_action(clf, state, pred_diff, denorm_diff, eps):
    if 0 <= state <= 3:
        predictions = [clf.predict([[pred_diff, denorm_diff, i]])[0] for i in range(1, 4)]

        if random.random() < eps or any(p == 0 for p in predictions):
            return random.randint(1, 3)
        else:
            return predictions.index(max(predictions)) + 1
    else:
        predictions = [clf.predict([[pred_diff, denorm_diff, i]])[0] for i in range(0, 2)]

        if random.random() < eps or any(p == 0 for p in predictions):
            return random.randint(0, 1)
        else:
            return predictions.index(max(predictions))


def add_reward(clf, conditions, rewards, pred_diff, denorm_diff, action, reward):
    # print(f"add reward for {action}: {reward}")
    np.append(conditions, [pred_diff, denorm_diff, action])
    np.append(rewards, [reward])
    return clf.fit(conditions, rewards)

In [8]:
def simulate_trades(frames, model, action_mapping):
    account = init_account()
    last_state = 4
    # 0 = close, 1 = do nothing, 2 = buy, 3 = short
    last_action = 1
    last_account_value = get_account_value(account, {})

    # clf = ensemble.GradientBoostingRegressor()
    # clf.fit(conditions, rewards)

    for i, current_frame in enumerate(frames[1:]):
        previous_frame = frames[i-1]
        
        current_normalized_frame = lstm.normalize_frame(current_frame)
        previous_normalized_frame = lstm.normalize_frame(previous_frame)
    
        current_prediction = lstm.predict_sequences_multiple(model, [current_normalized_frame])
        previous_prediction = lstm.predict_sequences_multiple(model, [previous_normalized_frame])
        
        current_prediction_denorm = lstm.denormalize_dim(current_prediction[0][0], current_frame[0][0])
        
        pred_diff = current_prediction[0][0] - previous_prediction[0][0]

        
        has_position = "XYZ" in account["portfolio"] and account["portfolio"]["XYZ"] != 0
        # TODO: split coefficient
        share_price = float(current_frame[-1][0])
        
#         if pred_diff > 0 and current_prediction_denorm > share_price:
#             if not has_position:
#                 print("BUY")
#                 account = buy_all(account, share_price, "XYZ")
#         elif pred_diff < 0 and current_prediction_denorm < share_price:
#             if not has_position:
#                 print("SHORT")
#                 account = short_all(account, share_price, "XYZ")
#         elif has_position:
#             print("CLOSING")
#             account = close(account, share_price, "XYZ")

        if not has_position:
            # can only hold, buy, or short
            if pred_diff > 0:
                if current_prediction_denorm > share_price:
                    state = 0
                else:
                    state = 1
            else:
                if current_prediction_denorm > share_price:
                    state = 2
                else:
                    state = 3
        else:
            # can only close or hold
            state = 4
        
        action = action_mapping[state]
#         print(state)
#         print(action)
#         print(reward_table)
        
        if action == 0:
            # close
            account = close(account, share_price, "XYZ")
        elif action == 2:
            # buy
            account = buy_all(account, share_price, "XYZ")
        elif action == 3:
            # short
            account = short_all(account, share_price, "XYZ")

        # reward = get_account_value(account, {"XYZ": share_price}) - last_account_value
        # reward += 0.8 * (0.95 * np.max(reward_table[state, :]) - reward_table[last_state, last_action])
#         clf = add_reward(
#             clf,
#             conditions,
#             rewards,
#             pred_diff,
#             current_prediction_denorm - share_price,
#             last_action,
#             reward
#         )

        last_state = state
        last_action = action
        last_account_value = get_account_value(account, {"XYZ": share_price})
        
            
    return account

In [9]:
SYMBOL="MSFT"

# get testing data
times = lstm.get_time_series_daily(SYMBOL, ["1. open"], outputsize="full")
vectors = lstm.times_to_vectors(times, include_time=False)[::-1]

# TODO: split at dates
train_vectors, test_vectors = lstm.partition_data(vectors, partition_coefficient=0.8)

train_frames = lstm.get_frames(train_vectors, 15, with_target=True)
test_frames = lstm.get_frames(test_vectors, 15, with_target=False)

train_no_dates = [[[col for col in vector] for vector in frame] for frame in train_frames]
normalized_train = lstm.normalize_frames(train_no_dates)
x_train, y_train = lstm.seperate_xy(normalized_train)

In [10]:
# setup model
model = lstm.setup_lstm_model(x_train, y_train)

compilation time :  0.01971912384033203
Train on 4055 samples, validate on 214 samples
Epoch 1/1


In [17]:
num_states = 5
num_actions = 4
action_mappings = list(
    filter(
        lambda p: not any(a < 1 for a in p[:4]) and p[4] < 2,
        itertools.permutations([i for i in range(num_states)])
    )
)
print(action_mappings)

max_mapping = action_mappings[0]
max_account_value = 0

for mapping in action_mappings:
    account = simulate_trades(test_frames, model, mapping)
    account_value = get_account_value(account, {"XYZ": float(test_frames[-1][-1][0])})
    
    print(account_value)
    
    if account_value > max_account_value:
        max_account_value = account_value
        max_mapping = mapping

# print(max_account_value)
# print(reward_table)
print(max_mapping)
print(max_account_value)

[(0, 0, 0, 0, 0), (0, 0, 0, 0, 1), (0, 0, 0, 1, 1), (0, 0, 1, 1, 1), (0, 1, 1, 1, 1), (1, 1, 1, 1, 1)]


KeyError: 'XYZ'