In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.special import softmax
import math
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
import os
import torch.nn.functional as F

# Defining constants

In [2]:
DATASETS = [
    '/home/uriel/Documentos/Dataset/cryptocurrencypricehistory/bitcoin_price.csv',
    '/home/uriel/Documentos/Dataset/cryptocurrencypricehistory/ethereum_price.csv',
    '/home/uriel/Documentos/Dataset/cryptocurrencypricehistory/litecoin_price.csv'
    ]

CHECKPOINTS_BASES = [
    'checkpoints/bitcoin_simulator',
    'checkpoints/ethereum_simulator',
    'checkpoints/litecoin_simulator'
]

VALIDATION_REGISTERS = 90
SEQUENCES_LENGTH = 30
PREDICTIONS_LENGTH = 1


# Read datasets

# Read data

In [3]:
dfs = []

for dataset in DATASETS:
    dfs.append(pd.read_csv(dataset))

## Transforming data

In [4]:
MONTHS = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

In [5]:
for idx, df in enumerate(dfs):
    df = df.add_suffix('_{}'.format(idx))
    df['Year'] = df['Date_{}'.format(idx)].apply(lambda x: int(x.split(',')[1]))
    df['Month'] = df['Date_{}'.format(idx)].apply(lambda x: x.split(',')[0].split()[0])
    df['Month'] = df['Month'].apply(lambda x: MONTHS.index(x) + 1)
    df['Day'] = df['Date_{}'.format(idx)].apply(lambda x: int(x.split(',')[0].split()[1]))
    dfs[idx] = df.sort_values(['Year', 'Month', 'Day'], ascending=True)

## Join data

In [6]:
df = pd.concat([df.set_index(['Year', 'Month', 'Day']) for df in dfs], axis=1).reset_index()
df.head()

Unnamed: 0,Year,Month,Day,Date_0,Open_0,High_0,Low_0,Close_0,Volume_0,Market Cap_0,...,Close_1,Volume_1,Market Cap_1,Date_2,Open_2,High_2,Low_2,Close_2,Volume_2,Market Cap_2
0,2013,4,28,"Apr 28, 2013",135.3,135.98,132.1,134.21,-,1500520000,...,,,,"Apr 28, 2013",4.3,4.4,4.18,4.35,-,73773400
1,2013,4,29,"Apr 29, 2013",134.44,147.49,134.0,144.54,-,1491160000,...,,,,"Apr 29, 2013",4.37,4.57,4.23,4.38,-,74952700
2,2013,4,30,"Apr 30, 2013",144.0,146.93,134.05,139.0,-,1597780000,...,,,,"Apr 30, 2013",4.4,4.57,4.17,4.3,-,75726800
3,2013,5,1,"May 01, 2013",139.0,139.89,107.72,116.99,-,1542820000,...,,,,"May 01, 2013",4.29,4.36,3.52,3.8,-,73901200
4,2013,5,2,"May 02, 2013",116.38,125.6,92.28,105.21,-,1292190000,...,,,,"May 02, 2013",3.78,4.04,3.01,3.37,-,65242700


In [7]:
df = df[~df.isnull().any(axis=1)]

In [8]:
len(df)

929

## Partitioning data

In [9]:
columns = []

for idx in range(len(DATASETS)):
    columns.extend(['Open_{}'.format(idx), 'Close_{}'.format(idx)])
    

values = df[columns].values

In [10]:
split_idx = -VALIDATION_REGISTERS

train = values[:split_idx]
test = values[split_idx:]

print('Train data size: {}'.format(train.shape))
print('Test data size: {}'.format(test.shape))

Train data size: (839, 6)
Test data size: (90, 6)


In [11]:
print('Train NAN values:{} \t Test NAN values:{}'.format(
    np.count_nonzero(np.isnan(train)),
    np.count_nonzero(np.isnan(test))
))

Train NAN values:0 	 Test NAN values:0


## Build sequences

In [12]:
def build_sequences(data, sequences_length, predictions_length):
    inputs = []
    outputs = []
    
    for idx in range(len(data) - sequences_length - predictions_length):
        # Preprocess iputs ad outputs
        # Normalizing
        inps = data[idx: idx + sequences_length]
        outs = data[idx + sequences_length: idx + sequences_length + predictions_length]
        # Inputs
        inputs.append( inps )
        # Outputs
        outputs.append( outs )
        
    return np.array(inputs), np.array(outputs)

In [13]:
x_train = []
y_train = []

x_test = []
y_test = []

for idx in range(len(DATASETS)):
    x_train_seq, y_train_seq = build_sequences(train[:, 2 * idx: 2 * idx + 2], SEQUENCES_LENGTH, PREDICTIONS_LENGTH )
    x_train.append(x_train_seq)
    y_train.append(y_train_seq)
    
    x_test_seq, y_test_seq = build_sequences(test[:, 2 * idx: 2 * idx + 2], SEQUENCES_LENGTH, PREDICTIONS_LENGTH )
    x_test.append(x_test_seq)
    y_test.append(y_test_seq)
    

## Normalizing data

In [14]:
def normalize_data(x, y):
    
    x = x[:, :, 1] /  x[:, :, 0]
    y = y[:, :, 1] / y[:, :, 0]
    
        
    x = np.expand_dims(x, axis=-1)
    y = np.expand_dims(y, axis=-1)
    
    
    return x, y

In [15]:
for idx in range(len(DATASETS)):
    x_train[idx], y_train[idx] = normalize_data(x_train[idx], y_train[idx])
    x_test[idx], y_test[idx] = normalize_data(x_test[idx], y_test[idx])

In [16]:
for idx in range(len(DATASETS)):
    print('--------------- Dataset {} -------------------'.format(idx))
    print(x_train[idx][:3], y_train[idx][:3])

--------------- Dataset 0 -------------------
[[[1.00301356]
  [0.93300922]
  [1.01516544]
  [0.99619557]
  [1.02288719]
  [0.98440503]
  [0.9921106 ]
  [1.00586832]
  [0.98501111]
  [0.98716921]
  [0.998027  ]
  [0.81836157]
  [1.00447556]
  [1.03724108]
  [0.98818781]
  [0.99024327]
  [0.99040715]
  [0.9228004 ]
  [1.05493407]
  [1.01688581]
  [0.99433754]
  [1.02981753]
  [0.99235586]
  [0.99504132]
  [1.00414648]
  [0.99070616]
  [1.00548173]
  [0.99066806]
  [1.01355514]
  [1.02093831]]

 [[0.93300922]
  [1.01516544]
  [0.99619557]
  [1.02288719]
  [0.98440503]
  [0.9921106 ]
  [1.00586832]
  [0.98501111]
  [0.98716921]
  [0.998027  ]
  [0.81836157]
  [1.00447556]
  [1.03724108]
  [0.98818781]
  [0.99024327]
  [0.99040715]
  [0.9228004 ]
  [1.05493407]
  [1.01688581]
  [0.99433754]
  [1.02981753]
  [0.99235586]
  [0.99504132]
  [1.00414648]
  [0.99070616]
  [1.00548173]
  [0.99066806]
  [1.01355514]
  [1.02093831]
  [1.02116064]]

 [[1.01516544]
  [0.99619557]
  [1.02288719]
  [0.

# Definitions for agent

## Simulators components

In [17]:
class PricePredictor(nn.Module):        
    def __init__(self, dropout=0.2):        
        super(PricePredictor, self).__init__()
        
        self.batch_norm_0 = nn.BatchNorm1d(SEQUENCES_LENGTH)
        self.rnn_0 = nn.GRU(x_train[0].shape[2], 35, batch_first=True, num_layers=1) 
        self.linear_0 = nn.Linear(35, 64)
        #self.activation_0 = nn.Tanh()
        self.dropout_0 = nn.Dropout(dropout)
        
        self.batch_norm_1 = nn.BatchNorm1d(SEQUENCES_LENGTH)
        self.rnn_1 = nn.GRU(64, 35, batch_first=True, num_layers=1) 
        self.linear_1 = nn.Linear(35, y_train[0].shape[2])
        self.activation_1 = nn.Sigmoid()        
        
    def forward(self, src):  
        output = src
        output = self.batch_norm_0(output)
        output, _ = self.rnn_0(output)
        output = self.linear_0(output)
        #output = self.activation_0(output)
        output = self.dropout_0(output)
        
        
        output = self.batch_norm_1(output)
        output, _ = self.rnn_1(output)
        output = self.linear_1(output[:, -PREDICTIONS_LENGTH:])
        output = self.activation_1(output)
        
        # output = output.flatten()
        
        return output

In [18]:
MODEL_STATE_DICT = 'model_state_dict'
OPTIMIZER_STATE_DICT = 'optimizer_state_dict'
EPOCH_STATE = 'epoch'
TRAIN_LOSS_HISTORY_STATE = 'train_loss_history'
VAL_LOSS_HISTORY_STATE = 'val_loss_history'
BEST_VAL_LOSS_STATE = 'best_val_loss'

In [19]:
def load_model_state(filename, model, optimizer=None):
    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint[MODEL_STATE_DICT])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint[OPTIMIZER_STATE_DICT])
    last_epoch = checkpoint[EPOCH_STATE]
    train_loss_history = checkpoint[TRAIN_LOSS_HISTORY_STATE]
    val_loss_history = checkpoint[VAL_LOSS_HISTORY_STATE]
    best_val_loss = checkpoint[BEST_VAL_LOSS_STATE]

    return model, optimizer, last_epoch, train_loss_history, val_loss_history, best_val_loss

In [20]:
def save_model_state(filename, model, optimizer, epoch, train_loss_history, val_loss_history, best_val_loss):
    torch.save({
        MODEL_STATE_DICT: model.state_dict(),
        OPTIMIZER_STATE_DICT: optimizer.state_dict(),
        EPOCH_STATE: epoch,
        TRAIN_LOSS_HISTORY_STATE: train_loss_history,
        VAL_LOSS_HISTORY_STATE: val_loss_history,
        BEST_VAL_LOSS_STATE: best_val_loss
    }, filename)

## Agent components

In [21]:
class Simulator():
    def __init__(self):
        pass
    
    def get_cost(self):
        raise NotImplementedError('get_next_cost not implemented yet')   

In [22]:
class PriceSimulator(Simulator):
    def __init__(self, model):
        super(PriceSimulator, self).__init__()
        self.model = model
        self.model.eval()
        self.invested_fractions = 0
        
    def get_ratio(self, last_variations):
        variations = last_variations / last_variations[0, 0, 0] - 1
        variations = torch.from_numpy( variations ).float().to(device)
        
        with torch.no_grad():
            
            prediction = self.model(variations).cpu().numpy()
            prediction = prediction.squeeze(axis=(1, 2))
            #prediction = prediction + 1) * last_variations[0, 0, 0]
            # Scale variation to [0, 2]
            prediction = prediction * 2
            
        
        return prediction

In [23]:
class Environment():
    def __init__(self, x, y, simulators, batch_size=10, initial_amount=1000, max_loss=0.3):
        self.x = x
        self.y = y
        self.simulators = simulators
        self.initial_amount = initial_amount
        self.current_amount = None
        self.batch_size = batch_size
        self.min_amount = initial_amount * (1 - max_loss)
        
        self.indices = None
        self.state = None        
        self.steps = None
        
    def restart(self):
        self.steps = 0
        self.current_amount = self.initial_amount
        self.indices = np.arange(0, self.batch_size)
        predictions = []
        
        for idx, simulator in enumerate(self.simulators):
            prediction = simulator.get_ratio(self.x[idx][self.indices])
            predictions.append(prediction)
            
        predictions.append(np.ones(predictions[0].shape))
                
        predictions = np.array(predictions).T
        invested = np.array([[0] * len(simulators) + [self.initial_amount]] * self.batch_size)
        amounts = np.array([self.initial_amount] * self.batch_size)
        self.state = (predictions, invested, amounts)
        return self.state
    
    def naive_evaluate(self, actions):
        repeated = np.repeat(actions, self.batch_size, axis=0).reshape(
                    actions.shape[0], 
                    self.batch_size, 
                    actions.shape[1]
                    )
        
        
        q_values = []
        
        for action in repeated:
            q_value = self.get_fee(self.state, action)
            next_state = self.get_next_state(action)
            q_value = q_value + self.get_cost(next_state)
            q_values.append(q_value)
            
        return np.array(q_values)    
            
    
    def evaluate(self, actions):
        
        repeated = np.repeat(actions, self.batch_size, axis=0).reshape(
                    actions.shape[0], 
                    self.batch_size, 
                    actions.shape[1]
                    )
        
        fees = (repeated[:, :, :-1] > 0).sum(axis=-1)
        
        next_states = np.stack( [self.state[2]] * actions.shape[0] )
        
        asset_investment = repeated * next_states[:, :, np.newaxis]
        
        real_ratios = np.array([ 
            self.y[idx][self.indices].squeeze((1, 2)) for idx in range(len(simulators)) ] + 
            [np.ones(self.y[0][self.indices].shape).squeeze((1, 2))]
        ).T
        
        real_ratios = np.stack([real_ratios] * actions.shape[0])
        costs = next_states - (asset_investment * real_ratios).sum(axis=-1)
        
        q_values = fees + costs
        
        del repeated
        del fees
        del next_states
        del asset_investment
        del real_ratios
        del costs
        
        return q_values
    
    def step(self, action):
        self.steps += 1
        
        q_value = self.get_fee(self.state, action) 
        next_state = self.get_next_state(action)
        q_value = q_value + self.get_cost(next_state)        
        self.state, self.indices = self.update_state(next_state, q_value)
        
        done = (self.state[2] < self.min_amount).sum() > 0 or (self.steps + 1) * self.batch_size >= self.x[0].shape[0]
        
        return self.state, q_value, done, self.steps
        
    def get_next_state(self, action):        
        invested = action * self.state[2][:, np.newaxis]
        return (self.state[0], np.array(invested), self.state[2])
    
    def update_state(self, next_state, q_value):
        indices = np.arange(
            self.steps * self.batch_size, 
            min((self.steps + 1) * self.batch_size, self.x[0].shape[0])
        )
        predictions = []
        
        for idx, simulator in enumerate(self.simulators):
            prediction = simulator.get_ratio(self.x[idx][indices])
            predictions.append(prediction)
        
        predictions.append(np.ones(predictions[0].shape))
        predictions = np.array(predictions).T
        amount = self.state[2] - q_value
        
        return (predictions, next_state[1], amount), indices
        
            
    #Dollar per movement
    def get_fee(self, state, action):
        return (action[:, :-1] > 0).sum(axis=-1)
        
    def get_cost(self, next_state): 
        
        real_ratios = np.array([ 
            self.y[idx][self.indices].squeeze() for idx in range(len(simulators)) ] + 
            [np.ones(self.y[0][self.indices].shape).squeeze()]
        )
        
        return next_state[2] - (next_state[1] * real_ratios.T).sum(axis=-1)

## Environment

In [24]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [25]:
simulators = []

for checkpoint in CHECKPOINTS_BASES:
    model, _, _, _, _, _ = load_model_state(os.path.join(checkpoint, 'best.pt'), PricePredictor())
    model.to(device)
    simulators.append(PriceSimulator(model))

In [26]:
STEP = 100
actions = []

for i in range(0, 100 + STEP, STEP):
    for j in range(0, 100 + STEP, STEP):
        for k in range(0, 100 + STEP, STEP):
            for l in range(0, 100 + STEP, STEP):
                if (i + j + k + l) == 100:
                    actions.append([i / 100, j / 100, k / 100, l / 100])

In [27]:
actions = np.array(actions)

In [28]:
actions.shape

(4, 4)

In [31]:
INITIAL_AMOUNT = 1000
Q_FACTOR_SCALER = 100

env = Environment(x_test, y_test, simulators, batch_size=1, initial_amount=INITIAL_AMOUNT, max_loss=1)
state = env.restart()
state

(array([[1.2894907, 1.0272001, 1.0842849, 1.       ]]),
 array([[   0,    0,    0, 1000]]),
 array([1000]))

In [32]:
state[0]

array([[1.2894907, 1.0272001, 1.0842849, 1.       ]])

In [33]:
actions

array([[0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.]])

In [34]:
state_action_map = {0:3, 1:2, 2:1, 1:2}

In [36]:
np.argmax(state[0])

0

In [38]:
action = actions[state_action_map[np.argmax(state[0])]]
action

array([1., 0., 0., 0.])

## Real

In [40]:
INITIAL_AMOUNT = 1000
Q_FACTOR_SCALER = 10
model.eval()
env = Environment(x_test, y_test, simulators, batch_size=1, initial_amount=INITIAL_AMOUNT, max_loss=1)
state = env.restart()

for epoch in range(x_test[0].shape[0]):
    action = actions[state_action_map[np.argmax(state[0])]]
    state, q_value, done, step = env.step([action])
        
    print('Day:{}    Action:{}    Current amount:{}'.format(
        step,
        pred_action, 
        state[2]
        ), flush=True)

    if done:
        state = env.restart()
        break

TypeError: list indices must be integers or slices, not tuple