# Tools module

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
%matplotlib inline



In [57]:
# Scratch check: np.arange(1, k + 1) yields the inclusive integer range 1..k
# (here k = 5, giving 1, 2, 3, 4, 5).
np.arange(1, 5+1)

array([1, 2, 3, 4, 5])

In [84]:
def simple_moving_average(input, moving_average_window):
    """Build a symbolic simple moving average along axis 0.

    The first ``moving_average_window`` rows are expanding means (the running
    sum divided by 1, 2, ..., window); every later row is the mean of the
    last ``moving_average_window`` rows, obtained from a difference of
    cumulative sums.

    Args:
        input: symbolic tensor; the ``(-1, 1)`` reshape of the warm-up
            divisor implies a 2-D input whose rows are averaged over.
        moving_average_window: window length (symbolic or plain integer).

    Returns:
        Symbolic tensor with the same number of rows as ``input``.
    """
    running_total = input.cumsum(axis=0)

    # Warm-up part: divide the k-th running total by k (k = 1..window).
    warmup_counts = T.arange(1, moving_average_window + 1).reshape((-1, 1))
    warmup_means = running_total[:moving_average_window] / warmup_counts

    # Steady-state part: the window sum is the difference of two running totals.
    window_sums = (running_total[moving_average_window:]
                   - running_total[:-moving_average_window])
    windowed_means = window_sums / moving_average_window

    return T.concatenate([warmup_means, windowed_means])

# Symbolic inputs and the compiled callable for the simple moving average.
ma_var = T.matrix('(t, whatever) matrix')
moving_average_window = T.scalar('moving_average_window', dtype='int64')
simple_moving_average_fun = theano.function(
    inputs=[ma_var, moving_average_window],
    outputs=simple_moving_average(ma_var, moving_average_window),
    allow_input_downcast=True,
)

In [92]:
def FIP(input, fip_window):
    """Build a symbolic rolling mean of the "value increased" indicator.

    Each row after the first is marked 1.0 where it is strictly greater than
    the previous row and 0.0 otherwise; the first row is all zeros. The
    indicator is then averaged along axis 0: expanding means for the first
    ``fip_window`` rows, and means over the last ``fip_window`` rows after
    that.

    Args:
        input: symbolic tensor; ``input.shape[1]`` is read, so it needs at
            least two dimensions.
        fip_window: window length (symbolic or plain integer).

    Returns:
        Symbolic tensor with the same number of rows as ``input``.
    """
    step = input[1:] - input[:-1]
    went_up = (step > 0).astype('float32')
    # There is no previous row for row 0, so its indicator is zero.
    first_row = T.zeros((1, input.shape[1]))
    indicator = T.concatenate([first_row, went_up])

    running_total = indicator.cumsum(axis=0)

    warmup_counts = T.arange(1, fip_window + 1).reshape((-1, 1))
    warmup_means = running_total[:fip_window] / warmup_counts

    window_sums = running_total[fip_window:] - running_total[:-fip_window]
    windowed_means = window_sums / fip_window

    return T.concatenate([warmup_means, windowed_means])


# Symbolic inputs and the compiled callable for FIP.
fip_var = T.matrix('(t, whatever) matrix')
fip_window = T.scalar('fip_window', dtype='int64')
FIP_fun = theano.function(
    inputs=[fip_var, fip_window],
    outputs=FIP(fip_var, fip_window),
    allow_input_downcast=True,
)

In [None]:
### Slicing dataset

In [None]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield aligned ``(inputs, targets)`` mini-batches of exactly ``batchsize``.

    Trailing samples that do not fill a complete batch are not yielded.

    Args:
        inputs: indexable container of samples.
        targets: indexable container with the same length as ``inputs``.
        batchsize: number of samples per batch.
        shuffle: when truthy, samples are drawn in a random order produced by
            ``np.random.shuffle``; otherwise contiguous slices are used.

    Yields:
        Tuples ``(inputs[sel], targets[sel])``.
    """
    assert len(inputs) == len(targets)
    total = len(inputs)

    order = None
    if shuffle:
        order = np.arange(total)
        np.random.shuffle(order)

    # Stop early enough that every batch is full.
    stop = total - batchsize + 1
    for lo in range(0, stop, batchsize):
        hi = lo + batchsize
        selection = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[selection], targets[selection]

In [None]:
def dinamic_train_cut(features_tensor, target, i, window, offset=0):
    """Copy the ``window`` rows that end at row ``i - offset`` (inclusive).

    Args:
        features_tensor: array indexed as ``[rows, :]``.
        target: array indexed as ``[rows, :]``.
        i: reference row index.
        window: number of rows to take.
        offset: how many rows before ``i`` the window ends.

    Returns:
        ``(features_copy, target_copy)``; both are independent copies of the
        selected rows.
    """
    last_row = i - offset
    rows = slice(last_row - window + 1, last_row + 1)
    features_cut = np.copy(features_tensor[rows, :])
    target_cut = np.copy(target[rows, :])
    return features_cut, target_cut

In [None]:
from sklearn.utils import shuffle
def ranker_unpack(features_tensor, target, sampling_size):
    """Build a shuffled pairwise (top-minus-bottom) dataset.

    For every row ``i >= 100`` of ``target``, the columns with finite target
    values are ranked. The ``sampling_size`` highest-ranked ("tops") and
    lowest-ranked ("bottoms") columns are selected, and two groups of
    samples are produced:

    * ``features[tops] - features[bottoms]`` labelled ``1``
    * ``features[bottoms] - features[tops]`` labelled ``0``

    Tops and bottoms are paired in column-index order, not in rank order.

    Args:
        features_tensor: array indexed as ``[row, column, feature]``; its
            first two axes are aligned with ``target``.
        target: 2-D array; non-finite entries are ignored.
        sampling_size: number of columns taken from each end of the ranking.

    Returns:
        The result of ``shuffle(X, y)`` (``shuffle`` is imported from
        ``sklearn.utils`` in this file), where ``X`` has one row per sample
        and ``y`` holds the 0/1 labels.
    """
    # NOTE(review): the first 100 rows are always skipped, presumably as a
    # warm-up period. Confirm before changing.
    # Chunks are collected and concatenated once at the end; concatenating
    # inside the loop recopies the whole accumulated array every iteration.
    # The empty float64 seeds keep the result dtype/shape identical to the
    # incremental version, including when no row contributes samples.
    x_chunks = [np.empty((0, features_tensor.shape[-1]))]
    y_chunks = [np.empty(0,)]
    for i in range(100, target.shape[0]):
        daily_target_slice = target[i, :]
        idxs = np.where(np.isfinite(daily_target_slice))[0]
        daily_target_slice = daily_target_slice[idxs]
        daily_features_slice = features_tensor[i, idxs, :]
        # Double argsort turns values into ranks (0 = smallest).
        slice_argsort = daily_target_slice.argsort().argsort()
        tops = np.where(slice_argsort > idxs.shape[0] - 1 - sampling_size)[0]
        bottoms = np.where(slice_argsort < sampling_size)[0]
        top_features = daily_features_slice[tops, :]
        bottom_features = daily_features_slice[bottoms, :]
        x_chunks.append(top_features - bottom_features)
        x_chunks.append(-top_features + bottom_features)
        y_chunks.append(np.ones(tops.shape[0]))
        y_chunks.append(np.zeros(tops.shape[0]))

    X = np.concatenate(x_chunks)
    y = np.concatenate(y_chunks)
    return shuffle(X, y)
    

In [None]:
def regressor_unpack(features_tensor, target, sampling_size):
    """Build a shuffled regression dataset from the extremes of each row.

    For every row ``i >= 100`` of ``target``, the columns with finite target
    values are ranked, and the ``sampling_size`` highest-ranked and
    lowest-ranked columns are kept. Their feature vectors become samples and
    their target values become labels.

    Args:
        features_tensor: array indexed as ``[row, column, feature]``; its
            first two axes are aligned with ``target``.
        target: 2-D array; non-finite entries are ignored.
        sampling_size: number of columns taken from each end of the ranking.

    Returns:
        The result of ``shuffle(X, y)`` (``shuffle`` is imported from
        ``sklearn.utils`` in this file), where ``X`` has one row per sample
        and ``y`` holds the matching target values.
    """
    # NOTE(review): the first 100 rows are always skipped, presumably as a
    # warm-up period. Confirm before changing.
    # Chunks are collected and concatenated once at the end; concatenating
    # inside the loop recopies the whole accumulated array every iteration.
    # The empty float64 seeds keep the result dtype/shape identical to the
    # incremental version, including when no row contributes samples.
    x_chunks = [np.empty((0, features_tensor.shape[-1]))]
    y_chunks = [np.empty(0,)]
    for i in range(100, target.shape[0]):
        daily_target_slice = target[i, :]
        idxs = np.where(np.isfinite(daily_target_slice))[0]
        daily_target_slice = daily_target_slice[idxs]
        daily_features_slice = features_tensor[i, idxs, :]
        # Double argsort turns values into ranks (0 = smallest).
        slice_argsort = daily_target_slice.argsort().argsort()
        tops = np.where(slice_argsort > idxs.shape[0] - 1 - sampling_size)[0]
        bottoms = np.where(slice_argsort < sampling_size)[0]
        x_chunks.append(daily_features_slice[tops, :])
        x_chunks.append(daily_features_slice[bottoms, :])
        y_chunks.append(daily_target_slice[tops])
        y_chunks.append(daily_target_slice[bottoms])

    X = np.concatenate(x_chunks)
    y = np.concatenate(y_chunks)
    return shuffle(X, y)
    

In [None]:
def shares_norebalance(shares):
    """Carry each column's previous value forward while a position is held.

    Walking down every column, a row keeps its own value only when, compared
    with the (already processed) row above, the position was closed
    (non-zero to zero), opened (zero to non-zero) or flipped sign. In every
    other case the value from the row above is copied down. The first row is
    never changed.

    Args:
        shares: 2-D array; it is modified in place.

    Returns:
        The same ``shares`` array that was passed in.
    """
    for col in range(shares.shape[1]):
        for row in range(1, shares.shape[0]):
            current = shares[row, col]
            previous = shares[row - 1, col]
            position_changed = (
                (current == 0 and previous != 0)       # closed
                or (current != 0 and previous == 0)    # opened
                or (current * previous < 0)            # sign flipped
            )
            if position_changed:
                continue
            shares[row, col] = previous
    return shares

### Reading files

In [None]:
def read_data(filename, args, start_idx=None):
    """Read a CSV file with pandas and return its values as a NumPy array.

    Args:
        filename: path or file-like object accepted by ``pd.read_csv``.
        args: dict of keyword arguments forwarded to ``pd.read_csv``
            (``None`` is treated as no extra arguments).
        start_idx: when not ``None``, only rows whose index label is strictly
            greater than this value are kept.

    Returns:
        ``np.ndarray`` built from the (optionally filtered) DataFrame.
    """
    pd_df = pd.read_csv(filename, **(args or {}))
    # Compare against None explicitly: a plain truthiness test would skip the
    # filter for a legitimate ``start_idx`` of 0.
    if start_idx is not None:
        pd_df = pd_df.loc[pd_df.index > start_idx]
    return np.array(pd_df)


### Numpy tools

In [None]:
def relative_scaler(data_matrix):
    """Standardise each row of ``data_matrix``, ignoring NaNs.

    Every row has its NaN-aware mean subtracted and is divided by its
    NaN-aware standard deviation (both taken along axis 1).

    Args:
        data_matrix: array with at least two dimensions.

    Returns:
        Array of the same shape; NaN inputs stay NaN.
    """
    row_mean = np.nanmean(data_matrix, axis=1, keepdims=True)
    row_std = np.nanstd(data_matrix, axis=1, keepdims=True)
    centered = data_matrix - row_mean
    return centered / row_std

In [None]:
def weighted_futret(returns, window):
    """Exponentially weighted sum of the next ``window`` rows of ``returns``.

    Row ``i`` of the result combines rows ``i + 1 .. i + window`` of
    ``returns`` with weights ``exp(-alpha * k)`` (``k = 0`` for the nearest
    row), normalised to sum to one, where ``alpha = -log(0.5) / window * 2``.
    The last ``window`` rows have no complete look-ahead window and stay NaN.

    Args:
        returns: 2-D array.
        window: number of future rows to combine.

    Returns:
        ``float32`` array with the same shape as ``returns``.
    """
    result = np.full(returns.shape, np.nan, dtype='float32')

    alpha = -np.log(0.5) / window * 2
    raw_weights = np.exp(-alpha * np.arange(window))
    # Column vector so the weights broadcast across the columns of `returns`.
    column_weights = (raw_weights / raw_weights.sum()).reshape(-1, 1)

    n_complete = result.shape[0] - window
    for row in range(n_complete):
        lookahead = returns[row + 1:row + window + 1, :]
        result[row, :] = (lookahead * column_weights).sum(axis=0)

    return result

In [None]:
def numpy_matrix_sma(data_matrix, n):
    """Column-wise simple moving average that skips non-finite entries.

    For each column, the finite values are compacted, passed through
    ``simple_moving_average_fun`` with window ``n``, and written back at
    their original row positions. The first ``n - 1`` finite entries of each
    column, and every column with fewer than ``n`` finite values, are NaN in
    the output.

    Args:
        data_matrix: 2-D array; it is not modified.
        n: moving-average window length.

    Returns:
        A new array with the same shape as ``data_matrix``.
    """
    result = np.copy(data_matrix)
    for col in range(result.shape[-1]):
        column = result[:, col]
        finite_rows = np.where(np.isfinite(result[:, col]))[0]
        # Fancy indexing copies, so the values survive the NaN fill below.
        finite_values = column[finite_rows].reshape(-1, 1)
        result[:, col] = np.nan
        if finite_values.shape[0] < n:
            continue
        averaged = simple_moving_average_fun(finite_values, n).ravel()
        # Keep only outputs backed by a full window of n finite values.
        result[finite_rows[n - 1:], col] = averaged[n - 1:]
    return result

def numpy_matrix_fip(data_matrix, n):
    """Column-wise ``FIP_fun`` transform that skips non-finite entries.

    For each column, the finite values are compacted, passed through
    ``FIP_fun`` with window ``n``, and written back at their original row
    positions. The first ``n - 1`` finite entries of each column, and every
    column with fewer than ``n`` finite values, are NaN in the output.

    Args:
        data_matrix: 2-D array; it is not modified.
        n: window length passed to ``FIP_fun``.

    Returns:
        A new array with the same shape as ``data_matrix``.
    """
    result = np.copy(data_matrix)
    for col in range(result.shape[-1]):
        column = result[:, col]
        finite_rows = np.where(np.isfinite(result[:, col]))[0]
        # Fancy indexing copies, so the values survive the NaN fill below.
        finite_values = column[finite_rows].reshape(-1, 1)
        result[:, col] = np.nan
        if finite_values.shape[0] < n:
            continue
        transformed = FIP_fun(finite_values, n).ravel()
        # Keep only outputs backed by a full window of n finite values.
        result[finite_rows[n - 1:], col] = transformed[n - 1:]
    return result