In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.losses import kld
from tensorflow.keras.utils import to_categorical

from CustomModels import utils 
from CustomModels import MyModel_list
import CustomModels.metrics as metrics
import CustomModels.rank_metrics as rank_metrics

from tensorflow.keras import Model
# from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense,Input,BatchNormalization,Dropout#,CuDNNLSTM,CuDNNGRU
# from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

from sklearn.model_selection import train_test_split
import time

# Widen the notebook container so wide tables and plots are not squeezed.
# `IPython.display` is the public import path for these helpers; the private
# `IPython.core.display` path used before is deprecated for `display`.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))


# 2. Neural network model

 Setting up the necessary functions and sanity-checking the dataset; the actual work starts in section 2.1. <br>
 By Sokolov Gleb <br>
 Part 2 of job application @ ООО ИК QBF

## 2.0 Setup

In [50]:
# Record the TensorFlow / tf.keras versions this notebook was executed with.
(tf.__version__, tf.keras.__version__)

('1.12.0', '2.1.6-tf')

In [2]:
# wrapper for tf - to - keras metrics usage
def as_keras_metric(method):
    """Adapt a TensorFlow streaming metric so it can be passed to Keras.

    `method` must return a `(value, update_op)` pair when called with the two
    positional arguments Keras supplies (plus any keyword arguments).

    Returns a function with the same metadata as `method` that, on each call:
      1. builds the metric via `method`,
      2. runs `tf.local_variables_initializer()` in the Keras backend session,
      3. returns the metric value wrapped in `tf.identity` under a control
         dependency on `update_op`, so reading the value also triggers the update.
    """
    import functools

    def wrapper(self, args, **kwargs):
        """ Wrapper for turning tensorflow metrics into keras metrics """
        metric_value, metric_update = method(self, args, **kwargs)
        # the metric's local variables must be initialised before first use
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([metric_update]):
            return tf.identity(metric_value)

    # copy name/docstring of the wrapped metric onto the adapter
    return functools.wraps(method)(wrapper)

In [3]:
# Experimenting with metrics: custom TF metrics do not plug into Keras directly at the
# moment, so the ranking metric is adapted through `as_keras_metric`.
# NOTE(review): the variable is named `ndcg_metric` but the key passed is `DCG` --
# confirm whether the normalised variant was intended.
ndcg_metric = as_keras_metric(metrics.make_ranking_metric_fn(metrics.RankingMetricKey().DCG))

In [4]:
# Synthetic test set for the linear-regression sanity check (explained in part 1):
# m queries with n items each and f noisy features per item.
SEED = 0
np.random.seed(SEED)  # fixed seed so Restart & Run All reproduces the same data
m = 1000
n = 5
f = 1
qs = ['date_'+str(i) for i in range(m)]
# repeat every query id n times; sorting keeps the rows of one query adjacent
qs = np.array(sorted(qs*n))
scores = np.random.random(m*n)
# Each feature is 2*score plus its own Gaussian noise (sigma = 0.1), so every
# feature column has exactly m*n entries for any f. (The earlier single
# reshape((f,-1)) only produced full-length columns when f == 1.)
# Use np.random.random(m*n) per feature instead for uninformative features.
features = [(2*scores + np.random.normal(0, .1, m*n)).astype('float32') for _ in range(f)]
feature_names = ['F'+str(i+1) for i in range(f)]

In [5]:
# Sample dataframe: query id, target score, then one column per feature.
df = pd.DataFrame({'Q_id': qs, 'Score': scores, **dict(zip(feature_names, features))})
df.head(6)

Unnamed: 0,Q_id,Score,F1
0,date_0,0.985924,1.882903
1,date_0,0.154298,0.313252
2,date_0,0.799794,1.71373
3,date_0,0.335992,0.63729
4,date_0,0.175667,0.494688
5,date_1,0.667605,1.312209


In [6]:
# Either this or the next block should be run (the next one overwrites X and y).
# Grouping stocks at each trading day (queue): one row of X / y per Q_id.
d = df.groupby('Q_id').groups.values()
# only the first feature column is kept for each group
X = np.array([df.loc[g, feature_names].values[:, 0] for g in d])
y = np.array([df.loc[g, 'Score'].values for g in d])

In [7]:
# Simple alternative: ignore the grouping and treat every row as one sample.
X = df.loc[:, feature_names].values
# targets as a single column
y = df['Score'].values.reshape(-1, 1)

## 2.1. Adaptation of ListNet for ranking stocks

First of all, I could not find a Keras implementation of ListNet, so let's work through the maths and implement it ourselves. <br>


To adapt our problem to a ranking scheme, let's follow the great paper on listwise learning to rank: ["Learning to Rank: From Pairwise Approach to Listwise Approach"](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-2007-40.pdf).<br>


In training, we have a set of trading days: $ D = \{d^{(1)},d^{(2)}, ... , d^{(n)} \}$. Each day $d^{(i)}$ is associated with a list of stocks $ S^{(i)} = \{s^{(i)}_1, s^{(i)}_2, ... , s^{(i)}_m \}$. Furthermore, each list of stocks $S^{(i)}$ is associated with a list of independent ranking scores $ y^{(i)} = \{y^{(i)}_1, y^{(i)}_2, ... , y^{(i)}_m \}$, where $y^{(i)}_j$ is the score of stock $s^{(i)}_j$. <br>

For now I will work only with the $S^{(i)}$ data, dropping the possible impact of $D$. Otherwise it would require rebuilding the TensorFlow computation graph from scratch, and this research is more of a display-of-skill kind of work. So a feature vector is built for each stock: $x^{(i)}_j = \Psi (s^{(i)}_j)$, $i = 1,2,\dots,n$, $j = 1,2,\dots,m$. <br>

Each list of feature vectors $x^{(i)}= (x^{(i)}_1, x^{(i)}_2, ... , x^{(i)}_m)$ and the corresponding list of given ranking scores $ y^{(i)} =  (y^{(i)}_1, y^{(i)}_2, ... , y^{(i)}_m )$ then form an "instance". The training set can be denoted as $ \mathcal{T} = \big\{ (x^{(i)},y^{(i)}) \big\}^n_{i=1}$.

We then create a ranking function $f$; for each feature vector $x^{(i)}_j$ (corresponding to stock $s^{(i)}_j$) it outputs a score $f(x^{(i)}_j)$. For a list of feature vectors we obtain a list of scores $z^{(i)} = \big(f(x^{(i)}_1),f(x^{(i)}_2),\dots,f(x^{(i)}_m) \big)$.

The objective of learning is formalized as minimization of the total losses with respect to the training data.
$$ \sum_{i=1}^{n} L(y^{(i)},z^{(i)})$$
where $L$ is a listwise loss function. I choose to use Jensen-Shannon divergence as loss function:
$$ D_{js}(P,Q) = \frac{1}{2} D_{kl}(P,M) + \frac{1}{2} D_{kl}(Q,M)$$
$$M = \frac{1}{2}(P+Q) $$
$$D_{kl}(P,Q) = - \sum_{i} P(i) \log \bigg(\frac{Q(i)}{P(i)} \bigg) $$
where $P$ and $Q$ are discrete probability distributions.


The model is wrapped in its own class, object-oriented style (see `./CustomModels/MyModel_list.py`). The class has some flexible options for constructing the NN model, such as: 

1. Test/Validation ratio
2. Usage of regularization (Elastic, l1 and l2)
3. Usage of Dropout layer
4. Number of layers
5. Number of neurons (in first layer, rest will decay as n//2)
6. Epochs, window size, batch size

All tests are logged in TensorBoard format, stored in `./data/logs/`

Lastly, let's check our hardware: to run TF on a GPU we need an NVIDIA CUDA-capable graphics card with all drivers preinstalled. I'm running Manjaro Linux with the Bumblebee switch, so the notebook should be started as <br>`[user]$ optirun jupyter-notebook`

## 2.2 NN graph (via Tensorboard)
<img src = 'graph2.png'>

In [57]:
# Experiment configuration, consumed by the StockPred cells below.
TEST_SPLIT = 0.1        # passed as test_ratio
VAL_SPLIT = 0.1         # passed as val_ratio
N_NEURONS = 512         # passed as neurons
LAYERS = 2              # passed as layers
FEATURES = 3            # passed as features
BATCH = 128             # passed as batch_size
EPOCHS = 5              # passed as epochs
DROPOUT = 0.2           # passed as use_dropout to gen_model
TICKER = 'TECH_SECTOR'  # passed as ticker

In [58]:
# Market ranks data.
# NOTE(review): pickle.load can execute arbitrary code -- only load a trusted ranks.pkl.
with open('./data/ranks.pkl', mode='rb') as f:
    market = pickle.load(f)
# On my hardware it runs rather slowly, so I stick with just one sector;
# if needed, the model can be wrapped into a function, like it is in ./RNN_1.5*
market = market.Technology.dropna()
# order the columns so they can be sliced by label later
market = market.reindex(columns=sorted(market.columns))

In [59]:
# Keep only the first 40 columns (in the sorted column order).
# NOTE(review): 40 is a magic number -- presumably chosen to keep the run small;
# consider promoting it to a named constant next to the other settings.
market = market.iloc[:,:40]

In [60]:
# Select columns by their second-level label across every first-level key:
# three rank features as inputs, `r_sum_rank` as the target.
X = market.loc[:, pd.IndexSlice[:, ('P/E_rank', 'PEG_rank', 'mom_rank')]].values
y = market.loc[:, pd.IndexSlice[:, 'r_sum_rank']].values

In [61]:
# Build the model wrapper from the configuration constants defined above.
stock_model = MyModel_list.StockPred(
    X,
    y,
    test_ratio=TEST_SPLIT,
    val_ratio=VAL_SPLIT,
    ticker=TICKER,
    use_reg=True,
    neurons=N_NEURONS,
    layers=LAYERS,
    features=FEATURES,
    batch_size=BATCH,
    epochs=EPOCHS,
)

In [62]:
# Build the network with dropout (rate DROPOUT) and normalisation layers enabled,
# print its layer summary, then compile it.
stock_model.gen_model(use_dropout=DROPOUT,use_norm=True)
stock_model.model.summary()
# NOTE(review): `acc` holds whatever StockPred.compile returns -- presumably an
# accuracy/score; confirm against ./CustomModels/MyModel_list.py.
acc = stock_model.compile(verbose=1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 3)                 0         
_________________________________________________________________
Dense_512 (Dense)            (None, 512)               2048      
_________________________________________________________________
Dropout_0.2_0 (Dropout)      (None, 512)               0         
_________________________________________________________________
Normalization_0 (BatchNormal (None, 512)               2048      
_________________________________________________________________
Dense_256 (Dense)            (None, 256)               131328    
_________________________________________________________________
Dropout_0.2_1 (Dropout)      (None, 256)               0         
_________________________________________________________________
Normalization_1 (BatchNormal (None, 256)               1024      
__________

In [63]:
# Predictions from StockPred.test_model, flattened to 1-D for plotting.
# NOTE(review): presumably evaluated on the held-out test split -- confirm in MyModel_list.
y_pred = stock_model.test_model().ravel()


In [37]:
# Compare every 20th test target with the matching prediction (starting at index 1).
# The curves are labelled so they can be told apart.
# NOTE(review): `y_pred` was flattened with ravel() while `y_test` is used as stored --
# confirm both have the same layout before reading the two curves against each other.
fig, ax = plt.subplots()
ax.plot(stock_model.y_test[1::20], label='y_test')
ax.plot(y_pred[1::20], label='y_pred')
ax.set(title='Test targets vs. predictions (every 20th sample)',
       xlabel='sample (subsampled)',
       ylabel='r_sum_rank')
ax.legend();

NameError: name 'plt' is not defined

Links
1. Original paper: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-2007-40.pdf

2. JSD: https://en.wikipedia.org/wiki/Jensen–Shannon_divergence
3. KLD: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
4. ELU over ReLU https://arxiv.org/abs/1511.07289
5. Batch Normalization https://arxiv.org/abs/1502.03167

## 2.3 Tests

In [None]:
# Sanity check of the JS-divergence loss: compare a random vector `a` with a vector `b`
# that shares its first k entries with `a` and has the rest replaced by fresh noise in
# [0, 0.1). One divergence value per k is collected in `ls`.
# NOTE(review): `jsd` is not defined or imported anywhere in the visible notebook --
# import it (presumably from CustomModels) before running this cell.
n_points = 100
ls = []
for k in np.arange(0, 100, 2):
    a = np.random.random(n_points)
    b = np.append(a[:k], np.random.random(n_points - k) / 10)

    # Build every comparison in its own throw-away graph. Adding new constants to the
    # global default graph on each iteration would make that graph grow without bound.
    with tf.Graph().as_default():
        divergence = jsd(tf.constant(a), tf.constant(b))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ls.append(sess.run(divergence))

In [None]:
# Divergence collected by the loop above, in loop order (x axis is the list index).
plt.plot(ls)