In [None]:
%pylab inline

import gc
import os
import pickle
import h5py
import json
import random
import time

import numpy as np
import pandas as pd
import lightgbm as lgb

from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta

# Notebook-wide pandas configuration.
# Show at most 100 rows / 50 columns when a frame is rendered.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50
# NOTE(review): this silences pandas' chained-assignment warning for the
# whole session, so accidental writes to a copy will go unreported.
pd.set_option('mode.chained_assignment', None)

In [None]:
# Read the raw training table; fields in this CSV are separated by ';'.
dataset = pd.read_csv('../data/training.csv', delimiter=';')

In [None]:
# Rename all 43 columns positionally: two identifier columns, then a
# (value, V-prefixed value) pair for each of the ten BID levels, the same for
# the ten ASK levels, and finally the target column Y.
bid_columns = [
    'BID0', 'VBID0', 'BID1', 'VBID1', 'BID2', 'VBID2', 'BID3', 'VBID3', 'BID4', 'VBID4',
    'BID5', 'VBID5', 'BID6', 'VBID6', 'BID7', 'VBID7', 'BID8', 'VBID8', 'BID9', 'VBID9',
]
ask_columns = [
    'ASK0', 'VASK0', 'ASK1', 'VASK1', 'ASK2', 'VASK2', 'ASK3', 'VASK3', 'ASK4', 'VASK4',
    'ASK5', 'VASK5', 'ASK6', 'VASK6', 'ASK7', 'VASK7', 'ASK8', 'VASK8', 'ASK9', 'VASK9',
]
dataset.columns = ['ID', 'TS'] + bid_columns + ask_columns + ['Y']

In [None]:
# Restrict the data to the rows whose ID column equals 'TEA'.
is_tea = dataset['ID'] == 'TEA'
tea = dataset[is_tea]

In [None]:
# Render the TEA subset for a quick visual check of the renamed columns.
tea

In [None]:
# Extract the first two ask and bid levels as (n_rows, 2) float64 arrays
# (vstack gives (2, n_rows); the transpose puts one level per column).
# `np.float64` is spelled out: the bare name `float64` only resolves because
# `%pylab inline` star-imports numpy into the notebook namespace.
asksall = np.array(np.vstack((tea['ASK0'], tea['ASK1'])).T, dtype=np.float64)
bidsall = np.array(np.vstack((tea['BID0'], tea['BID1'])).T, dtype=np.float64)
# Force a float dtype on the target as well, so the NaN handling that follows
# (np.isnan + assignment of 0.) always operates on a floating-point array.
yall = np.array(tea['Y'], dtype=np.float64)

In [None]:
# Number of samples (rows) available for the TEA subset.
lenall = asksall.shape[0]

# Feature matrix with one column per price level, in the order
# ASK0, BID0, ASK1, BID1.
Xall = np.column_stack((
    asksall[:, 0],  # ASK0
    bidsall[:, 0],  # BID0
    asksall[:, 1],  # ASK1
    bidsall[:, 1],  # BID1
))

# Overwrite missing values with 0 in both features and target (in place).
# NOTE(review): 0 is used as the fill value for features and for the target;
# confirm this is the intended treatment of missing quotes / labels.
missing_features = np.isnan(Xall)
Xall[missing_features] = 0.
missing_targets = np.isnan(yall)
yall[missing_targets] = 0.

In [None]:
# Sanity check: no NaN may remain in the features or in the target.
assert not np.isnan(Xall).any()
assert not np.isnan(yall).any()

In [None]:
# Two stacked panels: level-0 bid/ask on top, target Y below.
# Uses the explicit figure/axes interface (`plt` is provided by `%pylab inline`)
# so the figure size applies to this figure only instead of changing the
# global default, and labels every axis so the figure can be read on its own.
fig, (ax_quotes, ax_target) = plt.subplots(2, 1, figsize=(16, 8))

ax_quotes.plot(tea['BID0'], label='BID0')
ax_quotes.plot(tea['ASK0'], label='ASK0')
ax_quotes.set_title('TEA: BID0 and ASK0')
ax_quotes.set_ylabel('BID0 / ASK0')
ax_quotes.legend()

ax_target.plot(tea['Y'], label='Y')
ax_target.set_title('TEA: target Y')
ax_target.set_xlabel('row index')
ax_target.set_ylabel('Y')

fig.tight_layout()

In [None]:
# LightGBM training configuration: regression task evaluated with RMSE,
# four threads, fixed seed.
lgbparam = {
    'metric': 'rmse',
    'application': 'regression',
    'nthread': 4,
    'seed': 1,
}
# Number of boosting rounds passed to lgb.train.
lgbnumround = 100

In [None]:
# Index that splits the rows into a leading part and a trailing part:
# the first `train_fraction` of the rows come before `sep`.
train_fraction = 0.5
sep = int(train_fraction * lenall)

In [None]:
# Report how many rows fall before and after the split index.
rows_before_split = sep
rows_after_split = lenall - sep
print(rows_before_split, rows_after_split)

In [None]:
# Training set: rows before the split index.
lgbtrain = lgb.Dataset(Xall[:sep], label=yall[:sep])
# Hold-out set: rows from the split index onwards. It is tied to the training
# set via `reference`, as LightGBM recommends for validation data, so it is
# binned consistently with `lgbtrain` wherever it is used.
lgbtest = lgb.Dataset(Xall[sep:], label=yall[sep:], reference=lgbtrain)

In [None]:
def rmse(ytrue, ypred):
    """Return the root-mean-squared error between `ytrue` and `ypred`.

    Computed as the square root of sklearn's `mean_squared_error`.
    """
    mse = mean_squared_error(ytrue, ypred)
    return np.sqrt(mse)

In [None]:
# Log the evaluation metric every 10 boosting rounds.
# The `verbose_eval` keyword was removed from lgb.train in LightGBM 4.0 in
# favour of the `log_evaluation` callback; use the callback when the installed
# version provides it and fall back to the legacy keyword otherwise.
if hasattr(lgb, 'log_evaluation'):
    logging_kwargs = {'callbacks': [lgb.log_evaluation(period=10)]}
else:
    logging_kwargs = {'verbose_eval': 10}

# Fit on the leading rows while monitoring both the training and hold-out sets.
model = lgb.train(lgbparam, lgbtrain, lgbnumround,
                  valid_sets=[lgbtrain, lgbtest], **logging_kwargs)

# Predict on every row so both halves can be scored from one array.
ypred = model.predict(Xall)

if sep != 0:
    score1 = rmse(yall[:sep], ypred[:sep])  # RMSE on the rows used for training
    score2 = rmse(yall[sep:], ypred[sep:])  # RMSE on the held-out rows

    print(score1)
    print(score2)

In [None]:
# File name used by the save and load cells below for the trained LightGBM
# model; being relative, it resolves against the current working directory.
model_filename = 'my_model.txt'

In [None]:
# Persist the trained LightGBM booster to `model_filename`.
model.save_model(filename=model_filename)

In [None]:
# Load the pre-trained model back from `model_filename` into a new Booster.
# It is bound to `loaded_model`, so the in-memory `model` is left untouched.
loaded_model = lgb.Booster(model_file=model_filename)

In [1]:
# NOTE(review): this import sits mid-notebook rather than in the import cell.
from keras.models import load_model

# Load a saved Keras model from its HDF5 checkpoint.
# NOTE(review): this rebinds `model`, replacing the LightGBM booster that the
# earlier training cell stored under the same name.
model = load_model(filepath='weights.04-0.04.hdf5')

In [2]:
# Print the layer-by-layer summary (layer types, output shapes, parameter
# counts) of whatever object `model` currently refers to.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        (None, 10, 262)           0         
_________________________________________________________________
lstm_21 (LSTM)               (None, 10, 100)           145200    
_________________________________________________________________
dense_36 (Dense)             (None, 10, 1)             101       
Total params: 145,301
Trainable params: 145,301
Non-trainable params: 0
_________________________________________________________________
