In [16]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.tseries.offsets import BDay
# from hsmm_core.hmm import hmm_engine
# from hsmm_core.observation_models import ExpIndMixDiracGauss
# from hsmm_core.data_utils import load_data, TradingHoursye
# from hsmm_core.data_utils import load_data, TradingHours
# from hsmm_core.feature_spaces import hmm_features
# from hsmm_core.hmm import hmm_calibration
# from hsmm_core.data_utils import load_data, TradingHours
# from hsmm_core.labelling import DataLabellingSimple
# from hsmm_core.consts import ThresholdMethod, LabellingChoice
###############
from hsmm_core.observation_models import ExpIndMixDiracGauss
from hsmm_core.feature_spaces import hmm_features
from hsmm_core.hsmm_runner import HmmCalibration

from hsmm_core.hmm import hmm_impl

from hsmm_core.data_utils import DataLoader, TradingHours
from hsmm_core.labelling import DataLabellingSimple
from hsmm_core.consts import ThresholdMethod, LabellingChoice
import time
n_hidden_states = 2
##############
import pickle
from hsmm_core.consts import InitialisationMethod
import datetime as dt
plt.style.use('ggplot')
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,  GradientBoostingClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
sc = StandardScaler()

In [17]:
def remove_nans(features_tuple, labels, idx=1):
    # not the cleanest but useful
    # function to clean up nans as I seem to use it a lot, so better to have one function
    # combines the features and labels and removes rows with nans across so we dont lose the ordering
    # returns features and labels
    features_df = pd.concat([features_tuple[0], features_tuple[1], features_tuple[2], \
                             features_tuple[3]], axis=1, sort=False)
    labels_only = labels.drop(columns=['ReturnTradedPrice', 'Duration', 'states', 'TradedTime',
                                       'TradedPrice', 'ticker'], axis=1)
    df_concat = pd.concat([features_df, labels_only.iloc[:, 0:idx]], axis=1, sort='False')
    # only using 1st set of labels- but we can re-write this a bit
    df_x_nan = df_concat.dropna()  # dropping all nans
    label_column_loc_ = df_x_nan.shape[1] - 1  # location of labels column in the clean df
    labels_ = df_x_nan.iloc[:, label_column_loc_:label_column_loc_ + 1]  # keep pure labels
    features_ = df_x_nan.drop(df_x_nan.columns[label_column_loc_], axis=1)  # keeping the features only

    return features_, labels_ #return features and labels in the X,y order that scikit takes the input


In [18]:
class FitModels(object):
    def __init__(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    # # Train a SVM classification model

    def ridge_clf(self, cv_folds=5):
        model_ridge_clf = RidgeClassifierCV(alphas=np.arange(0.1, 1000, 0.1), \
                                            cv=KFold(cv_folds), normalize=True).fit(self.X_train,
                                                                                    self.y_train.values.ravel())
        # check if class_weight should be used as 'balanced'

        return model_ridge_clf

    def svm_clf(self, kernel_choice):
        param_grid = dict(kernel=[str(kernel_choice)],
                          C=[1, 5, 10, 25, 50, 100],
                          gamma=[0.0001, 0.001, 0.01, 0.02, 0.05, 0.01])
        svc = SVC(class_weight='balanced')
        clf = GridSearchCV(svc, param_grid)
        clf.fit(self.X_train, np.asanyarray(self.y_train).reshape(self.y_train.shape[0]))

        return clf

    def gradient_boost_clf(self, learning_rate=0.25):
        # this needs to be written properly- but this is somewhat optimised#
        GBR = GradientBoostingClassifier(n_estimators=3000, learning_rate=learning_rate,
                                         max_depth=4, max_features='sqrt',
                                         min_samples_leaf=15, min_samples_split=10)

        gb_boost_clf = GBR.fit(self.X_train, self.y_train)

        return gb_boost_clf

    def gp_clf(self):
        kernel = 1.0 * RBF([1.0])  # isotropic
        gpc_rbf_isotropic = GaussianProcessClassifier(kernel=kernel).fit(self.X_train, self.y_train)
        # hyperparameters are optimised by default
        return gpc_rbf_isotropic

    def random_forest_clf(self, no_est=100):
        rfc = RandomForestClassifier(n_estimators=no_est, max_depth=4, n_jobs=-1, warm_start=True)
        rfc.fit(X_train, y_train)

        return rfc

    def run_cv(self, clf_class, **kwargs):
        # Construct a kfolds object
        kf = KFold(len(self.y_train), n_folds=10, shuffle=True)
        y_pred = self.y_train.copy()

        # Iterate through folds
        for train_index, test_index in kf:
            X_train_local, X_test_local = self.X_train[train_index], self.X_train[test_index]
            y_train_local = self.y_train[train_index]
            # Initialize a classifier with key word arguments
            clf = clf_class(**kwargs)
            clf.fit(self.X_train, self.y_train)
            y_pred[test_index] = clf.predict(X_test_local)
        return y_pred

In [19]:

ticker = 'SYNT_2states'


data_dir = os.getenv('FINANCE_DATA')
features_path=os.path.join(data_dir,'features_models/features/')
labels_path= os.path.join(data_dir,'features_models/labels')

ticker_labels_path = os.path.join(labels_path,ticker+'/NON_DIRECTIONAL')

if not os.path.exists(os.path.join(data_dir, ticker)):
    os.makedirs(os.path.join(data_dir, ticker))
    
if not os.path.exists(ticker_labels_path):
    os.makedirs(ticker_labels_path)

    ####paths####
main_path = '/home/ak/Data/features_models/'

models_path=os.path.join(main_path,'models')
ticker_models_path = os.path.join(models_path, ticker)
# hmm_models_path = os.path.join(models_path,'hmm_models')
# features_ticker_path = os.path.join(features_path, ticker)
# predictions_path = os.path.join(main_path, 'predictions')
if not os.path.exists(ticker_models_path):
    os.makedirs(ticker_models_path)


In [20]:
features_path

'/media/ak/WorkDrive/Data/features_models/features/'

In [21]:
ticker_labels_path

'/media/ak/WorkDrive/Data/features_models/labels/SYNT_2states/NON_DIRECTIONAL'

In [22]:
#  Parameters -setting up the HMM etc
no_states = 2
sigmas = [0.05, 0.002]  # fast and slow
# Duration is measured in seconds for now (to be revised). lambda units are seconds^{-1}
# so here we consider

lambdas = [1. / 35., 1. / 10.]
weights = [0.1, 0.6]

obs_model = ExpIndMixDiracGauss(no_states)
obs_model.set_up_initials(priors={'sigmas': sigmas, 'lambdas': lambdas, 'weights': weights})

hmm_engine = hmm_impl(obs_model, no_states)

# set up some priors
tpm = np.array([[0.4, 0.6], [0.7, 0.3]])
pi = np.array([0.4, 0.6])
hmm_engine.set_up_initials(priors={'tpm': tpm, 'pi': pi})


In [23]:
no_dates = 30  # <-- this is the number of days you want
start_date = pd.datetime(2017, 6, 1)
dummy_dates = [start_date + BDay(i) for i in range(no_dates)]

no_points = 5000

rng = np.random.RandomState(1234)
trd_hours_filter = TradingHours.all_trading_day

# silly hack, add 1 millisecond so that the initial timestamp is printed with milliseconds and does not
# break the parsing of Timestamps when loading

morning_start = dt.time(8, 0, 0, 1)

initial_price = 100

for dd in dummy_dates:
    random_states = hmm_engine.sample_states(rng=rng, length=no_points)
    observation_points = obs_model.sample_data(no_points, rng=rng, state=random_states)
    # The first duration is always zero
    observation_points[0, 0] = 0.

    file_path = os.path.join(data_dir, ticker)
    file_name = '.'.join([dd.strftime('%Y%m%d'), 'csv'])

    data_to_save = pd.DataFrame({'states': random_states,
                                 'Duration': observation_points[:, 0],
                                 'ReturnTradedPrice': observation_points[:, 1],
                                 })
    data_to_save['TradedTime'] = pd.Series()

    # Now calculate the Traded prices and traded times in reverse order as to what would happen
    # with real data.
    # data_to_save.loc[0, 'TradedTime'] = dt.datetime.combine(dd.date(), morning_start)
    data_to_save['TradedTime'] = data_to_save['Duration'].cumsum().apply(lambda dur:
                                                                         (dt.datetime.combine(dd.date(),
                                                                                              morning_start) + \
                                                                          dt.timedelta(seconds=dur))  .time())

    data_to_save['TradedPrice'] = initial_price * (1. + data_to_save['ReturnTradedPrice']).cumprod()
    data_to_save.to_csv(os.path.join(file_path, file_name), index=False)

print "ok-produced data" # can remove this a bit later

ok-produced data


In [24]:
ticker

'SYNT_2states'

In [25]:
data_loader_init = {
        'trading_hours_filter': TradingHours.only_mkt_hours
    }

hmm_init = {
    'obs_model_name': 'CensoredExpIndMixDiracGauss',
    'em_obs_init_method': InitialisationMethod.cluster,
    'em_hidden_init_method': InitialisationMethod.uniform,
    'no_hidden_states': no_states,
    'update_tag': 'tpsml'
}

data_loader = DataLoader(**data_loader_init)
# keep the hash of the data loader to uniquely identify how the data was loaded ( perhaps a dollar clock was
# used), as this affects the calibration of the hmm
data_loader_hash = data_loader.data_loader_hash()

data = data_loader.load_trades_data(ticker, start_date=start_date, end_date=dummy_dates[-1])

hmm_calibration_engine = HmmCalibration(init_params=hmm_init)
hmm_calibration_engine.run_calibration_all_data(ticker, data, data_loader_hash,
                                                force_recalc=False, use_multiprocessing=False,
                                                n_processes=2)

# Create the hmm feature engine and for every change the hmm model in the features engine
features_engine = hmm_features()

In [26]:

# Create Labels ###

window = 25
threshold = 0.1

labelling_method_params = [{

    'labelling_method': LabellingChoice.price_move_in_window,
    'rolling_window': window,
    # Uncomment below if you want to check a price move only above a certain level
    'updown_threshold': threshold,  # this is multiplied by 100
    'threshold_method': ThresholdMethod.arbitrary,
}]

for label_init in labelling_method_params:
    print label_init
    labeller = DataLabellingSimple(label_init)
    labeller.label_training_data(data)

{'rolling_window': 25, 'labelling_method': 'PrMov', 'updown_threshold': 0.1, 'threshold_method': 'arbitrary'}


In [27]:
  for date, date_data in data.iteritems():
        date_data.to_csv(os.path.join(ticker_labels_path, str(date)+'.csv'))


In [28]:
stored_hmm, _ = hmm_calibration_engine.get_calibrated_hmm(ticker, date, data_loader_hash)

In [30]:
for date, date_data in data.iteritems():
        stored_hmm, _ = hmm_calibration_engine.get_calibrated_hmm(ticker, date, data_loader_hash)
    
        features_engine.hmm = stored_hmm
        print "doing date:",data[date]
        features_load = features_engine.generate_features(data[date])

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.000000   
1     119.100307           0.003699       0  08:01:59.100308   100.369866   
2      27.057158           0.000000       0  08:02:26.157466   100.369866   
3       8.505154          -0.028357       0  08:02:34.662620    97.523646   
4      64.233908           0.046480       0  08:03:38.896528   102.056514   
5      80.855782          -0.052958       0  08:04:59.752310    96.651778   
6       0.272857           0.000000       1  08:05:00.025167    96.651778   
7      24.463791           0.000000       0  08:05:24.488958    96.651778   
8       5.048958           0.000865       1  08:05:29.537916    96.735410   
9       2.306107           0.000000       1  08:05:31.844023    96.735410   
10      3.984119          -0.016358       0  08:05:35.828142    95.153058   
11      4.503969          -0.019325       0  08:05:40.332112    

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.000000   
1       3.445632           0.011874       0  08:00:03.445633   101.187357   
2       6.772934           0.000000       1  08:00:10.218568   101.187357   
3       0.831942           0.000000       1  08:00:11.050510   101.187357   
4      10.384612           0.000000       0  08:00:21.435121   101.187357   
5      13.351862           0.062778       0  08:00:34.786984   107.539704   
6      41.639662          -0.032246       0  08:01:16.426646   104.071965   
7      19.587408          -0.069183       0  08:01:36.014053    96.871952   
8      12.445722          -0.021343       0  08:01:48.459776    94.804398   
9       3.174699          -0.033841       0  08:01:51.634475    91.596170   
10     17.024946           0.000824       1  08:02:08.659420    91.671624   
11      0.395233          -0.070098       0  08:02:09.054653    

doing date:        Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0      0.000000           0.000000       1  08:00:00.000001   100.000000   
1     13.923823          -0.004502       1  08:00:13.923824    99.549836   
2      1.617750          -0.002909       1  08:00:15.541574    99.260247   
3     31.357224           0.000000       1  08:00:46.898798    99.260247   
4      0.210988          -0.065306       0  08:00:47.109787    92.777945   
5     16.825242           0.000000       1  08:01:03.935028    92.777945   
6      8.981754           0.000000       1  08:01:12.916782    92.777945   
7     34.010839           0.011628       0  08:01:46.927622    93.856810   
8     20.893971           0.000000       1  08:02:07.821593    93.856810   
9     59.090624          -0.002204       0  08:03:06.912217    93.649915   
10     5.529673           0.000000       1  08:03:12.441890    93.649915   
11     0.604086           0.000000       1  08:03:13.045976    93.649915   


doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.331241   
1      31.685834           0.000000       1  08:00:31.685835   100.331241   
2      24.003325          -0.066471       0  08:00:55.689160    93.662126   
3      16.257371           0.111394       0  08:01:11.946531   104.095546   
4      47.855575           0.007663       0  08:01:59.802106   104.893246   
5       8.710358          -0.042456       0  08:02:08.512464   100.439873   
6      11.263783          -0.021530       0  08:02:19.776247    98.277399   
7      20.569050          -0.002454       1  08:02:40.345297    98.036189   
8     107.336197           0.012769       0  08:04:27.681494    99.288059   
9       0.770847           0.000000       1  08:04:28.452341    99.288059   
10      3.715651           0.000000       1  08:04:32.167992    99.288059   
11     26.181800           0.046519       0  08:04:58.349792   1

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       0  08:00:00.000001   105.330292   
1       5.970328           0.000000       1  08:00:05.970329   105.330292   
2       0.995310          -0.000078       1  08:00:06.965639   105.322107   
3      21.914990           0.013169       0  08:00:28.880629   106.709055   
4       2.364787          -0.002636       1  08:00:31.245417   106.427725   
5      19.535789          -0.006395       0  08:00:50.781206   105.747104   
6       2.973894           0.004542       1  08:00:53.755100   106.227435   
7      11.469284           0.000000       1  08:01:05.224384   106.227435   
8      18.777107          -0.027648       0  08:01:24.001491   103.290441   
9       6.293111           0.000000       1  08:01:30.294601   103.290441   
10     39.972897           0.044190       0  08:02:10.267498   107.854863   
11     19.221515          -0.019153       0  08:02:29.489013   1

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       0  08:00:00.000001   102.929389   
1       0.335164           0.001894       1  08:00:00.335165   103.124292   
2      57.721582           0.092549       0  08:00:58.056747   112.668327   
3       1.527991          -0.004576       1  08:00:59.584738   112.152798   
4      19.113179          -0.000683       1  08:01:18.697917   112.076185   
5      15.391681          -0.017811       0  08:01:34.089598   110.080008   
6       7.170095           0.001219       1  08:01:41.259692   110.214154   
7      52.456835           0.043282       0  08:02:33.716528   114.984415   
8       0.132327           0.000000       1  08:02:33.848854   114.984415   
9      15.365635          -0.019910       0  08:02:49.214490   112.695114   
10     23.595915           0.006906       0  08:03:12.810405   113.473384   
11      9.276511           0.000123       1  08:03:22.086916   1

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       0  08:00:00.000001    90.941133   
1      21.905311          -0.050154       0  08:00:21.905312    86.380093   
2       4.272653           0.000000       1  08:00:26.177965    86.380093   
3       1.923772           0.000000       1  08:00:28.101737    86.380093   
4      29.022969           0.091799       0  08:00:57.124706    94.309679   
5      20.888958          -0.029098       0  08:01:18.013664    91.565468   
6       4.341674           0.000630       1  08:01:22.355338    91.623136   
7      22.260320          -0.010729       0  08:01:44.615658    90.640101   
8      10.859123           0.000000       1  08:01:55.474780    90.640101   
9      41.179792          -0.029595       0  08:02:36.654573    87.957610   
10     56.684900           0.065776       0  08:03:33.339473    93.743124   
11      3.052203           0.000000       1  08:03:36.391676    

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       0  08:00:00.000001   102.437010   
1      77.162689          -0.015501       0  08:01:17.162690   100.849140   
2      52.151621          -0.054327       0  08:02:09.314311    95.370338   
3      17.056541           0.000000       0  08:02:26.370851    95.370338   
4       2.736925           0.000000       1  08:02:29.107776    95.370338   
5       8.323291           0.053288       0  08:02:37.431067   100.452471   
6      30.694687          -0.015871       0  08:03:08.125754    98.858221   
7       3.242119          -0.002614       0  08:03:11.367873    98.599797   
8      26.719491           0.000000       1  08:03:38.087365    98.599797   
9       2.164635           0.000000       1  08:03:40.251999    98.599797   
10      3.756088           0.016537       0  08:03:44.008087   100.230296   
11      3.674267          -0.032732       0  08:03:47.682354    

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001    99.927373   
1      18.861995           0.037684       0  08:00:18.861996   103.693004   
2      33.282283          -0.029008       0  08:00:52.144280   100.685116   
3      27.231647           0.000000       1  08:01:19.375927   100.685116   
4      14.880709           0.001506       1  08:01:34.256636   100.836777   
5       1.585075           0.086653       0  08:01:35.841712   109.574603   
6       1.095288           0.019189       0  08:01:36.937000   111.677199   
7      30.878264          -0.008176       0  08:02:07.815264   110.764176   
8       3.267761          -0.002183       1  08:02:11.083025   110.522417   
9       2.918976          -0.000332       1  08:02:14.002001   110.485725   
10     69.502704          -0.001191       0  08:03:23.504705   110.354117   
11     18.551573           0.000000       1  08:03:42.056278   1

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.000000   
1      11.237666          -0.060204       0  08:00:11.237667    93.979578   
2      33.761705           0.024848       0  08:00:44.999372    96.314801   
3       4.331867           0.018625       0  08:00:49.331239    98.108649   
4       8.551870           0.000000       1  08:00:57.883109    98.108649   
5     112.980354           0.015297       0  08:02:50.863463    99.609447   
6       1.218897           0.003084       1  08:02:52.082360    99.916611   
7       2.147849           0.000000       1  08:02:54.230209    99.916611   
8      55.947253           0.007420       0  08:03:50.177462   100.657970   
9      27.260311           0.062328       0  08:04:17.437772   106.931777   
10      2.973123           0.000828       1  08:04:20.410895   107.020279   
11    164.140329           0.062660       0  08:07:04.551224   1

doing date:        Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0      0.000000           0.000000       0  08:00:00.000001    93.611844   
1      0.526210           0.000000       1  08:00:00.526211    93.611844   
2      5.514935          -0.074608       0  08:00:06.041146    86.627614   
3      0.061212           0.000000       1  08:00:06.102358    86.627614   
4      6.893151           0.050353       0  08:00:12.995509    90.989534   
5      0.360299          -0.006300       1  08:00:13.355809    90.416336   
6      1.135536           0.002772       1  08:00:14.491345    90.667011   
7     42.819857           0.054577       0  08:00:57.311202    95.615388   
8      9.663437          -0.000512       1  08:01:06.974639    95.566461   
9      2.680312           0.077362       0  08:01:09.654951   102.959651   
10     0.037578           0.000000       1  08:01:09.692529   102.959651   
11     2.404898           0.000000       0  08:01:12.097427   102.959651   


doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.000000   
1       2.216331           0.021335       0  08:00:02.216332   102.133522   
2       5.482925           0.000000       1  08:00:07.699257   102.133522   
3       6.448554           0.000000       1  08:00:14.147811   102.133522   
4      12.985278           0.000000       0  08:00:27.133089   102.133522   
5       7.680754           0.000000       1  08:00:34.813843   102.133522   
6       9.916677           0.000000       1  08:00:44.730520   102.133522   
7       3.623502           0.024523       0  08:00:48.354022   104.638108   
8      15.081375          -0.002626       1  08:01:03.435398   104.363378   
9      10.258445           0.000000       1  08:01:13.693842   104.363378   
10      6.936638           0.000182       1  08:01:20.630480   104.382393   
11     12.187416           0.000000       1  08:01:32.817897   1

doing date:        Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0      0.000000           0.000000       1  08:00:00.000001    99.909358   
1     20.645428           0.000000       1  08:00:20.645429    99.909358   
2     15.500623          -0.034258       0  08:00:36.146052    96.486697   
3      0.217982           0.048442       0  08:00:36.364034   101.160736   
4      0.729570           0.000000       1  08:00:37.093604   101.160736   
5      1.319444           0.032759       0  08:00:38.413048   104.474671   
6     19.006920           0.000000       1  08:00:57.419968   104.474671   
7     11.859119          -0.036912       0  08:01:09.279086   100.618297   
8      1.092529           0.000000       1  08:01:10.371615   100.618297   
9      1.207171           0.000000       1  08:01:11.578787   100.618297   
10    31.863261          -0.045432       0  08:01:43.442047    96.047050   
11    46.001615           0.028935       0  08:02:29.443663    98.826202   


doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       1  08:00:00.000001   100.168349   
1       8.023136          -0.009767       0  08:00:08.023137    99.189975   
2      10.988364           0.000000       1  08:00:19.011501    99.189975   
3       1.513458           0.019322       0  08:00:20.524959   101.106481   
4     107.538448           0.000276       0  08:02:08.063407   101.134381   
5      35.695825           0.045464       0  08:02:43.759232   105.732353   
6      55.557731           0.000000       1  08:03:39.316963   105.732353   
7       6.407005           0.000000       1  08:03:45.723968   105.732353   
8       6.636469           0.016286       0  08:03:52.360437   107.454342   
9       4.634664           0.000924       1  08:03:56.995101   107.553606   
10      2.837528          -0.076366       0  08:03:59.832630    99.340152   
11     10.496202          -0.054060       0  08:04:10.328831    

doing date:         Duration  ReturnTradedPrice  states       TradedTime  TradedPrice  \
0       0.000000           0.000000       0  08:00:00.000001    98.866543   
1      72.236440           0.034853       0  08:01:12.236441   102.312327   
2      10.739367           0.010934       0  08:01:22.975809   103.430975   
3      38.665886           0.076200       0  08:02:01.641695   111.312451   
4       1.857506          -0.000829       1  08:02:03.499200   111.220213   
5      25.523787           0.014962       0  08:02:29.022987   112.884264   
6       7.746660           0.024902       0  08:02:36.769647   115.695339   
7     167.148172          -0.031121       0  08:05:23.917819   112.094733   
8      21.971057           0.000000       1  08:05:45.888876   112.094733   
9      17.668508           0.101537       0  08:06:03.557384   123.476485   
10      7.093437          -0.006143       0  08:06:10.650821   122.717941   
11      1.135283          -0.001651       1  08:06:11.786104   1

### Feature Creation ###

In [None]:

n_hidden_states = no_states

init_params = {
    "obs_model_params": {
                                'obs_model_name': 'ExpIndMixDiracGauss',
                                'em_init_method': InitialisationMethod.cluster

    },
    "hidden_model_params": {
                                'no_hidden_states': no_states,
                                'pi':pi,
                                'tpm': tpm,
                                'em_init_method': InitialisationMethod.uniform
    },
    "update_tag": 'tpsml'
}


data = load_data(ticker, which_trading_hours=TradingHours.all_trading_day)

In [None]:
trd_hours_filter = TradingHours.all_trading_day
hmm_calibration_engine = hmm_calibration(no_parallel_procs=None,
                                         init_params=init_params)


trained_hmms = hmm_calibration_engine.hmm_fit_func(ticker, data, trd_hours_filter,
                                                   force_recalc=False)


for date, date_hmm in trained_hmms.iteritems():
    feature_engine = hmm_features(date_hmm)
    features = feature_engine.generate_features(data[date])

In [None]:
for date, date_data in data.iteritems():
  

## uncomment below:  for saving hmm models ##

In [None]:
# ###saving trained model hmms###
# seq_model = "_".join((str(ticker),str(n_hidden_states),'state',"trained","hmm","models", ".pickle"))
# print("saving the model:", seq_model)
# pickle.dump(init_params, open(os.path.join(models_path,seq_model), 'wb'))

## create labels ##

### Loading Labels###

In [None]:

non_directional = os.path.join(ticker_labels_path)
non_directional

In [None]:
data_dic = load_data(ticker, which_trading_hours=TradingHours.all_trading_day)
## clf fitting##
for date, date_hmm in trained_hmms.iteritems():
    feature_engine = hmm_features(date_hmm)
    features_load = feature_engine.generate_features(data_dic[date])
    labels_load= pd.read_csv(os.path.join(non_directional,str(date)+'.csv'))
    features, labels_clean = remove_nans(features_load, labels_load)
    x_std = sc.fit_transform(features.values.astype(np.float)) #fit & transform the features
    X_train, X_test, y_train, y_test = train_test_split( \
        x_std, labels_clean, test_size=0.05, random_state=1, stratify=labels_clean) #probably can get rid of this
    models_cls = FitModels(X_train, y_train)
    best_clfs = {#'SVC': models_cls.svm_clf(kernel_choice="rbf"),
                 #'RIDGE_clf': models_cls.ridge_clf()
                 #'GBOOST': models_cls.gradient_boost_clf(),
                 'GP_clf': models_cls.gp_clf()
                 #'RF_clf': models_cls.random_forest_clf(),
                 }
    # This is sequence for the name of the best classifiers.
    seq_clf = "_".join(("synthetic_data",str(date),labels_clean.columns.values[0],"clfs", ".pickle"))
    print("saving the classifiers:",seq_clf)
    pickle.dump(best_clfs, open(os.path.join(ticker_models_path,seq_clf), 'wb')) 

In [None]:
ticker_labels_path,

In [None]:
pd.read_csv(os.path.join(ticker_labels_path,str(date)+'.csv'))