In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats.mstats import winsorize
from statsmodels.tools.eval_measures import rmse
from sklearn.linear_model import Ridge

# Importing libraries
import os
import warnings
warnings.filterwarnings('ignore')

plt.style.use('fivethirtyeight')
# Above is a special style template for matplotlib, highly useful for visualizing time series data
from pylab import rcParams
from plotly import tools
# import plotly.plotly as py
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode(connected=True)
# import plotly.graph_objs as go
# import plotly.figure_factory as ff
import statsmodels.api as sm
from numpy.random import normal, seed
from scipy.stats import norm
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.arima_model import ARIMA
import math
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet

In [7]:
class Model:
    def get_target(self, df):
        df['return'] = (df['f24'].shift(-78) - df['f24']) / df['f24']
        df['return'] = df['return'].replace([np.inf, -np.inf], 0)
        return df[['return']]

    def prepare_features(self, df, lag):
        """
        :param df: this is the data you want to use to prepare the features for your model
        :return: X, a matrix of features (can be a numpy array or a pandas dataframe, your choice!)
        """
        # todo: implement this function - you can use some of the features given to you or you can build a batch of
        #  your own based on the data that you are given.
        # *** PLEASE ENSURE THAT DO NOT INTRODUCE A LOOKAHEAD IN THIS MATRIX ***
        # *** Bonus points for coding a function that tests against lookahead in X ***

        ## 1. Data Transformation
        return_type_columns = ['f0', 'f1', 'f2', 'f3', 'f11', 'f12']
        price_type_columns = ['f4', 'f5', 'f6', 'f7', 'f8', 'f9',
                              'f10', 'f13', 'f16', 'f17', 'f18', 'f19',
                              'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29',
                              'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39',
                              'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49',
                              'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59',
                              'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69',
                              'f70', 'f71', 'f72', 'f73', 'f74', 'f75', 'f76']
        integer_type_columns = ['f14', 'f15', 'f77', 'f78']

        if self.feature_engineering == True:
            for col in price_type_columns:
                df[col] = (df[col] - df[col].shift(78)) / df[col].shift(78)

            for col in integer_type_columns:
                df[col] = (df[col] - df[col].shift(78)) / df[col].shift(78)

        X = df

        # handle case of infinity
        X = X.replace([np.inf], 1)
        X = X.replace([-np.inf], -1)

        ## 2. Drop highly correlated variables
        X.drop(columns=['f5', 'f6', 'f7', 'f9', 'f8'], inplace=True)
        X.drop(columns=['f25', 'f26', 'f28', 'f29', 'f31', 'f32', 'f34', 'f35'], inplace=True)
        X.drop(columns=['f37', 'f38', 'f39', 'f42', 'f44', 'f45', 'f47', 'f48'], inplace=True)
        X.drop(columns=['f51', 'f52', 'f54', 'f55', 'f57', 'f58', 'f60', 'f61'], inplace=True)
        X.drop(columns=['f65', 'f64', 'f67', 'f63', 'f71', 'f70', 'f74', 'f73'], inplace=True)

        ## 3. Outlier Treatment
        if self.outlier_treatment == True:
            window_size = 20 * 79  # 1 month (working days only)
            threshold = 2
            rolling_mean = X.rolling(window=window_size, min_periods=1).mean()
            rolling_std = X.rolling(window=window_size, min_periods=1).std()
            lower_bound = rolling_mean - threshold * rolling_std
            upper_bound = rolling_mean + threshold * rolling_std
            req_cols = list(X.columns)
            req_cols.remove('return')
            #         print("req_cols without 'return' ", req_cols)
            for column in req_cols:
                X[column] = X[column].clip(lower=lower_bound[column], upper=upper_bound[column], axis=0)

        # include lag columns
        if self.X_lag == True:
            for column_name in X.columns:
                for i in range(1, lag + 1):
                    lagged_column_name = f'{column_name}_lag_{i}'
                    X[lagged_column_name] = X[column_name].shift(i * 78)
        else:
            column_name = 'return'
            for i in range(1, lag + 1):
                lagged_column_name = f'{column_name}_lag_{i}'
                X[lagged_column_name] = X[column_name].shift(i * 78)

        # delete return columns
        del X['return']

        return X

    def read_data(self, path_to_data):
        data = pd.read_csv(path_to_data, index_col='time', parse_dates=['time'])
        data.index = pd.to_datetime(data.index, format='%d-%m-%Y %H:%M')
        data.sort_index(inplace=True)
        data = data.fillna(method='ffill')
        return data

    def fit(self, path_to_train_csv, *args, **kwargs):
        """
        ### AG:  TASKS
        ## Model Selection:
            Linear: Base Model
            Ridge : Handles Multicollinearity
            RandomForest (large number of uncorrelated features, fail if the potential y values lie outside)
            Time Series
        ## Train-Test Split to get the optimal model
        ## Train complete model
        ## Store the optimal model
        """

        # get the values from kwargs
        self.alphas = kwargs['alphas']
        self.lags = kwargs['lags']
        self.l1_ratio = kwargs['l1_ratio']
        self.X_lag = kwargs['X_lag']
        self.outlier_treatment = kwargs['outlier_treatment']
        self.feature_engineering = kwargs['feature_engineering']

        # Range of hyperparameters to test for Lasso, Ridge, and Elastic Net
        best_alpha = None
        best_l1_ratio = None
        best_model = None
        best_model_name = None
        best_lag = None
        lowest_rmse = float('inf')
        res_list = []

        # iterate across lags
        for lag in self.lags:
            self.train = self.read_data(path_to_train_csv)
            self.y = self.get_target(self.train)

            self.X = self.prepare_features(self.train, lag)

            ## AG: Drop missing values
            combined = pd.concat([self.X, self.y], axis=1)
            combined_clean = combined.dropna()
            self.X_clean = combined_clean.drop(columns='return')
            self.y_clean = combined_clean['return']

            total_length = len(self.X_clean)
            train_size = int(total_length * 0.8)
            X_train, y_train = self.X_clean[:train_size], self.y_clean[:train_size]
            X_test, y_test = self.X_clean[train_size:], self.y_clean[train_size:]

            for model_type in args:
                if model_type == 'ols':
                    model = LinearRegression()
                    model.fit(X_train, y_train)
                    #                     print("model.coef_: ",model.coef_)
                    y_pred = pd.Series(np.nan, index=X_test.index)
                    non_nan_rows = ~X_test.isnull().any(axis=1)
                    y_pred[non_nan_rows] = model.predict(X_test[non_nan_rows])
                    test_rmse = get_rmse(y_test, y_pred)
                    res_list.append(
                        {'model': model_type, 'lag': lag, 'alpha': None, 'l1_ratio': None, 'test_rmse': test_rmse})
                    if test_rmse < lowest_rmse:
                        best_alpha = None
                        best_l1_ratio = None
                        best_model = model
                        best_model_name = model_type
                        best_lag = lag
                        lowest_rmse = test_rmse

                else:
                    total_length = len(X_train)
                    train_size = int(total_length * 0.8)
                    X_train_fold, y_train_fold = X_train[:train_size], y_train[:train_size]
                    X_cv_fold, y_cv_fold = X_train[train_size:], y_train[train_size:]

                    # test for lasso model
                    if model_type == 'lasso':
                        lowest_cv_rmse = float('inf')
                        best_model_alpha = None
                        for alpha in self.alphas:
                            #                             print(alpha)
                            #                             print(X_train_fold,y_train_fold)
                            model = Lasso(alpha=alpha)
                            model.fit(X_train_fold, y_train_fold)
                            y_cv_pred = pd.Series(np.nan, index=X_cv_fold.index)
                            non_nan_rows = ~X_cv_fold.isnull().any(axis=1)
                            y_cv_pred[non_nan_rows] = model.predict(X_cv_fold[non_nan_rows])
                            cv_rmse = get_rmse(y_cv_fold, y_cv_pred)
                            if cv_rmse < lowest_cv_rmse:
                                best_model_alpha = alpha
                                lowest_cv_rmse = cv_rmse

                        # get the test_rmse for the model
                        model = Lasso(alpha=best_model_alpha)
                        model.fit(X_train, y_train)
                        y_pred = pd.Series(np.nan, index=X_test.index)
                        non_nan_rows = ~X_test.isnull().any(axis=1)
                        y_pred[non_nan_rows] = model.predict(X_test[non_nan_rows])
                        test_rmse = get_rmse(y_test, y_pred)

                        res_list.append({'model': model_type, 'lag': lag, 'alpha': best_model_alpha, 'l1_ratio': None,
                                         'test_rmse': test_rmse})
                        if test_rmse < lowest_rmse:
                            best_alpha = best_model_alpha
                            best_l1_ratio = None
                            best_model = model
                            best_model_name = model_type
                            best_lag = lag
                            lowest_rmse = test_rmse

                    # test for ridge model
                    if model_type == 'ridge':
                        lowest_cv_rmse = float('inf')
                        best_model_alpha = None
                        for alpha in self.alphas:
                            model = Ridge(alpha=alpha)
                            model.fit(X_train_fold, y_train_fold)
                            y_cv_pred = pd.Series(np.nan, index=X_cv_fold.index)
                            non_nan_rows = ~X_cv_fold.isnull().any(axis=1)
                            y_cv_pred[non_nan_rows] = model.predict(X_cv_fold[non_nan_rows])
                            cv_rmse = get_rmse(y_cv_fold, y_cv_pred)
                            #                         print("model: {}, alpha: {}, cv_rmse: {}".format(model_type,alpha,cv_rmse))
                            if cv_rmse < lowest_cv_rmse:
                                best_model_alpha = alpha
                                lowest_cv_rmse = cv_rmse

                        # get the test_rmse for the model
                        model = Ridge(alpha=best_model_alpha)
                        model.fit(X_train, y_train)
                        y_pred = pd.Series(np.nan, index=X_test.index)
                        non_nan_rows = ~X_test.isnull().any(axis=1)
                        y_pred[non_nan_rows] = model.predict(X_test[non_nan_rows])
                        test_rmse = get_rmse(y_test, y_pred)

                        res_list.append({'model': model_type, 'lag': lag, 'alpha': best_model_alpha, 'l1_ratio': None,
                                         'test_rmse': test_rmse})
                        if test_rmse < lowest_rmse:
                            best_alpha = best_model_alpha
                            best_l1_ratio = None
                            best_model = model
                            best_model_name = model_type
                            best_lag = lag
                            lowest_rmse = test_rmse

                    # test for ridge model
                    if model_type == 'elastic_net':
                        lowest_cv_rmse = float('inf')
                        best_model_l1_ratio = 0
                        best_model_alpha = None
                        for alpha in self.alphas:
                            for l1_ratio in self.l1_ratio:
                                model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
                                model.fit(X_train_fold, y_train_fold)
                                y_cv_pred = pd.Series(np.nan, index=X_cv_fold.index)
                                non_nan_rows = ~X_cv_fold.isnull().any(axis=1)
                                y_cv_pred[non_nan_rows] = model.predict(X_cv_fold[non_nan_rows])
                                cv_rmse = get_rmse(y_cv_fold, y_cv_pred)
                                #                             print("model: {}, alpha: {}, l1_ratio: {},cv_rmse: {}".format(model_type,alpha,l1_ratio,cv_rmse))
                                if cv_rmse < lowest_cv_rmse:
                                    best_model_alpha = alpha
                                    best_model_l1_ratio = l1_ratio
                                    lowest_cv_rmse = cv_rmse

                        # get the test_rmse for the model
                        model = ElasticNet(alpha=best_model_alpha, l1_ratio=best_model_l1_ratio)
                        model.fit(X_train, y_train)
                        y_pred = pd.Series(np.nan, index=X_test.index)
                        non_nan_rows = ~X_test.isnull().any(axis=1)
                        y_pred[non_nan_rows] = model.predict(X_test[non_nan_rows])
                        test_rmse = get_rmse(y_test, y_pred)

                        res_list.append({'model': model_type, 'lag': lag, 'alpha': best_model_alpha,
                                         'l1_ratio': best_model_l1_ratio, 'test_rmse': test_rmse})
                        if test_rmse < lowest_rmse:
                            best_alpha = best_model_alpha
                            best_l1_ratio = best_model_l1_ratio
                            best_model = model
                            best_model_name = model_type
                            best_lag = lag
                            lowest_rmse = test_rmse

        self.model = best_model
        self.model_name = best_model_name
        self.res_list = res_list
        self.lag = best_lag
        self.alpha = best_alpha
        self.l1_ratio = best_l1_ratio
        self.rmse = lowest_rmse

        return self

    def predict(self, path_to_test_csv, *args, **kwargs):
        # todo: read test csv
        # todo: do any operation you would like on it
        self.test = pd.read_csv(path_to_test_csv, index_col='time', parse_dates=['time'])
        self.test.index = pd.to_datetime(self.test.index, format='%d-%m-%Y %H:%M')
        self.test.sort_index(inplace=True)
        self.test = self.test.fillna(method='ffill')

        # todo: prepare features for the model predict
        self.y_test = self.get_target(self.test)
        #         print("self.lag: ",self.lag)
        self.X_test = self.prepare_features(self.test, self.lag)

        # todo: calculate your model prediction (call it ypred) using X and any other information you want to use
        ypred = pd.Series(np.nan, index=self.X_test.index)
        non_nan_rows = ~self.X_test.isnull().any(axis=1)
        ypred[non_nan_rows] = self.model.predict(self.X_test[non_nan_rows])

        # this follows the scikit-learn pattern by returning ypred
        return ypred


def get_rmse(ypred, ytest):
    """
    Root-mean-squared error between two index-aligned series.

    The inputs are aligned on their index and every row in which either value
    is missing is discarded before the error is computed. The two columns are
    selected by position, so the result does not depend on how (or whether)
    the inputs are named. The measure is symmetric: argument order is
    irrelevant.

    :param ypred: pandas Series (or single-column DataFrame) of predictions
    :param ytest: pandas Series (or single-column DataFrame) of realised values
    :return: the RMSE over rows present in both inputs, or NaN when there are
        no such rows
    """
    paired = pd.concat([ypred, ytest], axis=1).dropna()
    if paired.empty:
        return np.nan

    left = paired.iloc[:, 0].to_numpy(dtype=float)
    right = paired.iloc[:, 1].to_numpy(dtype=float)
    return np.sqrt(np.mean((left - right) ** 2))



if __name__ == '__main__':
    # Model families to compare; each name is handled by Model.fit.
    fit_args = ['ols', 'lasso', 'ridge']

    # Search grid and feature-pipeline switches forwarded to Model.fit.
    fit_kwargs = dict(
        alphas=np.linspace(0, 1, 11),
        lags=[0],
        l1_ratio=np.linspace(0, 1, 11),
        X_lag=False,
        outlier_treatment=True,
        feature_engineering=False,
    )

    train_csv_path, test_csv_path = 'train.csv', 'test.csv'

    # Select and train the model on the training file.
    clf = Model()
    clf.fit(train_csv_path, *fit_args, **fit_kwargs)

    # No extra prediction options are used at the moment.
    predict_args = []
    predict_kwargs = {}
    ypred = clf.predict(test_csv_path, *predict_args, **predict_kwargs)

    # Out-of-sample RMSE against the realised returns of the test file.
    print(get_rmse(ypred, clf.y_test))

0.02862831674261307


In [8]:
# Display the estimator that fit() kept as the best one.
clf.model

In [9]:
# Display the training frame loaded by fit(), including the 'return' column added by get_target().
clf.train

Unnamed: 0_level_0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,return
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1
2015-11-18 09:30:00,0.000000,0.000000,0.000000,0.000000,,,,,,,,0.9609,0.0413,4836.975,1555.0,115.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,,,,-0.001000
2015-11-18 09:35:00,0.001689,-0.001088,0.000058,0.000615,85.97,,,,15.53,14.64,15.30,0.9901,0.0329,12362.375,2240.0,154.0,0.0592,0.0094,0.0018,0.0440,0.0019,0.0006,0.0003,0.0018,1.0017,1.0017,1.0010,1.0007,1.0017,0.9987,1.0030,1.0017,0.9974,1.0043,1.0017,0.9969,1.0048,0.9989,0.9993,0.9989,1.0004,0.9999,0.9976,1.0023,1.0001,0.9965,1.0036,1.0002,0.9961,1.0041,1.0001,1.0001,1.0000,1.0001,1.0001,0.9992,1.0009,1.0001,0.9984,1.0017,1.0001,0.9981,1.0019,1.0006,1.0006,1.0004,1.0002,1.0006,1.0002,1.0004,1.0006,1.0001,1.0005,1.0006,1.0000,1.0006,67368.450,11762.0,933.0,-0.001797
2015-11-18 09:40:00,0.004288,0.004110,0.001356,0.000040,85.90,,,,15.46,14.72,15.26,1.0194,0.0060,5613.975,1719.0,57.0,0.0191,0.0166,0.0011,0.0204,0.0010,0.0010,0.0002,0.0010,1.0060,1.0060,1.0058,1.0001,1.0060,1.0039,1.0021,1.0060,0.9998,1.0062,1.0060,0.9977,1.0084,1.0030,1.0030,1.0028,1.0002,1.0030,1.0010,1.0020,1.0030,0.9981,1.0050,1.0030,0.9966,1.0064,1.0014,1.0016,1.0014,1.0001,1.0022,1.0008,1.0014,1.0026,0.9992,1.0034,1.0027,0.9984,1.0044,1.0007,1.0007,1.0007,1.0000,1.0009,1.0006,1.0003,1.0012,1.0003,1.0009,1.0013,1.0001,1.0012,46947.150,7726.0,469.0,-0.009642
2015-11-18 09:45:00,0.000268,0.008612,0.006797,-0.001845,85.84,,,,15.74,14.98,15.54,1.0168,-0.0055,7265.750,1734.0,149.0,0.0453,0.0315,0.0021,0.0189,0.0019,0.0014,0.0004,0.0010,1.0063,1.0063,1.0060,1.0002,1.0063,1.0054,1.0009,1.0066,1.0017,1.0049,1.0067,0.9984,1.0083,1.0117,1.0117,1.0105,1.0011,1.0117,1.0067,1.0050,1.0117,1.0010,1.0107,1.0117,0.9975,1.0141,1.0082,1.0082,1.0074,1.0009,1.0082,1.0049,1.0033,1.0082,1.0013,1.0069,1.0082,0.9990,1.0092,0.9988,0.9990,0.9988,1.0002,0.9998,0.9988,1.0010,1.0007,0.9988,1.0019,1.0012,0.9988,1.0024,23487.700,5967.0,398.0,-0.009341
2015-11-18 09:50:00,0.001404,-0.005846,-0.003133,0.001615,85.15,,,,15.76,14.96,15.55,1.0254,0.0011,3878.350,1211.0,49.0,0.0249,0.0145,0.0019,0.0343,0.0011,0.0008,0.0004,0.0017,1.0077,1.0077,1.0069,1.0008,1.0077,1.0050,1.0027,1.0077,1.0029,1.0048,1.0077,0.9990,1.0087,1.0057,1.0059,1.0057,1.0002,1.0077,1.0057,1.0020,1.0102,1.0028,1.0073,1.0112,0.9984,1.0129,1.0051,1.0052,1.0051,1.0001,1.0060,1.0051,1.0009,1.0073,1.0026,1.0048,1.0080,0.9996,1.0084,1.0004,1.0004,1.0002,1.0002,1.0004,0.9996,1.0008,1.0004,0.9991,1.0013,1.0010,0.9989,1.0022,40561.200,6341.0,393.0,-0.010519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-04 13:47:00,-0.001358,0.000500,0.000294,-0.000553,87.49,12.70,12.60,12.61,13.89,12.60,12.62,2.2702,-0.3775,4969.425,307.0,25.0,0.0038,0.0003,0.0003,0.0014,0.0005,0.0001,0.0002,0.0003,0.6118,0.6118,0.6118,1.0001,0.6122,0.6118,1.0007,0.6129,0.6118,1.0018,0.6139,0.6118,1.0034,0.1258,0.1258,0.1258,1.0003,0.1258,0.1257,1.0007,0.1260,0.1257,1.0021,0.1262,0.1257,1.0043,0.6552,0.6552,0.6552,1.0000,0.6552,0.6552,1.0001,0.6555,0.6551,1.0007,0.6559,0.6548,1.0016,1.3311,1.3312,1.3311,1.0001,1.3315,1.3311,1.0003,1.3317,1.3311,1.0005,1.3318,1.3309,1.0006,25137.625,1208.0,104.0,
2019-12-04 13:52:00,-0.002336,-0.002620,-0.001099,0.000085,86.92,12.69,12.61,12.61,13.87,12.60,12.62,2.2702,-0.3775,4969.425,153.0,20.0,0.0042,0.0012,0.0001,0.0027,0.0005,0.0001,0.0001,0.0004,0.6103,0.6104,0.6103,1.0001,0.6110,0.6103,1.0010,0.6122,0.6103,1.0031,0.6136,0.6103,1.0053,0.1255,0.1255,0.1255,1.0001,0.1256,0.1255,1.0009,0.1258,0.1255,1.0029,0.1262,0.1255,1.0056,0.6545,0.6545,0.6545,1.0000,0.6547,0.6545,1.0003,0.6552,0.6545,1.0011,0.6557,0.6545,1.0019,1.3312,1.3312,1.3312,1.0000,1.3313,1.3312,1.0001,1.3316,1.3311,1.0003,1.3317,1.3309,1.0006,25137.625,1292.0,115.0,
2019-12-04 13:57:00,-0.000329,-0.001153,-0.000720,0.000245,86.97,12.67,12.60,12.60,13.86,12.60,12.62,2.2702,-0.3775,4969.425,221.0,18.0,0.0018,0.0005,0.0001,0.0012,0.0004,0.0001,0.0001,0.0003,0.6101,0.6101,0.6101,1.0001,0.6104,0.6101,1.0005,0.6116,0.6100,1.0025,0.6133,0.6100,1.0054,0.1254,0.1254,0.1254,1.0001,0.1254,0.1254,1.0007,0.1257,0.1254,1.0028,0.1261,0.1254,1.0060,0.6540,0.6541,0.6540,1.0001,0.6543,0.6540,1.0004,0.6549,0.6540,1.0013,0.6556,0.6540,1.0024,1.3315,1.3315,1.3315,1.0000,1.3315,1.3313,1.0001,1.3315,1.3312,1.0002,1.3317,1.3310,1.0005,25137.625,603.0,101.0,
2019-12-04 14:02:00,-0.001424,-0.001623,-0.001381,0.000059,87.12,12.65,12.62,12.62,13.84,12.62,12.63,2.2702,-0.3775,4969.425,450.0,45.0,0.0047,0.0023,0.0001,0.0014,0.0006,0.0004,0.0001,0.0003,0.6093,0.6094,0.6093,1.0002,0.6097,0.6093,1.0008,0.6110,0.6093,1.0028,0.6130,0.6093,1.0061,0.1252,0.1252,0.1252,1.0004,0.1253,0.1252,1.0012,0.1256,0.1252,1.0034,0.1260,0.1252,1.0071,0.6531,0.6533,0.6531,1.0003,0.6537,0.6531,1.0009,0.6545,0.6531,1.0021,0.6554,0.6531,1.0035,1.3316,1.3316,1.3315,1.0001,1.3316,1.3314,1.0002,1.3316,1.3313,1.0002,1.3317,1.3310,1.0005,25137.625,1019.0,126.0,


In [10]:
# Display the feature matrix prepare_features() produced for the last lag tried by fit().
clf.X

Unnamed: 0_level_0,f0,f1,f2,f3,f4,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f27,f30,f33,f36,f40,f41,f43,f46,f49,f50,f53,f56,f59,f62,f66,f68,f69,f72,f75,f76,f77,f78
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2015-11-18 09:30:00,0.000000,0.000000,0.000000,0.000000,,,0.9609,0.0413,4836.975,1555.0,115.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,,,
2015-11-18 09:35:00,0.001689,-0.001088,0.000058,0.000615,85.97,15.30,0.9901,0.0329,12362.375,2240.0,154.0,0.0592,0.0094,0.0018,0.0440,0.0019,0.0006,0.0003,0.0018,1.0017,1.0007,1.0030,1.0043,1.0048,1.0004,0.9999,1.0023,1.0036,1.0041,1.0001,1.0001,1.0009,1.0017,1.0019,1.0002,1.0002,1.0004,1.0005,1.0006,67368.450,11762.0,933.0
2015-11-18 09:40:00,0.004288,0.004110,0.001356,0.000040,85.90,15.26,1.0194,0.0060,5613.975,1719.0,57.0,0.0191,0.0166,0.0011,0.0204,0.0010,0.0010,0.0002,0.0010,1.0060,1.0001,1.0021,1.0062,1.0084,1.0002,1.0030,1.0020,1.0050,1.0064,1.0014,1.0001,1.0014,1.0034,1.0044,1.0000,1.0006,1.0003,1.0009,1.0012,46947.150,7726.0,469.0
2015-11-18 09:45:00,0.000268,0.008612,0.006797,-0.001845,85.84,15.54,1.0168,-0.0055,7265.750,1734.0,149.0,0.0453,0.0315,0.0021,0.0189,0.0019,0.0014,0.0004,0.0010,1.0063,1.0002,1.0009,1.0049,1.0083,1.0011,1.0117,1.0050,1.0107,1.0141,1.0082,1.0009,1.0033,1.0069,1.0092,1.0002,0.9988,1.0010,1.0019,1.0024,23487.700,5967.0,398.0
2015-11-18 09:50:00,0.001404,-0.005846,-0.003133,0.001615,85.15,15.55,1.0254,0.0011,3878.350,1211.0,49.0,0.0249,0.0145,0.0019,0.0343,0.0011,0.0008,0.0004,0.0017,1.0077,1.0008,1.0027,1.0048,1.0087,1.0002,1.0077,1.0020,1.0073,1.0129,1.0051,1.0001,1.0009,1.0048,1.0084,1.0002,0.9996,1.0008,1.0013,1.0022,40561.200,6341.0,393.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-04 13:47:00,-0.001358,0.000500,0.000294,-0.000553,87.49,12.62,2.2702,-0.3775,4969.425,307.0,25.0,0.0038,0.0003,0.0003,0.0014,0.0005,0.0001,0.0002,0.0003,0.6118,1.0001,1.0007,1.0018,1.0034,1.0003,0.1258,1.0007,1.0021,1.0043,0.6552,1.0000,1.0001,1.0007,1.0016,1.0001,1.3311,1.0003,1.0005,1.0006,25137.625,1208.0,104.0
2019-12-04 13:52:00,-0.002336,-0.002620,-0.001099,0.000085,86.92,12.62,2.2702,-0.3775,4969.425,153.0,20.0,0.0042,0.0012,0.0001,0.0027,0.0005,0.0001,0.0001,0.0004,0.6103,1.0001,1.0010,1.0031,1.0053,1.0001,0.1256,1.0009,1.0029,1.0056,0.6545,1.0000,1.0003,1.0011,1.0019,1.0000,1.3312,1.0001,1.0003,1.0006,25137.625,1292.0,115.0
2019-12-04 13:57:00,-0.000329,-0.001153,-0.000720,0.000245,86.97,12.62,2.2702,-0.3775,4969.425,221.0,18.0,0.0018,0.0005,0.0001,0.0012,0.0004,0.0001,0.0001,0.0003,0.6101,1.0001,1.0005,1.0025,1.0054,1.0001,0.1254,1.0007,1.0028,1.0060,0.6540,1.0001,1.0004,1.0013,1.0024,1.0000,1.3313,1.0001,1.0002,1.0005,25137.625,603.0,101.0
2019-12-04 14:02:00,-0.001424,-0.001623,-0.001381,0.000059,87.12,12.63,2.2702,-0.3775,4969.425,450.0,45.0,0.0047,0.0023,0.0001,0.0014,0.0006,0.0004,0.0001,0.0003,0.6093,1.0002,1.0008,1.0028,1.0061,1.0004,0.1253,1.0012,1.0034,1.0071,0.6531,1.0003,1.0009,1.0021,1.0035,1.0001,1.3314,1.0002,1.0002,1.0005,25137.625,1019.0,126.0
