<a href="https://colab.research.google.com/github/doogzultz/time_series_project/blob/main/preprocessing_and_model_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [357]:
import pandas as pd
import numpy as np
import tensorflow as tf
from pandas.plotting import scatter_matrix
from tensorflow.keras.layers import Layer, Dense, Dropout, Input, Flatten, Concatenate, Conv1D, LSTM, Bidirectional, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.utils import shuffle
from sklearn.compose import ColumnTransformer
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [358]:
# Load the project dataset directly from its GitHub raw-content URL.
url = "https://raw.githubusercontent.com/doogzultz/time_series_project/main/data.csv"
data = pd.read_csv(url)

In [359]:
data.columns

Index(['date', 'identifier', 'market_cap', 'sector', 'index_membership',
       'factor_1', 'factor_2', 'factor_3', 'factor_4', 'factor_5', 'factor_6',
       'factor_7', 'factor_8', 'factor_9', 'factor_10', 'target'],
      dtype='object')

In [360]:
# Remove the index_membership column. Rebinding `data` (instead of inplace=True)
# together with errors='ignore' keeps this cell safe to run more than once.
data = data.drop(columns = 'index_membership', errors = 'ignore')

In [361]:
# Order rows by identifier, then by date, so each identifier's rows are contiguous.
# The sort is applied in place; the bare `data` expression displays the result.
data.sort_values(by = ['identifier', 'date'], inplace = True)
data

Unnamed: 0,date,identifier,market_cap,sector,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,factor_7,factor_8,factor_9,factor_10,target
82,2010-01-06,AD41WBQFVG43,5.843968e+10,35,0.766639,-0.000032,0.832677,0.500608,0.193489,0.246089,0.753702,0.792736,-0.000005,0.109029,-0.000006
400,2010-01-13,AD41WBQFVG43,4.758865e+10,35,0.907349,-0.022933,0.346457,0.581694,0.648483,0.872304,0.723412,0.333230,-0.009504,0.121348,-0.008721
718,2010-01-20,AD41WBQFVG43,3.374757e+10,35,0.476465,-0.000114,0.592328,0.379747,0.810961,0.017737,0.619208,0.058918,-0.000099,0.890898,-0.000081
1036,2010-01-27,AD41WBQFVG43,4.908395e+10,35,0.265141,-0.000034,0.521405,0.682492,0.547349,0.155915,0.068966,0.699718,-0.000037,0.355059,-0.000018
1354,2010-02-03,AD41WBQFVG43,2.729189e+10,35,0.423987,0.000197,0.950500,0.628533,0.956287,0.426634,0.752799,0.961276,0.000163,0.196711,0.000318
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12552,2010-10-06,ZMYXRS4KVOG2,6.504496e+08,25,0.069622,0.009936,0.588354,0.447469,0.353549,0.767779,0.790632,0.308049,0.012255,0.281193,0.005117
12862,2010-10-13,ZMYXRS4KVOG2,1.403710e+09,25,0.343444,0.002566,0.394802,0.143162,0.116762,0.744089,0.825323,0.559633,0.001058,0.755124,0.000446
13172,2010-10-20,ZMYXRS4KVOG2,7.284687e+08,25,0.857557,0.000146,0.892729,0.113346,0.574870,0.990100,0.319424,0.591153,0.000054,0.933679,0.000035
13483,2010-10-27,ZMYXRS4KVOG2,7.766592e+08,25,0.093894,-0.000885,0.311392,0.768537,0.589851,0.289180,0.431484,0.373209,-0.000411,0.225972,-0.000242


In [362]:
# Number of rows available for each identifier.
ident_counts= data.value_counts('identifier')
ident_counts

identifier
AD41WBQFVG43    104
UZ4DWDZ8ALZ4    104
KR47536Y10D4    104
KPJ8FTV9ESJ3    104
KMHQ727PU8E2    104
               ... 
PNUM57CYEB27      5
KHKNKSMXBWP3      5
LYCPQJ19AOY0      5
RENR64FT3I74      5
LALJLSN9UMP2      2
Length: 356, dtype: int64

In [363]:
# Fraction of all rows that fall in each sector, listed in sector-code order.
data.value_counts('sector', normalize = True).sort_index()

sector
10    0.049221
15    0.093162
20    0.208527
25    0.132849
30    0.072759
35    0.049003
40    0.232812
45    0.051674
50    0.038352
55    0.071642
dtype: float64

In [364]:
# Sector shares computed only over rows whose identifier has more than 80 rows.
data[data['identifier'].isin(list(ident_counts[ident_counts.values > 80].index))].value_counts('sector', normalize = True).sort_index()

sector
10    0.052423
15    0.093655
20    0.212044
25    0.126521
30    0.076618
35    0.045433
40    0.228443
45    0.052423
50    0.039048
55    0.073392
dtype: float64

In [365]:
# Sector shares computed only over rows whose identifier has 80 rows or fewer.
data[data['identifier'].isin(list(ident_counts[ident_counts.values <= 80].index))].value_counts('sector', normalize = True).sort_index()

sector
10    0.010229
15    0.087152
20    0.165712
25    0.209902
30    0.025777
35    0.092471
40    0.286007
45    0.042553
50    0.029869
55    0.050327
dtype: float64

In [366]:
class g:
  """Namespace for settings shared across notebook cells."""
  # Window length in rows; split_to_series derives its length cutoffs and
  # its split offsets from this value.
  window_size = 16

In [367]:
# One-hot encode the sector code into indicator columns named sector_<code>.
sector_onehot = pd.get_dummies(data['sector'], prefix='sector')
sector_onehot

Unnamed: 0,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
82,0,0,0,0,0,1,0,0,0,0
400,0,0,0,0,0,1,0,0,0,0
718,0,0,0,0,0,1,0,0,0,0
1036,0,0,0,0,0,1,0,0,0,0
1354,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
12552,0,0,0,1,0,0,0,0,0,0
12862,0,0,0,1,0,0,0,0,0,0
13172,0,0,0,1,0,0,0,0,0,0
13483,0,0,0,1,0,0,0,0,0,0


In [368]:
# Replace the raw sector column with its one-hot columns (concat aligns on the row index).
# NOTE(review): the in-place drop means this cell raises KeyError if run a second
# time without reloading `data`.
data.drop(columns = 'sector', inplace = True)
data = pd.concat([data, sector_onehot], axis = 1)
data.head()

Unnamed: 0,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,factor_7,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
82,2010-01-06,AD41WBQFVG43,58439680000.0,0.766639,-3.2e-05,0.832677,0.500608,0.193489,0.246089,0.753702,...,0,0,0,0,0,1,0,0,0,0
400,2010-01-13,AD41WBQFVG43,47588650000.0,0.907349,-0.022933,0.346457,0.581694,0.648483,0.872304,0.723412,...,0,0,0,0,0,1,0,0,0,0
718,2010-01-20,AD41WBQFVG43,33747570000.0,0.476465,-0.000114,0.592328,0.379747,0.810961,0.017737,0.619208,...,0,0,0,0,0,1,0,0,0,0
1036,2010-01-27,AD41WBQFVG43,49083950000.0,0.265141,-3.4e-05,0.521405,0.682492,0.547349,0.155915,0.068966,...,0,0,0,0,0,1,0,0,0,0
1354,2010-02-03,AD41WBQFVG43,27291890000.0,0.423987,0.000197,0.9505,0.628533,0.956287,0.426634,0.752799,...,0,0,0,0,0,1,0,0,0,0


In [369]:
# NOTE(review): `math` itself is not referenced in the visible cells; `fl` (math.floor)
# is what split_to_series uses to compute its split positions.
import math
from math import floor as fl
# Module-level row counts per identifier; split_to_series computes its own local
# counts from the frame it is given.
ident_counts= data.value_counts('identifier')
def split_to_series(df, valid_size = .1, test_size = .1,length_cutoff = 4*g.window_size, random_state = None):
  """Split a long-format frame into per-identifier train/validation/test frames.

  Identifiers with more than ``length_cutoff`` rows ("long" series) are sorted
  by date and cut into three consecutive slices. Identifiers with between
  ``2*g.window_size`` and ``length_cutoff`` rows inclusive ("short" series) are
  kept for training only. Identifiers with fewer rows are discarded.

  Args:
    df: DataFrame containing at least the 'identifier' and 'date' columns.
    valid_size: Fraction of each long series nominally assigned to validation.
    test_size: Fraction of each long series nominally assigned to testing.
    length_cutoff: Row count above which a series is split three ways.
    random_state: Optional seed / RandomState forwarded to
      ``sklearn.utils.shuffle`` so the key assignment is reproducible.
      ``None`` keeps the previous unseeded behaviour.

  Returns:
    ``(train_dict, valid_dict, test_dict)``, each mapping an integer key to a
    DataFrame. Long series use keys ``0..n_long-1`` in all three dicts; short
    series follow on from ``n_long`` in ``train_dict`` only. Training frames
    have a fresh 0-based index (the old index is kept as an 'index' column);
    validation and test frames keep their row positions as index labels.
  """
  ident_counts = df.value_counts('identifier')
  is_long = ident_counts.values > length_cutoff
  is_short = (ident_counts.values <= length_cutoff) & (ident_counts.values >= 2*g.window_size)
  long_idents = ident_counts.index[is_long]
  short_idents = ident_counts.index[is_short]

  # Group once instead of scanning the whole frame for every identifier.
  by_ident = df.groupby('identifier')

  def _shuffled(idents):
    # Nothing to shuffle for an empty selection; avoid calling sklearn on it.
    if len(idents) == 0:
      return idents
    return shuffle(idents, random_state = random_state)

  def _ordered_series(ident):
    # reset_index() keeps the original row labels as an 'index' column.
    return by_ident.get_group(ident).sort_values('date').reset_index()

  train_dict = {}
  valid_dict = {}
  test_dict = {}
  for i, ident in enumerate(_shuffled(long_idents)):
    series = _ordered_series(ident)
    n_rows = len(series)
    # Both boundaries are shifted earlier by multiples of the window size.
    # Clamp them so a short series or large fractions can never yield a
    # negative bound (which iloc would interpret as counting from the end).
    valid_start = max(fl((1-valid_size - test_size)*n_rows) - 2*g.window_size, 0)
    test_start = max(fl((1- test_size)*n_rows) - g.window_size, valid_start)
    # Copies, so that later column assignment does not hit chained-assignment
    # warnings on slices of `series`.
    train_dict[i] = series.iloc[:valid_start].copy()
    valid_dict[i] = series.iloc[valid_start:test_start].copy()
    test_dict[i] = series.iloc[test_start:].copy()

  n_long = len(train_dict)
  for j, ident in enumerate(_shuffled(short_idents)):
    train_dict[n_long + j] = _ordered_series(ident)

  print(f"Number of time series to be trained and tested: {n_long}")
  print(f"Number of time series to be trained: {len(train_dict)}")
  return train_dict, valid_dict, test_dict

In [370]:
# Split the per-identifier histories into train / validation / test frames using the default settings.
train_dict, valid_dict, test_dict = split_to_series(data)

Number of time series to be trained and tested: 295
Number of time series to be trained: 323


In [371]:
train_dict[294]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
0,39,2010-01-06,WFE5HYN0XFM4,2522785000.0,0.52632,-4.141176e-07,0.856059,0.017252,0.704788,0.396893,...,0,0,1,0,0,0,0,0,0,0
1,357,2010-01-13,WFE5HYN0XFM4,4100853000.0,0.739545,-0.0007518071,0.77384,0.63549,0.750161,0.274568,...,0,0,1,0,0,0,0,0,0,0
2,675,2010-01-20,WFE5HYN0XFM4,2693856000.0,0.711102,-0.0007397126,0.364993,0.273452,0.925048,0.723892,...,0,0,1,0,0,0,0,0,0,0
3,993,2010-01-27,WFE5HYN0XFM4,4035133000.0,0.78282,7.472607e-05,0.312481,0.186998,0.844158,0.959772,...,0,0,1,0,0,0,0,0,0,0
4,1311,2010-02-03,WFE5HYN0XFM4,2982057000.0,0.346139,0.0004267228,0.25927,0.466152,0.740618,0.751325,...,0,0,1,0,0,0,0,0,0,0
5,1630,2010-02-10,WFE5HYN0XFM4,3971898000.0,0.108043,8.001593e-05,0.645963,0.639942,0.489592,0.553481,...,0,0,1,0,0,0,0,0,0,0
6,1945,2010-02-17,WFE5HYN0XFM4,3939084000.0,0.92559,0.007504566,0.241468,0.328408,0.492996,0.106165,...,0,0,1,0,0,0,0,0,0,0
7,2260,2010-02-24,WFE5HYN0XFM4,1417950000.0,0.921369,5.273461e-05,0.810303,0.825225,0.850129,0.351899,...,0,0,1,0,0,0,0,0,0,0
8,2575,2010-03-03,WFE5HYN0XFM4,5006484000.0,0.917935,0.0008349635,0.401272,0.666373,0.626806,0.604281,...,0,0,1,0,0,0,0,0,0,0
9,2890,2010-03-10,WFE5HYN0XFM4,1255113000.0,0.512389,2.961562e-06,0.998658,0.401908,0.750653,0.689124,...,0,0,1,0,0,0,0,0,0,0


In [372]:
valid_dict[294]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
51,16019,2010-12-29,WFE5HYN0XFM4,5546016000.0,0.156209,0.001118676,0.989436,0.152968,0.33793,0.382311,...,0,0,1,0,0,0,0,0,0,0
52,16331,2011-01-05,WFE5HYN0XFM4,5043673000.0,0.256968,1.358334e-07,0.072085,0.364793,0.530466,0.004841,...,0,0,1,0,0,0,0,0,0,0
53,16643,2011-01-12,WFE5HYN0XFM4,4897077000.0,0.964533,0.0005705462,0.036581,0.7801,0.264365,0.055982,...,0,0,1,0,0,0,0,0,0,0
54,16954,2011-01-19,WFE5HYN0XFM4,6842469000.0,0.311611,0.009795926,0.321356,0.612241,0.013034,0.46895,...,0,0,1,0,0,0,0,0,0,0
55,17265,2011-01-26,WFE5HYN0XFM4,7261038000.0,0.78156,-0.0002556072,0.057532,0.396246,0.280826,0.771343,...,0,0,1,0,0,0,0,0,0,0
56,17577,2011-02-02,WFE5HYN0XFM4,5297479000.0,0.41901,-0.0008900133,0.10849,0.2424,0.809652,0.419884,...,0,0,1,0,0,0,0,0,0,0
57,17889,2011-02-09,WFE5HYN0XFM4,3614743000.0,0.446189,-0.0005989635,0.537254,0.507036,0.253245,0.126702,...,0,0,1,0,0,0,0,0,0,0
58,18197,2011-02-16,WFE5HYN0XFM4,4167746000.0,0.535033,-0.00170248,0.295452,0.659453,0.004052,0.385977,...,0,0,1,0,0,0,0,0,0,0
59,18505,2011-02-23,WFE5HYN0XFM4,5507044000.0,0.872595,-2.32662e-05,0.238619,0.854279,0.006571,0.489151,...,0,0,1,0,0,0,0,0,0,0
60,18813,2011-03-02,WFE5HYN0XFM4,3274713000.0,0.65958,0.0002028551,0.438299,0.673329,0.26209,0.618681,...,0,0,1,0,0,0,0,0,0,0


In [373]:
test_dict[294]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
77,24022,2011-06-29,WFE5HYN0XFM4,5749720000.0,0.504997,5.3e-05,0.867006,0.685947,0.425021,0.843319,...,0,0,1,0,0,0,0,0,0,0
78,24328,2011-07-06,WFE5HYN0XFM4,4402048000.0,0.174111,-0.002385,0.481294,0.452841,0.966818,0.387928,...,0,0,1,0,0,0,0,0,0,0
79,24635,2011-07-13,WFE5HYN0XFM4,3683474000.0,0.22747,-0.000298,0.542609,0.346455,0.549453,0.225019,...,0,0,1,0,0,0,0,0,0,0
80,24942,2011-07-20,WFE5HYN0XFM4,4002408000.0,0.34468,-0.019237,0.926404,0.818529,0.753235,0.617498,...,0,0,1,0,0,0,0,0,0,0
81,25249,2011-07-27,WFE5HYN0XFM4,6120280000.0,0.591148,-0.005541,0.236087,0.529903,0.896546,0.05425,...,0,0,1,0,0,0,0,0,0,0
82,25556,2011-08-03,WFE5HYN0XFM4,3240126000.0,0.426768,-0.000918,0.17461,0.526941,0.089903,0.77078,...,0,0,1,0,0,0,0,0,0,0
83,25863,2011-08-10,WFE5HYN0XFM4,3409482000.0,0.383165,0.001395,0.926183,0.80548,0.205228,0.196952,...,0,0,1,0,0,0,0,0,0,0
84,26169,2011-08-17,WFE5HYN0XFM4,4863994000.0,0.790687,-8.1e-05,0.013189,0.936399,0.39998,0.496167,...,0,0,1,0,0,0,0,0,0,0
85,26475,2011-08-24,WFE5HYN0XFM4,7550020000.0,0.588321,-0.003499,0.42175,0.171908,0.263384,0.675858,...,0,0,1,0,0,0,0,0,0,0
86,26781,2011-08-31,WFE5HYN0XFM4,6535241000.0,0.232438,-0.001352,0.79879,0.528823,0.655118,0.87475,...,0,0,1,0,0,0,0,0,0,0


In [374]:
# Columns to standardize: market_cap plus the label slice factor_1..factor_10.
# The slice picks up every column positioned between those two labels in `data`.
numerical_features = ['market_cap']+list(data.loc[:,'factor_1':'factor_10'].columns)
numerical_features

['market_cap',
 'factor_1',
 'factor_2',
 'factor_3',
 'factor_4',
 'factor_5',
 'factor_6',
 'factor_7',
 'factor_8',
 'factor_9',
 'factor_10']

In [375]:
def preprocess(train_dict, valid_dict, test_dict, numerical_features):
  """Standardize numerical columns using statistics from the training frames.

  The per-column mean and population standard deviation (ddof=0) are computed
  over all training frames pooled together, then applied to every frame in the
  three dicts. The input dicts and their frames are left untouched; scaled
  copies are returned, so calling this function again on the same inputs gives
  the same result.

  Args:
    train_dict: Mapping of key -> training DataFrame (must be non-empty).
    valid_dict: Mapping of key -> validation DataFrame.
    test_dict: Mapping of key -> test DataFrame.
    numerical_features: List of column names to standardize.

  Returns:
    ``(train_scaled, valid_scaled, test_scaled)``: new dicts with the same keys
    as the inputs, holding scaled copies of the frames.
  """
  train_df = pd.concat(list(train_dict.values()), ignore_index = True)
  # Explicit column-wise reductions: np.mean/np.std on a DataFrame forward
  # axis=None, which newer pandas treats as "reduce over both axes".
  t_mean = train_df[numerical_features].mean(axis = 0)
  t_std = train_df[numerical_features].std(axis = 0, ddof = 0)
  # A constant column has zero spread; divide by 1 so it becomes 0 rather than inf/NaN.
  t_std = t_std.mask(t_std == 0, 1.0)

  def _scale(frames):
    # Scale each frame on a copy so the caller's data is not modified.
    scaled = {}
    for key, frame in frames.items():
      frame = frame.copy()
      frame[numerical_features] = (frame[numerical_features]-t_mean)/t_std
      scaled[key] = frame
    return scaled

  return _scale(train_dict), _scale(valid_dict), _scale(test_dict)


In [376]:
# Standardize the numerical feature columns of all three splits via preprocess.
train_scaled, valid_scaled, test_scaled = preprocess(train_dict, valid_dict, test_dict, numerical_features)

In [377]:
train_scaled[25]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
0,139,2010-01-06,ATQCTI8X7FB2,0.221215,0.239283,-1.842549,1.185977,0.021372,1.21092,-0.583992,...,0,0,0,0,0,0,0,0,0,1
1,457,2010-01-13,ATQCTI8X7FB2,0.560464,-0.367496,-0.243494,0.73867,-0.712881,0.86333,-1.601027,...,0,0,0,0,0,0,0,0,0,1
2,775,2010-01-20,ATQCTI8X7FB2,0.128493,0.500075,-0.183707,-1.082607,0.961416,-1.307936,-1.600755,...,0,0,0,0,0,0,0,0,0,1
3,1093,2010-01-27,ATQCTI8X7FB2,1.285192,1.429813,0.205011,1.53317,0.309879,-0.56649,-0.489586,...,0,0,0,0,0,0,0,0,0,1
4,1411,2010-02-03,ATQCTI8X7FB2,0.000994,0.430311,-0.04321,-1.10541,0.771748,-1.491054,-1.194393,...,0,0,0,0,0,0,0,0,0,1
5,1728,2010-02-10,ATQCTI8X7FB2,0.502321,-0.678553,0.623614,0.447207,-0.651182,0.366314,-1.338058,...,0,0,0,0,0,0,0,0,0,1
6,2043,2010-02-17,ATQCTI8X7FB2,0.227855,1.545929,0.092204,-0.222378,1.667676,1.12282,0.393737,...,0,0,0,0,0,0,0,0,0,1
7,2358,2010-02-24,ATQCTI8X7FB2,-0.132315,0.70941,0.016027,-0.658394,-1.41134,-1.610413,-1.500846,...,0,0,0,0,0,0,0,0,0,1
8,2673,2010-03-03,ATQCTI8X7FB2,0.503293,-1.14725,-0.041208,-0.975552,-1.525861,-1.016623,0.433935,...,0,0,0,0,0,0,0,0,0,1
9,2988,2010-03-10,ATQCTI8X7FB2,0.570681,1.522919,-0.05433,1.66446,1.315356,-0.909396,-0.086474,...,0,0,0,0,0,0,0,0,0,1


In [378]:
valid_scaled[25]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
51,16119,2010-12-29,ATQCTI8X7FB2,0.795234,-1.564844,-0.196344,-0.074239,-1.646796,0.687231,1.652796,...,0,0,0,0,0,0,0,0,0,1
52,16430,2011-01-05,ATQCTI8X7FB2,0.370367,1.286088,-0.324922,1.625998,-0.190978,0.484494,0.527097,...,0,0,0,0,0,0,0,0,0,1
53,16741,2011-01-12,ATQCTI8X7FB2,0.699524,1.585823,-0.046526,-0.777262,1.304778,0.331932,-1.054228,...,0,0,0,0,0,0,0,0,0,1
54,17052,2011-01-19,ATQCTI8X7FB2,0.362857,0.441475,-0.045659,-1.551402,-0.179838,0.236314,-0.771463,...,0,0,0,0,0,0,0,0,0,1
55,17363,2011-01-26,ATQCTI8X7FB2,0.17653,1.635689,-0.051069,0.576894,0.271401,-1.182511,1.079131,...,0,0,0,0,0,0,0,0,0,1
56,17676,2011-02-02,ATQCTI8X7FB2,0.663384,-0.620194,-0.044806,1.253316,-0.442305,0.621425,0.738186,...,0,0,0,0,0,0,0,0,0,1
57,17988,2011-02-09,ATQCTI8X7FB2,0.483715,1.382471,0.008866,-1.291658,1.68603,-0.395543,0.590598,...,0,0,0,0,0,0,0,0,0,1
58,18296,2011-02-16,ATQCTI8X7FB2,0.685213,0.761336,-0.042926,-1.346233,1.155144,0.743156,-1.501114,...,0,0,0,0,0,0,0,0,0,1
59,18604,2011-02-23,ATQCTI8X7FB2,0.25812,-0.926983,0.048048,0.299016,-0.095824,0.822849,0.653884,...,0,0,0,0,0,0,0,0,0,1
60,18912,2011-03-02,ATQCTI8X7FB2,0.619634,0.276794,0.010079,-0.752156,-0.971858,-0.972546,-1.280831,...,0,0,0,0,0,0,0,0,0,1


In [379]:
test_scaled[25]

Unnamed: 0,index,date,identifier,market_cap,factor_1,factor_2,factor_3,factor_4,factor_5,factor_6,...,sector_10,sector_15,sector_20,sector_25,sector_30,sector_35,sector_40,sector_45,sector_50,sector_55
77,24119,2011-06-29,ATQCTI8X7FB2,0.488673,0.708821,-0.4306,0.971346,1.560433,0.398176,-0.33427,...,0,0,0,0,0,0,0,0,0,1
78,24426,2011-07-06,ATQCTI8X7FB2,0.610241,0.614853,-0.321834,1.468051,-1.587864,1.43661,-1.55875,...,0,0,0,0,0,0,0,0,0,1
79,24733,2011-07-13,ATQCTI8X7FB2,0.326041,-0.381688,-0.833601,0.916149,-1.21101,-0.347884,-0.677773,...,0,0,0,0,0,0,0,0,0,1
80,25040,2011-07-20,ATQCTI8X7FB2,0.427265,1.124673,-0.366666,-0.433203,-1.083913,1.14783,0.018539,...,0,0,0,0,0,0,0,0,0,1
81,25347,2011-07-27,ATQCTI8X7FB2,0.1679,1.385692,-0.050553,-0.1934,-0.172296,-1.175284,0.472428,...,0,0,0,0,0,0,0,0,0,1
82,25654,2011-08-03,ATQCTI8X7FB2,0.242543,-1.293574,0.076598,0.412128,0.481548,1.067096,-1.254173,...,0,0,0,0,0,0,0,0,0,1
83,25960,2011-08-10,ATQCTI8X7FB2,0.811603,1.068503,0.410513,1.665696,-1.627082,-1.309936,-0.909988,...,0,0,0,0,0,0,0,0,0,1
84,26266,2011-08-17,ATQCTI8X7FB2,0.433896,-0.51542,-0.17166,0.613113,0.982483,0.251733,-1.236948,...,0,0,0,0,0,0,0,0,0,1
85,26572,2011-08-24,ATQCTI8X7FB2,-0.084465,-1.028476,-0.035607,0.121815,0.069908,-1.244085,1.482791,...,0,0,0,0,0,0,0,0,0,1
86,26878,2011-08-31,ATQCTI8X7FB2,0.59304,1.009435,-0.120488,0.937636,0.391777,-0.656899,-1.376725,...,0,0,0,0,0,0,0,0,0,1


In [None]:
def convert_to_tensor(data, window_size = 16)