# 2.c Data Preprocessing and Feature Engineering (Sliding Window (Close))

### A360 MDK interface

In [89]:
# Display the A360 AI MDK handle.
# NOTE(review): `a360ai` is not imported or defined in any cell visible here —
# presumably the platform injects it into the kernel (TODO confirm), so the
# notebook will not run as-is on a plain Jupyter kernel.
a360ai

<A360 AI Interface for project: Btc-price>

In [90]:
# Get default data repo
# NOTE(review): assumes list_datarepos() returns a table-like object whose
# 'name' column can be indexed with 0 to obtain the first repo — confirm
# against the MDK documentation.
DATAREPO_LIST = a360ai.list_datarepos()
DATAREPO = DATAREPO_LIST['name'][0]
# Echo the selected repo name as the cell output.
DATAREPO

'a360-btc-use-case'

### Set Default Data Repo

In [91]:
# Register the repo chosen above as the MDK default; presumably later
# write_dataset() calls target it (TODO confirm against the MDK docs).
a360ai.set_default_datarepo(DATAREPO)

### Import Necessary Packages

In [92]:
import numpy as np
import math
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Library to suppress warnings or deprecation notes
import warnings
# NOTE(review): with no category or module argument this silences every
# warning for the rest of the session, including pandas/scikit-learn
# deprecation notices that may matter for the cells below.
warnings.filterwarnings("ignore")

### Getting Data from Yahoo Finance

In [93]:
# Yahoo Finance CSV download URL for BTC-USD, one row per day.
# period1=1410912000 is 2014-09-17 00:00 UTC and period2=1656633600 is
# 2022-07-01 00:00 UTC (Unix epoch seconds).
btc = (
    'https://query1.finance.yahoo.com/v7/finance/download/BTC-USD'
    '?period1=1410912000&period2=1656633600'
    '&interval=1d&events=history&includeAdjustedClose=true'
)

### Load the Data

In [94]:
# Fetch the CSV behind the URL into a DataFrame (needs network access on every run).
data = pd.read_csv(btc)


### Making a new dataframe with the required features

In [95]:
# Keep only the closing price, still as a one-column DataFrame.
# This rebinds `data`, so the other downloaded columns are no longer reachable.
data = data.loc[:, ['Close']]

In [96]:
# Show the Close-only frame; the rendered table is truncated to its first and last rows.
data

Unnamed: 0,Close
0,457.334015
1,424.440002
2,394.795990
3,408.903992
4,398.821014
...,...
2840,20735.478516
2841,20280.634766
2842,20104.023438
2843,19784.726563


In [97]:
# (rows, columns) of the Close-only frame.
data.shape

(2845, 1)

In [98]:
# Peek at the earliest closing prices.
data.head()

Unnamed: 0,Close
0,457.334015
1,424.440002
2,394.79599
3,408.903992
4,398.821014


### Creating a function for sliding window

In [99]:
def mv_window(row, col, i_start_a, win_1, dataset):
    """Build a matrix of overlapping windows over the ``'Close'`` series.

    Row ``i`` of the result holds the ``win_1`` consecutive closing prices at
    positions ``i_start_a + i`` .. ``i_start_a + i + win_1 - 1``. Columns with
    index ``win_1`` or higher (when ``col > win_1``) are left at zero.

    Values are read by position rather than by index label, so the result
    does not depend on how ``dataset`` is indexed. For a frame with the
    default 0..n-1 index this is identical to a label-based lookup.

    Parameters
    ----------
    row : int
        Number of windows (rows) to produce.
    col : int
        Number of columns in the output; must be at least ``win_1``.
    i_start_a : int
        Position of the first element of the first window.
    win_1 : int
        Window length, i.e. how many columns of each row are filled.
    dataset : DataFrame or mapping
        Any object for which ``dataset['Close']`` yields a 1-D numeric sequence.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(row, col)``.

    Raises
    ------
    IndexError
        If ``win_1 > col`` or if a window would reach outside the series.
    """
    X = np.zeros((row, col))
    if row == 0 or win_1 <= 0:
        # Nothing to fill; return the all-zero matrix without touching the data.
        return X
    if win_1 > col:
        raise IndexError(
            f"win_1 ({win_1}) exceeds the number of output columns ({col})"
        )

    close = np.asarray(dataset['Close'], dtype=float)
    last_needed = i_start_a + row + win_1 - 2
    # Checked explicitly because negative positions would otherwise wrap
    # around silently under NumPy indexing.
    if i_start_a < 0 or last_needed >= close.shape[0]:
        raise IndexError(
            f"windows need positions {i_start_a}..{last_needed} but the series "
            f"has {close.shape[0]} values"
        )

    # positions[i, d] == i_start_a + i + d, i.e. element d of window i.
    positions = i_start_a + np.arange(row)[:, None] + np.arange(win_1)[None, :]
    X[:, :win_1] = close[positions]
    return X

In [100]:
# Select the window length and build one 30-day window per predictable day.
# The row count is derived from the data length (2845 - 30 = 2815 for this
# download) instead of being hard-coded, so the call stays valid if the
# date range of the download changes.
WINDOW_SIZE = 30
sliding = mv_window(len(data) - WINDOW_SIZE, WINDOW_SIZE, 0, WINDOW_SIZE, data)

In [101]:
# Wrap the window matrix in a DataFrame (columns 0..29, one row per window).
sliding = pd.DataFrame(sliding)

In [102]:
# Inspect the most recent windows.
sliding.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
2810,28814.900391,29445.957031,31726.390625,31792.310547,29799.080078,30467.488281,29704.390625,29832.914063,29906.662109,31370.671875,...,20471.482422,19017.642578,20553.271484,20599.537109,20710.597656,19987.029297,21085.876953,21231.65625,21502.337891,21027.294922
2811,29445.957031,31726.390625,31792.310547,29799.080078,30467.488281,29704.390625,29832.914063,29906.662109,31370.671875,31155.478516,...,19017.642578,20553.271484,20599.537109,20710.597656,19987.029297,21085.876953,21231.65625,21502.337891,21027.294922,20735.478516
2812,31726.390625,31792.310547,29799.080078,30467.488281,29704.390625,29832.914063,29906.662109,31370.671875,31155.478516,30214.355469,...,20553.271484,20599.537109,20710.597656,19987.029297,21085.876953,21231.65625,21502.337891,21027.294922,20735.478516,20280.634766
2813,31792.310547,29799.080078,30467.488281,29704.390625,29832.914063,29906.662109,31370.671875,31155.478516,30214.355469,30111.998047,...,20599.537109,20710.597656,19987.029297,21085.876953,21231.65625,21502.337891,21027.294922,20735.478516,20280.634766,20104.023438
2814,29799.080078,30467.488281,29704.390625,29832.914063,29906.662109,31370.671875,31155.478516,30214.355469,30111.998047,29083.804688,...,20710.597656,19987.029297,21085.876953,21231.65625,21502.337891,21027.294922,20735.478516,20280.634766,20104.023438,19784.726563


In [103]:
# The first 30 closes serve only as inputs to the first window, so they are
# omitted from the targets: the target for window i is the close at position i + 30.
y = data["Close"].iloc[30:]

In [104]:
# Promote the target Series to a one-column DataFrame.
y = pd.DataFrame(y)

### Splitting the last 30 days as the validation set

In [105]:
# Hold out the last 30 targets for validation. Counting from the end equals
# the former fixed offset 2785 on the current 2815 rows and stays correct if
# the amount of downloaded data changes.
y_val = y.iloc[-30:]

In [106]:
# Hold out the last 30 windows for validation. Counting from the end equals
# the former fixed offset 2785 on the current 2815 rows and stays correct if
# the amount of downloaded data changes.
X_val = sliding.iloc[-30:]

In [107]:
# The last 30 rows are omitted here because they form the validation set.
# Slicing relative to the end keeps features and targets the same length
# regardless of how many rows were downloaded (2785 rows each for this data).
X = sliding.iloc[:-30]
y1 = y.iloc[:-30]

### Split Data to prepare train and test set

In [108]:
# Random 70/30 split of the window rows; random_state fixes the shuffle so
# the split is reproducible.
# NOTE(review): the rows are overlapping windows of a single time series, so
# a shuffled split places near-identical neighbouring windows on both sides.
# Consider shuffle=False (chronological split) if leakage matters for the
# evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y1, test_size=0.30, random_state=1
)

In [109]:
# Report how many rows landed in each split.
for _label, _frame in (
    ("Shape of Training set : ", X_train),
    ("Shape of test set : ", X_test),
):
    print(_label, _frame.shape)

Shape of Training set :  (1949, 30)
Shape of test set :  (836, 30)


### Export Data to save it for Model Training

In [110]:
# Show the test features; the row labels are the original window positions,
# in shuffled order.
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
297,293.114990,310.867004,292.053986,287.463989,285.829010,278.088989,279.471985,274.901001,273.614014,278.980988,...,284.649994,281.601013,282.614014,281.226990,285.217987,281.881989,278.576996,279.584991,260.997009,265.083008
65,350.847992,352.920013,367.572998,376.901001,375.347992,368.369995,369.670013,376.446991,375.490997,378.046997,...,350.506012,352.541992,347.376007,351.631989,345.345001,327.062012,319.776001,311.395996,317.842987,329.955994
2497,30817.832031,29807.347656,32110.693359,32313.105469,33581.550781,34292.445313,35350.187500,37337.535156,39406.941406,39995.906250,...,43798.117188,46365.402344,45585.031250,45593.636719,44428.289063,47793.320313,47096.945313,47047.003906,46004.484375,44695.359375
28,394.773010,382.556000,383.757996,391.441986,389.545990,382.845001,386.475006,383.157990,358.416992,358.345001,...,330.492004,339.485992,349.290009,342.415009,345.488007,363.264008,366.924011,367.695007,423.561005,420.734985
2509,41626.195313,39974.894531,39201.945313,38152.980469,39747.503906,40869.554688,42816.500000,44555.800781,43798.117188,46365.402344,...,49339.175781,48905.492188,49321.652344,49546.148438,47706.117188,48960.789063,46942.218750,49058.667969,48902.402344,48829.832031
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,573.216003,574.317993,575.630005,581.697021,581.307983,586.752991,583.414978,580.182007,577.760986,579.651001,...,610.435974,614.544006,626.315979,622.861023,623.508972,606.718994,608.242981,609.241028,610.684021,607.155029
2633,56477.816406,53598.246094,49200.703125,49368.847656,50582.625000,50700.085938,50504.796875,47672.121094,47243.304688,49362.507813,...,48628.511719,50784.539063,50822.195313,50429.859375,50809.515625,50640.417969,47588.855469,46444.710938,47178.125000,46306.445313
614,444.154999,445.980988,449.598999,453.384003,473.463989,530.039978,526.232971,533.864014,531.385986,536.919983,...,672.783997,704.375977,685.559021,694.468994,766.307983,748.908997,756.226990,763.781006,737.226013,666.651978
1452,6225.979980,6300.859863,6329.700195,6321.200195,6351.799805,6517.310059,6512.709961,6543.200195,6517.180176,6281.200195,...,6644.129883,6601.959961,6625.560059,6589.620117,6556.100098,6502.589844,6576.689941,6622.479980,6588.310059,6602.950195


In [111]:
# Peek at the first few training windows.
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
2653,48628.511719,50784.539063,50822.195313,50429.859375,50809.515625,50640.417969,47588.855469,46444.710938,47178.125,46306.445313,...,42735.855469,43949.101563,42591.570313,43099.699219,43177.398438,43113.878906,42250.550781,42375.632813,41744.328125,40680.417969
692,587.801025,592.103027,589.119995,587.559021,585.588013,570.473022,567.23999,577.439026,573.216003,574.317993,...,574.106995,577.502991,575.471985,572.302979,575.536987,598.211975,608.633972,606.590027,610.435974,614.544006
421,338.152008,336.752991,332.906006,320.165985,330.751007,335.093994,334.589996,326.148987,322.022003,326.927002,...,359.187012,361.04599,363.183014,388.949005,388.78299,395.536011,415.562988,417.562988,415.479004,451.937988
592,451.875,444.669006,450.303986,446.721985,447.976013,459.602997,458.536011,458.548004,460.483002,450.894989,...,443.187988,439.322998,444.154999,445.980988,449.598999,453.384003,473.463989,530.039978,526.232971,533.864014
927,1080.5,1102.170044,1143.810059,1133.25,1124.780029,1182.680054,1176.900024,1175.949951,1187.869995,1187.130005,...,1222.050049,1231.709961,1207.209961,1250.150024,1265.48999,1281.079956,1317.72998,1316.47998,1321.790039,1347.890015


### Write dataset to Data Repo

In [112]:
# Persist the six splits under "*_close" names, replacing any earlier
# versions (overwrite=True). The cell output is the return value of the last call.
# NOTE(review): the train/test frames carry shuffled, non-contiguous row
# labels; whether write_dataset keeps the index is not visible here — confirm
# before relying on row order in the training notebook.
a360ai.write_dataset(X_train,"X_train_close", overwrite=True)
a360ai.write_dataset(y_train,"y_train_close", overwrite=True)
a360ai.write_dataset(X_test,"X_test_close", overwrite=True)
a360ai.write_dataset(y_test,"y_test_close", overwrite=True)
a360ai.write_dataset(X_val,"X_val_close", overwrite=True)
a360ai.write_dataset(y_val,"y_val_close", overwrite=True)

True