In [95]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

# Load the raw dataset from 'data.csv' (path is relative to the working directory).
BTC_df = pd.read_csv('data.csv')

In [96]:
# Preview the first rows of the freshly loaded frame.
BTC_df.head()

Unnamed: 0.1,Unnamed: 0,Datetime,Prices,Volumes,High,Low,RSI,ADL,ADL_slope,OBV,OBV_slope
0,0,1560575162,8644.08,13197.701907,8644.86,8640.43,15.0,5203,-0.09251,8,-0.000454
1,1,1560575222,8644.08,13197.701907,8644.87,8640.5,15.0,10443,2.008249,5130,-0.09251
2,2,1560575282,8646.01,13197.701907,8646.01,8646.0,10.0,16145,1.985836,-5706,0.010063
3,3,1560575342,8646.01,13200.910764,8646.01,8646.0,9.0,9647,6.193095,5378,-0.003326
4,4,1560575402,8635.99,13200.910764,8635.37,8634.36,9.0,-4861,0.818087,10819,2.00037


In [97]:
def trainPCA(X_train, X_test):
    """Project train and test features onto principal components learned from the training set.

    The PCA is fitted exactly once, on ``X_train`` only. ``X_test`` is then
    projected with that same fitted transform, so both returned frames live in
    the same component basis and no information from the test set leaks into
    the fitted model.

    Parameters
    ----------
    X_train : array-like with a 2-D ``.shape`` (n_train_samples, n_features)
        Training features; the PCA is fitted on these.
    X_test : array-like with a 2-D ``.shape`` (n_test_samples, n_features)
        Held-out features; must have the same number of columns as ``X_train``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(X_train_df, X_test_df)``: the projected train and test sets, each
        with ``min(n_train_samples, n_features)`` columns and a fresh default index.

    Raises
    ------
    ValueError
        If ``X_train`` and ``X_test`` do not have the same number of columns.
    """
    n_train_samples, n_features = X_train.shape
    if X_test.shape[1] != n_features:
        raise ValueError(
            f'X_train has {n_features} features but X_test has {X_test.shape[1]}; '
            'both must share the same columns to be projected with one PCA'
        )

    # Keep every component the training data can support: PCA cannot extract
    # more than min(n_samples, n_features) components from the data it is fitted on.
    n_components = min(n_train_samples, n_features)

    X_PCA = PCA(n_components=n_components)

    # Single fit on the training data; the fitted estimator carries the variance ratios.
    X_train_PCA = X_PCA.fit_transform(X_train)
    print(f'X_train Variance: {sum(X_PCA.explained_variance_ratio_)}')
    print(f'X_train Shape: {X_train.shape}')
    X_train_df = pd.DataFrame(X_train_PCA)

    # Only transform the test data: re-fitting here would give it a different basis.
    X_test_PCA = X_PCA.transform(X_test)
    print(f'X_test Shape: {X_test_PCA.shape}')
    X_test_df = pd.DataFrame(X_test_PCA)

    return X_train_df, X_test_df

In [98]:
# Cast the price/volume columns to float64, one column at a time.
for _col in ('High', 'Low', 'Volumes'):
    BTC_df[_col] = BTC_df[_col].astype('float64')

# Column means, taken after the casts above.
# Only RSI_avg and OBV_slp_avg feed the NaN fill below; ADL_avg and
# ADL_slp_avg are computed here but not used in this cell.
ADL_avg = BTC_df['ADL'].mean()
RSI_avg = BTC_df['RSI'].mean()
ADL_slp_avg = BTC_df['ADL_slope'].mean()
OBV_slp_avg = BTC_df['OBV_slope'].mean()

# Replace missing OBV_slope / RSI entries with their column means.
values = dict([('OBV_slope', OBV_slp_avg), ('RSI', RSI_avg)])
BTC_df = BTC_df.fillna(value=values)

In [99]:
# Re-inspect the first rows after the dtype casts and NaN fill.
BTC_df.head()

Unnamed: 0.1,Unnamed: 0,Datetime,Prices,Volumes,High,Low,RSI,ADL,ADL_slope,OBV,OBV_slope
0,0,1560575162,8644.08,13197.701907,8644.86,8640.43,15.0,5203,-0.09251,8,-0.000454
1,1,1560575222,8644.08,13197.701907,8644.87,8640.5,15.0,10443,2.008249,5130,-0.09251
2,2,1560575282,8646.01,13197.701907,8646.01,8646.0,10.0,16145,1.985836,-5706,0.010063
3,3,1560575342,8646.01,13200.910764,8646.01,8646.0,9.0,9647,6.193095,5378,-0.003326
4,4,1560575402,8635.99,13200.910764,8635.37,8634.36,9.0,-4861,0.818087,10819,2.00037


In [100]:
# Features: everything except the saved index column, the timestamp and the target.
X = BTC_df.drop(['Unnamed: 0', 'Datetime', 'Prices'], axis=1)
# Target: the price column as a plain NumPy array.
y = BTC_df['Prices'].values

# Fixed random_state so the 50/50 split (and every result derived from it)
# is reproducible across kernel restarts.
# NOTE(review): the Datetime values look sequential, so a shuffled split mixes
# past and future rows; consider shuffle=False if this is a forecasting task.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

In [101]:
# Preview the training features produced by the split.
X_train.head()

Unnamed: 0,Volumes,High,Low,RSI,ADL,ADL_slope,OBV,OBV_slope
63280,5571.722642,10406.01,10406.0,58.0,24693,1.998543,16579,1.998794
37568,15210.610285,11413.97,11412.0,57.0,19299,3.764522,8102,1.997854
85103,36025.087018,8330.0,8329.99,61.0,-2939162,0.995503,-2939162,-0.002195
91172,7076.497036,8038.19,8035.05,48.0,-2939162,0.995503,-2939162,-0.002195
3137,21895.313657,9050.0,9049.99,55.0,5374,0.007187,-22936,2.000262


In [109]:
# Fit a 4-component PCA on the training features exactly once.
X_PCA = PCA(n_components=4)
X_trainPCA = X_PCA.fit_transform(X_train)
# fit() returns the estimator itself, so a second fit only repeated the work
# (and, with a randomized solver, could leave the model out of sync with the
# projection stored in X_trainPCA). Alias the already-fitted estimator instead.
X_trainVAR = X_PCA

In [110]:
# Print each component's share of the explained variance, rounded to 4 decimals.
arr = X_trainVAR.explained_variance_ratio_
for i in range(len(arr)):
    j = arr[i]
    print(f'{i}: {round(j,4)}')

0: 0.9999
1: 0.0001
2: 0.0
3: 0.0


In [107]:
# Wrap the projected training data in a DataFrame (the name is rebound to the new frame).
# NOTE(review): this cell is numbered In [107] but sits after In [109]/[110];
# re-run the notebook top to bottom so the displayed output matches the PCA above.
X_trainPCA = pd.DataFrame(data=X_trainPCA)

In [108]:
# Preview the first rows of the PCA-projected training data.
X_trainPCA.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-1667309.0,-748281.7,-15434.339885,-6.894891,51.878798,8.885554,8.973405,-0.617793
1,-1661914.0,-739833.2,-5739.500047,-10.510947,1245.024961,0.017052,7.252826,0.174849
2,1296686.0,2207224.0,23277.117224,-9.778442,-1422.942491,10.091596,10.814373,-1.36056
3,1296686.0,2207305.0,-5672.812863,-3.825468,-1126.509401,8.872826,-2.057707,2.230501
4,-1647988.0,-708812.0,948.631042,-7.642976,-2236.691524,24.615259,6.991864,-1.136232
