<a href="https://colab.research.google.com/github/cepdnaclk/e19-CO544-Bitcoin-Cost-Forecast-System/blob/main/Multivariate_LSTM_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Data

In [1]:
import yfinance as yf
import pandas as pd

# Ticker symbol for Bitcoin
ticker = 'BTC-USD'

# Fetch the historical market data for that ticker (period="max")
hist = yf.Ticker(ticker).history(period="max")

# Discard the "Dividends" and "Stock Splits" columns
hist = hist.drop(columns=['Dividends', 'Stock Splits'])

# Make sure the row index (the dates) is a pandas datetime index
hist.index = pd.to_datetime(hist.index)

# Preview the first rows of the data
print(hist.head())

                                 Open        High         Low       Close  \
Date                                                                        
2014-09-17 00:00:00+00:00  465.864014  468.174011  452.421997  457.334015   
2014-09-18 00:00:00+00:00  456.859985  456.859985  413.104004  424.440002   
2014-09-19 00:00:00+00:00  424.102997  427.834991  384.532013  394.795990   
2014-09-20 00:00:00+00:00  394.673004  423.295990  389.882996  408.903992   
2014-09-21 00:00:00+00:00  408.084991  412.425995  393.181000  398.821014   

                             Volume  
Date                                 
2014-09-17 00:00:00+00:00  21056800  
2014-09-18 00:00:00+00:00  34483200  
2014-09-19 00:00:00+00:00  37919700  
2014-09-20 00:00:00+00:00  36863600  
2014-09-21 00:00:00+00:00  26580100  


# Set Inputs and Outputs

In [2]:
# Inputs: every column except "Close"; target: the "Close" column as an array
X = hist.drop(columns=['Close'])
y = hist['Close'].values
X.shape, y.shape

((3516, 4), (3516,))

## Standardize Features

In [3]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Input columns are standardized with StandardScaler.
# NOTE(review): both scalers are fit on the full series, before the train/test
# split performed later in the notebook, so test-period statistics influence
# the scaling -- confirm this is acceptable for the evaluation.
ss = StandardScaler()
X_trans = ss.fit_transform(X)

# The target is min-max scaled; the scaler needs a 2-D column, hence the reshape
mm = MinMaxScaler()
y_trans = mm.fit_transform(y.reshape(-1, 1))

## Split a multivariate sequence into past and future samples (X and y)

In [5]:
import numpy as np

# Split a multivariate sequence into past (input) and future (output) samples (X and y)
def split_sequences(input_sequences, output_sequence, n_steps_in, n_steps_out):
    """Cut aligned sequences into sliding input windows and target windows.

    For every start row ``s`` the input window is
    ``input_sequences[s : s + n_steps_in]`` and the target window is the last
    column of ``output_sequence[s + n_steps_in - 1 : s + n_steps_in + n_steps_out - 1]``.
    The target therefore starts on the same row as the final row of its input
    window. Windows are produced for consecutive start rows until the target
    window would run past ``len(input_sequences)``.

    Parameters
    ----------
    input_sequences : 2-D array indexed by row; sliced along the first axis.
    output_sequence : 2-D array; only its last column is used for the targets.
    n_steps_in : number of rows in each input window.
    n_steps_out : number of rows in each target window.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` with one entry per window. Both are empty arrays when no
        complete window fits.
    """
    n_rows = len(input_sequences)

    # Rows spanned from the start of a window to the end of its target
    span = n_steps_in + n_steps_out - 1

    # A start row s is usable while s + span <= n_rows (and s < n_rows)
    n_windows = max(0, min(n_rows, n_rows - span + 1))
    starts = range(n_windows)

    windows = [input_sequences[s:s + n_steps_in] for s in starts]
    targets = [output_sequence[s + n_steps_in - 1:s + span, -1] for s in starts]

    return np.array(windows), np.array(targets)

# Build windows of 100 input steps, each paired with a 1-step target
X_ss, y_mm = split_sequences(X_trans, y_trans, n_steps_in=100, n_steps_out=1)
print(X_ss.shape, y_mm.shape)

(3417, 100, 4) (3417, 1)


## Check the y_mm sample

In [9]:
# Show the first target window next to the scaled targets it was sliced from
print("y_mm[0]:", y_mm[0])
print("y_trans[99:149].squeeze(1):", y_trans[99:149].squeeze(1))

# The windows were built with 100 input steps and 1 output step, so the first
# target is row 99 of y_trans. Compare element-wise: reducing each side with
# .all() first would only compare two booleans, not the values themselves.
np.testing.assert_array_equal(y_mm[0], y_trans[99:100].squeeze(1))

y_mm[0]

y_mm[0]: [0.00193271]
y_trans[99:149].squeeze(1): [0.00193271 0.00205501 0.00188957 0.00190845 0.00184578 0.00181926
 0.00194896 0.00186743 0.00187817 0.0014125  0.00118087 0.00132186
 0.00148255 0.00159431 0.0014436  0.00154042 0.00132628 0.00120097
 0.00123027 0.00065507 0.         0.00043537 0.00041141 0.0002902
 0.00044216 0.00050419 0.00045555 0.00066928 0.00075856 0.00075133
 0.00095664 0.00103717 0.00130813 0.001171   0.00076554 0.00076003
 0.0006628  0.00053989 0.00067031 0.00082471 0.00067437 0.00066867
 0.00053505 0.00060576 0.00068103 0.00062148 0.00057619 0.00057247
 0.0005635  0.00059887]


array([0.00193271])

In [10]:
# Scaled target at row 99, flattened to 1-D for comparison with y_mm[0]
np.squeeze(y_trans[99:100], axis=1)

array([0.00193271])

## Split Training & Test Sets

In [11]:
# Number of most recent windows held out for testing
TEST_SIZE = 150

total_samples = len(X)
# NOTE(review): this 90% cutoff is computed but not used by the split below,
# which holds out a fixed number of windows instead.
train_test_cutoff = round(0.90 * total_samples)

# With too few windows the negative slices below would silently produce an
# empty training set, so fail loudly instead.
assert len(X_ss) > TEST_SIZE, "not enough windows to hold out the test set"

# Chronological split: the last TEST_SIZE windows are the test set
X_train, X_test = X_ss[:-TEST_SIZE], X_ss[-TEST_SIZE:]
y_train, y_test = y_mm[:-TEST_SIZE], y_mm[-TEST_SIZE:]

print("Training Shape:", X_train.shape, y_train.shape)
print("Testing Shape:", X_test.shape, y_test.shape)

Training Shape: (3267, 100, 4) (3267, 1)
Testing Shape: (150, 100, 4) (150, 1)
