In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pi
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

data = pd.read_csv('btc.csv')

## <span>Data Ovreview</span>

In [2]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700
3,2014-09-20,394.673004,423.29599,389.882996,408.903992,408.903992,36863600
4,2014-09-21,408.084991,412.425995,393.181,398.821014,398.821014,26580100


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3500 entries, 0 to 3499
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3500 non-null   object 
 1   Open       3500 non-null   float64
 2   High       3500 non-null   float64
 3   Low        3500 non-null   float64
 4   Close      3500 non-null   float64
 5   Adj Close  3500 non-null   float64
 6   Volume     3500 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 191.5+ KB


In [4]:
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,3500.0,15822.35,17539.41,176.897,1005.538,8660.376,26724.53,73079.38
High,3500.0,16192.68,17961.34,211.731,1033.745,8831.567,27092.02,73750.07
Low,3500.0,15426.89,17073.76,171.51,981.305,8392.175,26330.32,71334.09
Close,3500.0,15838.89,17552.93,178.103,1006.747,8660.094,26753.94,73083.5
Adj Close,3500.0,15838.89,17552.93,178.103,1006.747,8660.094,26753.94,73083.5
Volume,3500.0,17103320000.0,19287550000.0,5914570.0,207527000.0,12745700000.0,27542720000.0,350967900000.0


In [5]:
data.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

## <span>Data Visualization</span>

In [6]:
fig = px.line(data, x='Date', y='Volume', title='Daily Trading Volume')
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Volume')
fig.update_layout(template='plotly_dark')
fig.show()

In [7]:
fig = px.line(data, x='Date', y='Close', title='Closing Prices Over Time')
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Closing Price')
fig.show()

In [8]:
fig.add_trace(go.Scatter(x=data['Date'], y=data['Open'], mode='lines+markers', name='Open'))
fig.add_trace(go.Scatter(x=data['Date'], y=data['High'], mode='lines+markers', name='High'))
fig.add_trace(go.Scatter(x=data['Date'], y=data['Low'], mode='lines+markers', name='Low'))
fig.add_trace(go.Scatter(x=data['Date'], y=data['Close'], mode='lines+markers', name='Close'))

fig.update_layout(title='Price Analysis',
                  xaxis_title='Date',
                  yaxis_title='Price')

fig.show()

In [9]:
daily_changes = data['Close'].diff()
fig = px.histogram(daily_changes, nbins=10, title='Histogram of Daily Price Changes')
fig.update_xaxes(title='Daily Price Change')
fig.update_yaxes(title='Frequency')
fig.update_layout(template='plotly_dark')
fig.show()

In [10]:
data['20-day MA'] = data['Close'].rolling(window=20).mean()

fig = go.Figure(data=[go.Candlestick(
    x=data['Date'],
    open=data['Open'],
    high=data['High'],
    low=data['Low'],
    close=data['Close'],
    name="Candlesticks",
    increasing_line_color='green',
    decreasing_line_color='red',
    line=dict(width=1),
    showlegend=False
)])

fig.add_trace(go.Scatter(x=data['Date'], y=data['20-day MA'], mode='lines', name='20-day Moving Average', line=dict(color='rgba(255, 255, 0, 0.3)')))

fig.update_layout(
    title="BTC Stock Price Candlestick Chart with Moving Average",
    xaxis_title="Date",
    yaxis_title="Price",
    template="plotly_dark",
)

fig.show()

In [11]:
data['Date'] = pd.to_datetime(data['Date'])
stock_data = data.sort_values('Date')

In [12]:
stock = data[['Date', 'Close', 'High', 'Low', 'Open', 'Volume']]

In [13]:
scaler = MinMaxScaler(feature_range=(-1, 1))
normalized_data = stock[['Close', 'High', 'Low', 'Open', 'Volume']].copy()
shifted_df_as_np = scaler.fit_transform(normalized_data)

shifted_df_as_np

array([[-0.99233991, -0.9930256 , -0.99210506, -0.99207251, -0.99991371],
       [-0.99324228, -0.9933333 , -0.99321008, -0.99231952, -0.9998372 ],
       [-0.9940555 , -0.99412269, -0.99401309, -0.99321817, -0.99981761],
       ...,
       [ 0.79851222,  0.78444875,  0.74345389,  0.74642441, -0.72032071],
       [ 0.73507341,  0.81312001,  0.74700122,  0.7986426 , -0.75159704],
       [ 0.71656344,  0.73008584,  0.73524513,  0.73560805, -0.73468054]])

In [14]:
X = shifted_df_as_np[:, 1:]
y = shifted_df_as_np[:, 0]

X.shape, y.shape

((3500, 4), (3500,))

In [15]:
from copy import deepcopy as dc
X = dc(np.flip(X, axis=1))
X

array([[-0.99991371, -0.99207251, -0.99210506, -0.9930256 ],
       [-0.9998372 , -0.99231952, -0.99321008, -0.9933333 ],
       [-0.99981761, -0.99321817, -0.99401309, -0.99412269],
       ...,
       [-0.72032071,  0.74642441,  0.74345389,  0.78444875],
       [-0.75159704,  0.7986426 ,  0.74700122,  0.81312001],
       [-0.73468054,  0.73560805,  0.73524513,  0.73008584]])

In [16]:
split_index = int(len(X) * 0.95)

split_index

3325

In [17]:
X_train = X[:split_index]
X_test = X[split_index:]

y_train = y[:split_index]
y_test = y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3325, 4), (175, 4), (3325,), (175,))

In [18]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)

    df.set_index('Date', inplace=True)

    for i in range(1, n_steps+1):
        df[f'Close(t-{i})'] = df['Close'].shift(i)

    df.dropna(inplace=True)

    return df

lookback = 7
shifted_df = prepare_dataframe_for_lstm(data, lookback)
shifted_df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,20-day MA,Close(t-1),Close(t-2),Close(t-3),Close(t-4),Close(t-5),Close(t-6),Close(t-7)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2014-10-06,320.389008,345.134003,302.559998,330.079010,330.079010,79011800,389.910399,320.510010,328.865997,359.511993,375.071991,383.614990,386.944000,375.467010
2014-10-07,330.584015,339.247009,320.481995,336.187012,336.187012,49199900,383.853049,330.079010,320.510010,328.865997,359.511993,375.071991,383.614990,386.944000
2014-10-08,336.115997,354.364014,327.187988,352.940002,352.940002,54736300,380.278049,336.187012,330.079010,320.510010,328.865997,359.511993,375.071991,383.614990
2014-10-09,352.747986,382.726013,347.687012,365.026001,365.026001,83641104,378.789549,352.940002,336.187012,330.079010,320.510010,328.865997,359.511993,375.071991
2014-10-10,364.687012,375.066986,352.963013,361.562012,361.562012,43665700,376.422450,365.026001,352.940002,336.187012,330.079010,320.510010,328.865997,359.511993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-12,70061.382813,71222.742188,65254.835938,67195.867188,67195.867188,44129299406,69130.135742,70060.609375,70587.882813,69139.015625,71631.359375,69362.554688,68896.109375,67837.640625
2024-04-13,67188.375000,67931.429688,60919.105469,63821.472656,63821.472656,52869738185,68959.500781,67195.867188,70060.609375,70587.882813,69139.015625,71631.359375,69362.554688,68896.109375
2024-04-14,63836.230469,65824.429688,62205.851563,65738.726563,65738.726563,49084320047,68748.496485,63821.472656,67195.867188,70060.609375,70587.882813,69139.015625,71631.359375,69362.554688
2024-04-15,65739.648438,66878.648438,62332.070313,63426.210938,63426.210938,43595917654,68420.415235,65738.726563,63821.472656,67195.867188,70060.609375,70587.882813,69139.015625,71631.359375


In [19]:
X_train = X_train.reshape((-1, lookback, 1))
X_test = X_test.reshape((-1, lookback, 1))

y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1900, 7, 1), (100, 7, 1), (3325, 1), (175, 1))

In [20]:
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([1900, 7, 1]),
 torch.Size([100, 7, 1]),
 torch.Size([3325, 1]),
 torch.Size([175, 1]))

In [21]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

TRAIN FROM HERE

In [27]:
from torch.utils.data import DataLoader
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

for _, batch in enumerate(train_loader):
    x_batch, y_batch = batch[0].to(device), batch[1].to(device)
    print(x_batch.shape, y_batch.shape)
    break
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers,
                            batch_first=True)

        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

model = LSTM(1, 4, 1)
model.to(device)

def train_one_epoch():
    model.train(True)
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0

    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        loss = loss_function(output, y_batch)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 99:  # print every 100 batches
            avg_loss_across_batches = running_loss / 100
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index+1,
                                                    avg_loss_across_batches))
            running_loss = 0.0
    print()
    def validate_one_epoch():
    model.train(False)
    running_loss = 0.0

    for batch_index, batch in enumerate(test_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            output = model(x_batch)
            loss = loss_function(output, y_batch)
            running_loss += loss.item()

    avg_loss_across_batches = running_loss / len(test_loader)

    print('Val Loss: {0:.3f}'.format(avg_loss_across_batches))
    print('***************************************************')
    print()

torch.Size([16, 7, 1]) torch.Size([16, 1])


In [28]:
def validate_one_epoch():
    model.train(False)
    running_loss = 0.0

    for batch_index, batch in enumerate(test_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            output = model(x_batch)
            loss = loss_function(output, y_batch)
            running_loss += loss.item()

    avg_loss_across_batches = running_loss / len(test_loader)

    print('Val Loss: {0:.3f}'.format(avg_loss_across_batches))
    print('***************************************************')
    print()