# LSTM model
This notebook trains and tests an LSTM model that predicts Bitcoin price movement as a buy/sell classification. We'll use PyTorch, and in particular PyTorch Lightning modules, to build the network.

In [122]:
# import necessary libraries and read data
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_lightning.loggers import CSVLogger
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset, TensorDataset
# df = pd.read_csv('data/raw.csv').drop(columns='market_caps')

### Preprocessing & Setup
The LSTM takes data in the format `(batch_len, seq_len, n_features)`, so we'll preprocess the data into that format below. Here, the batch length is the number of labeled samples, and the sequence length is the number of timesteps per sample.

In [111]:
class BTCDataset(Dataset):
    """Map-style dataset of fixed-length windows over a min-max-scaled feature matrix.

    The array stored in ``train_file`` is scaled column-wise with ``MinMaxScaler`` and
    cut into overlapping windows of ``seq_len`` consecutive rows. Sample ``i`` is rows
    ``i .. i + seq_len - 1``; its label is 1 ("buy") when the mean of column 0 over rows
    ``i .. i + avg_window - 1`` is strictly greater than column 0 at row ``i``, and
    0 ("sell") otherwise.

    Args:
        train_file: path to a ``.npy`` file holding a 2-D ``(timesteps, features)`` array.
        seq_len: number of timesteps per sample (default 60).
        avg_window: number of rows averaged to build a label (default 10). Must not
            exceed ``seq_len``, otherwise there are fewer labels than samples.

    Attributes:
        seq_len: timesteps per sample.
        n_features: number of feature columns, taken from the loaded array.
        x_data: float tensor of shape ``(n_samples, seq_len, n_features)``.
        y_data: long tensor of shape ``(n_samples,)`` with class indices in {0, 1}.

    Raises:
        ValueError: if ``avg_window > seq_len`` or the file has ``seq_len`` rows or fewer.
    """

    def __init__(self, train_file, seq_len=60, avg_window=10):
        if avg_window > seq_len:
            raise ValueError(
                f"avg_window ({avg_window}) can't be longer than seq_len ({seq_len})"
            )

        # Step 1: load and scale every feature column
        X_raw = np.load(train_file)
        X_scaled = MinMaxScaler().fit_transform(X_raw)

        n_samples = len(X_scaled) - seq_len
        if n_samples <= 0:
            raise ValueError(
                f"need more than seq_len={seq_len} rows, got {len(X_scaled)}"
            )

        self.seq_len = seq_len
        self.n_features = X_scaled.shape[1]

        # Step 2: one label per sample, so x_data and y_data always have equal length
        labels = self._make_labels(X_scaled[:, 0], avg_window, n_samples)

        # Step 3: divide into sequences and batch
        windows = self._make_windows(X_scaled, seq_len, n_samples)

        self.x_data = torch.tensor(windows, dtype=torch.float32)
        self.y_data = torch.as_tensor(labels, dtype=torch.long)

    @staticmethod
    def _make_labels(prices, avg_window, n_labels):
        """Return ``n_labels`` class indices: 1 if the window mean exceeds the current price, else 0."""
        labels = np.zeros(n_labels, dtype=np.int64)
        for i in range(n_labels):
            # the averaged window starts at row i itself, so it includes the current price
            window_avg = np.average(prices[i:i + avg_window])
            labels[i] = int(window_avg > prices[i])
        return labels

    @staticmethod
    def _make_windows(data, seq_len, n_windows):
        """Stack ``n_windows`` overlapping slices of ``seq_len`` rows, one per start index."""
        return np.stack([data[i:i + seq_len] for i in range(n_windows)])

    def __len__(self):
        """Number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the ``(window, label)`` pair(s) selected by ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        X = self.x_data[idx]
        y = self.y_data[idx]
        return X, y

In [142]:
# Splits the windowed samples at random into train/val/test sets.
# NOTE(review): windows with neighbouring start indices overlap, and the scaler is fit on
# the full series before splitting, so the held-out sets may not be independent of the
# training set — confirm this is acceptable.
class BTCData(pl.LightningDataModule):
    """Lightning data module that turns a ``.npy`` feature matrix into train/val/test loaders.

    Args:
        filename: path to a ``.npy`` file holding a 2-D ``(timesteps, features)`` array.
        batch_size: batch size used by all three dataloaders (default 1).
        seq_len: number of timesteps per sample (default 60).
        avg_window: number of rows averaged to build a label (default 10). Must not
            exceed ``seq_len``, otherwise there are fewer labels than samples.

    Raises:
        ValueError: if ``avg_window > seq_len``.
    """

    def __init__(self, filename, batch_size=1, seq_len=60, avg_window=10):
        super().__init__()
        if avg_window > seq_len:
            raise ValueError(
                f"avg_window ({avg_window}) can't be longer than seq_len ({seq_len})"
            )
        self.scaler = MinMaxScaler()
        self.filename = filename
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.avg_window = avg_window
        self.n_features = None  # filled in by setup() from the loaded array
        self.train = self.val = self.test = None

    def load_data(self):
        """Load the raw array from ``self.filename``."""
        return np.load(self.filename)

    def make_dataset(self, X, y):
        """Wrap features and integer labels in a ``TensorDataset`` (float inputs, long targets)."""
        X = torch.Tensor(X)
        y = torch.tensor(y, dtype=torch.long)

        return TensorDataset(X, y)

    def setup(self, stage=None):
        """Scale the data, build labelled windows and split them 70/10/20 into train/val/test.

        The split is built only once: a repeated call (e.g. one per stage) returns early,
        so a later call cannot re-draw a test set that overlaps the training samples.

        Raises:
            ValueError: if the file has ``seq_len`` rows or fewer.
        """
        if self.train is not None:
            return

        data = self.load_data()
        data = self.scaler.fit_transform(data)

        batch_len = len(data) - self.seq_len
        if batch_len <= 0:
            raise ValueError(
                f"need more than seq_len={self.seq_len} rows, got {len(data)}"
            )

        # Label i is 1 ("buy") when the mean of column 0 over rows i .. i+avg_window-1
        # exceeds column 0 at row i, else 0 ("sell"). Only the first batch_len labels
        # are needed, one per window.
        labels = [0] * batch_len
        for i in range(batch_len):
            window_avg = np.average(data[i:i + self.avg_window, 0])
            labels[i] = int(window_avg > data[i, 0])

        self.n_features = data.shape[1]
        X_batched = np.zeros((batch_len, self.seq_len, self.n_features))
        for i in range(batch_len):
            X_batched[i] = data[i:i + self.seq_len]

        X_train, X_test, y_train, y_test = train_test_split(X_batched, labels, test_size=0.2)
        # 0.125 of the remaining 80% is 10% of all samples
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.125)

        self.train = self.make_dataset(X_train, y_train)
        self.val = self.make_dataset(X_val, y_val)
        self.test = self.make_dataset(X_test, y_test)

    def train_dataloader(self):
        """Dataloader over the training split."""
        return DataLoader(self.train, batch_size=self.batch_size)

    def test_dataloader(self):
        """Dataloader over the test split."""
        return DataLoader(self.test, batch_size=self.batch_size)

    def val_dataloader(self):
        """Dataloader over the validation split."""
        return DataLoader(self.val, batch_size=self.batch_size)
        

In [147]:
# lightning module structure
class LSTM_Classifier(pl.LightningModule):
    """Single-layer LSTM followed by a linear layer that scores each class.

    Args:
        n_features: number of input features per timestep.
        hidden: size of the LSTM hidden state.
        seq_len: number of timesteps per sample; fixes the linear layer's input width.
        classes: number of output classes.
    """

    def __init__(self, n_features, hidden, seq_len, classes):
        super().__init__()

        self.hidden = hidden
        self.seq_len = seq_len
        self.n_classes = classes

        # With batch_first=True the LSTM maps (batch, seq_len, n_features) to
        # (batch, seq_len, hidden). hidden_size must follow `hidden`, because the
        # linear layer below is sized from it.
        self.lstm = nn.LSTM(input_size=n_features, hidden_size=hidden, batch_first=True)
        # every timestep's hidden state is flattened and mapped to one score per class
        self.h2c = nn.Linear(hidden * seq_len, self.n_classes)

    def accuracy(self, y_hat, y):
        """Fraction of correct predictions.

        ``y_hat`` may be per-class scores of shape ``(batch, n_classes)`` (the predicted
        class is the argmax) or a 1-D tensor of predictions (rounded to the nearest class).
        """
        if y_hat.dim() > 1:
            preds = y_hat.argmax(dim=-1)
        else:
            preds = y_hat.round()
        return (y.view(-1) == preds.view(-1)).to(torch.float32).mean()

    def _logits(self, X):
        """Raw class scores of shape ``(batch, n_classes)`` for a batch of sequences."""
        lstm_out, _ = self.lstm(X)
        # flatten per sample; indexing lstm_out[-1] would keep only the last sample
        flat = lstm_out.reshape(lstm_out.size(0), self.hidden * self.seq_len)
        return self.h2c(flat)

    def _loss_and_accuracy(self, batch):
        """Cross-entropy loss and accuracy for one ``(X, y)`` batch."""
        X, y = batch
        logits = self._logits(X)
        # F.cross_entropy applies log-softmax itself, so it must receive raw scores
        return F.cross_entropy(logits, y), self.accuracy(logits, y)

    # forward step - classification
    def forward(self, X):
        """Class probabilities of shape ``(batch, n_classes)``."""
        return F.softmax(self._logits(X), -1)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss and accuracy for one batch."""
        train_loss, acc = self._loss_and_accuracy(batch)
        result = pl.TrainResult(train_loss)
        result.log('train_loss', train_loss)
        result.log('train_accuracy', acc, prog_bar=True)
        # return the result object, not the bare loss, so the logged metrics are kept
        return result

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and accuracy for one batch."""
        val_loss, acc = self._loss_and_accuracy(batch)
        result = pl.EvalResult(val_loss)
        result.log('val_loss', val_loss)
        result.log('val_accuracy', acc, prog_bar=True)
        return result

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss and accuracy for one batch."""
        test_loss, acc = self._loss_and_accuracy(batch)
        result = pl.EvalResult(test_loss)
        result.log('test_loss', test_loss)
        result.log('test_accuracy', acc, prog_bar=True)
        return result

    def configure_optimizers(self):
        """Adam over all parameters with torch's default settings."""
        return torch.optim.Adam(self.parameters())

                                           

Notes/TODO:
- Softmax is good for multiclass problems and pairs naturally with cross-entropy loss. I've added a linear layer that maps the LSTM output to a score for each class, and applied the softmax activation fn. to it. Note, however, that torch's `F.cross_entropy` expects raw (unnormalised) class scores and applies log-softmax internally, so the softmax outputs $0 \leq s \leq 1$ should be used for reporting probabilities rather than passed to the loss. I also changed the label for "sell" from -1 to 0, so that labels are within the expected $0 \leq l \leq C - 1$ range.
- Apparently I need to add an explicit dimension for softmax - this might just be a -1 at the end of the call
- I should also implement validation for the LSTM, so it's not just train & test & hope for the best

### Training
Now we're ready to actually train the model. I'll use the PL Trainer module to handle training.

In [148]:
# seed the run so repeated executions are comparable
pl.seed_everything(4622)

# model / data dimensions
n_features = 6   # input columns per timestep
hidden = 2       # LSTM hidden-state size
seq_len = 60     # timesteps per sample
classes = 2      # number of output classes

# logger, data module, model and trainer, in that order
logger = CSVLogger("logs", name="lstm")
train_ds = BTCData('data/LSTM_EMA_diff_data.npy')
lstm_model = LSTM_Classifier(
    n_features=n_features,
    hidden=hidden,
    seq_len=seq_len,
    classes=classes,
)
trainer = pl.Trainer(
    deterministic=True,
    logger=logger,
    gpus=1 if torch.cuda.is_available() else 0,  # one GPU when CUDA is present, else CPU
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [149]:
# train the model on the data module's train/val loaders
trainer.fit(lstm_model, datamodule=train_ds)


  | Name | Type   | Params
--------------------------------
0 | lstm | LSTM   | 80    
1 | h2c  | Linear | 242   


Epoch 0:   0%|▏                                              | 743/150433 [00:09<32:33, 76.62it/s, loss=0.700, v_num=6]

Saving latest checkpoint..


Epoch 0:   0%|▏                                              | 743/150433 [00:09<32:36, 76.49it/s, loss=0.700, v_num=6]


1

In [None]:
# Persist the trained network. The whole module object is handed to torch.save,
# not just its state_dict.
save_path = 'models/lstm_done_poorly.pt'
torch.save(obj=lstm_model, f=save_path)