## Set up the imports

In [514]:
import pandas as pd
import numpy as np
from pathlib import Path
from matplotlib import pyplot as plt
import math
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateLogger, ModelCheckpoint

%matplotlib inline

import the local modules

In [515]:
import os
import sys
# Put the parent directory on the import path (once) so the local modules can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from indicators import (indicators, normalize)

## load files into dataframe

In [516]:
# Directory (relative path) that holds the CSV files read by `load_df`.
parent_dir = Path("../data")
# Names of the CSV files inside `parent_dir` that `load_df` reads.
files = ["xtbusd_1h_2019.csv"]
# Default candle patterns exposed by the local `indicators` module.
# NOTE(review): not referenced again in the visible part of this notebook — confirm it is still needed.
candle_patterns = indicators.DEFAULT_CANDLE_PATTERNS

`load_df` loads the csv files into a single dataframe

In [517]:
def load_df(csv_files=None, data_dir=None) -> pd.DataFrame:
    """Load one or more CSV files into a single dataframe.

    Args:
        csv_files: Iterable of file names to read. Defaults to the
            notebook-level ``files`` list.
        data_dir: Directory containing the files. Defaults to the
            notebook-level ``parent_dir``.

    Returns:
        All files stacked row-wise in the order given, with every column read
        as ``float64`` and a fresh 0..n-1 index, or ``None`` when no file
        names were supplied.
    """
    names = files if csv_files is None else csv_files
    base = parent_dir if data_dir is None else Path(data_dir)

    frames = []
    for name in names:
        path = base / name
        print(path)
        frames.append(pd.read_csv(path, dtype=np.float64))

    if not frames:
        return None

    # Concatenate once (instead of re-copying the accumulated frame per file)
    # and renumber the rows so labels stay unique across files.
    return pd.concat(frames, ignore_index=True)


run `load_df` to load the csv files into `df`

In [518]:
# Read every CSV listed in `files` and keep the combined frame as `df`.
df = load_df()
# Bare expression so the notebook renders the frame as the cell output.
df

../data/xtbusd_1h_2019.csv


Unnamed: 0.1,Unnamed: 0,Time,Period,Open,High,Low,Close,PriceAverage,TradeAverage,SellAverage,BuyAverage,TradeMedian,Volume,TradeCount
0,0.0,1.546322e+27,3.600000e+12,3689.3,3694.1,3687.4,3691.9,3691.438947,1328.853288,1199.975765,1535.410414,195.958810,68.376207,190.0
1,1.0,1.546326e+27,3.600000e+12,3691.8,3719.4,3691.8,3706.2,3704.395622,1415.701715,985.041760,1636.283643,295.439626,165.736900,434.0
2,2.0,1.546330e+27,3.600000e+12,3706.1,3725.3,3701.1,3714.4,3714.696651,2012.671204,2327.584749,1821.300665,375.295800,113.205647,209.0
3,3.0,1.546333e+27,3.600000e+12,3714.0,3714.0,3695.1,3699.7,3706.429341,1563.477233,1410.126715,1932.770317,386.131200,70.452809,167.0
4,0.0,1.546333e+27,3.600000e+12,3699.7,3699.7,3695.2,3696.9,3698.131183,843.513573,962.383404,671.465135,358.706000,21.213916,93.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15145,0.0,1.577830e+27,3.600000e+12,7178.0,7178.0,7176.2,7176.2,7177.783333,2719.207747,4898.045592,1992.928465,861.360000,4.545977,12.0
15146,1.0,1.577833e+27,3.600000e+12,7176.2,7176.2,7163.3,7168.3,7169.355844,1478.774692,2741.994702,505.950087,201.523750,31.768837,154.0
15147,2.0,1.577837e+27,3.600000e+12,7168.3,7170.5,7153.8,7155.7,7157.538202,1896.904483,2511.989348,1491.941777,219.619947,70.761775,267.0
15148,3.0,1.577840e+27,3.600000e+12,7155.7,7200.0,7150.0,7194.4,7180.951515,3058.986555,3334.343040,2906.730617,309.099528,224.921206,528.0


## Add chart patterns to the dataframe
`add_chart_patterns` adds all candle patterns to the dataframe

In [519]:
def add_chart_patterns(df: pd.DataFrame) -> pd.DataFrame:
    """Return the result of concatenating the candle-pattern columns onto ``df``.

    A ``indicators.CandlePatterns`` instance is created with ``norm=True`` and
    its ``concat`` method is given ``df`` together with the ``Open``, ``High``,
    ``Low`` and ``Close`` columns of ``df``.
    """
    pattern_indicator = indicators.CandlePatterns(norm=True)
    ohlc = {
        "open": df["Open"],
        "high": df["High"],
        "low": df["Low"],
        "close": df["Close"],
    }
    return pattern_indicator.concat(df, **ohlc)

run `add_chart_patterns` to add the patterns to the current df


In [520]:
df = add_chart_patterns(df)
# Display only the rows whose `CandlePatterns DOJI` value equals 1.
df.loc[df["CandlePatterns DOJI"] == 1, "CandlePatterns DOJI"]

21       1.0
22       1.0
26       1.0
27       1.0
30       1.0
        ... 
15108    1.0
15130    1.0
15135    1.0
15141    1.0
15145    1.0
Name: CandlePatterns DOJI, Length: 2473, dtype: float64

## Add the target

Set up the labels: the model predicts the price direction at the close of the next candle.

In [521]:
# Close of the following row, aligned to the current row (the last row has no successor and gets NaN).
targets = df["Close"].shift(periods=-1)
df["close_target"] = targets
df.loc[:, ["close_target", "Close"]]

Unnamed: 0,close_target,Close
0,3706.2,3691.9
1,3714.4,3706.2
2,3699.7,3714.4
3,3696.9,3699.7
4,3705.8,3696.9
...,...,...
15145,7168.3,7176.2
15146,7155.7,7168.3
15147,7194.4,7155.7
15148,7198.4,7194.4


In [522]:
# Binary label: 1 when the next close is strictly above the current close, otherwise 0
# (a NaN `close_target` compares False and therefore yields 0).
df["target"] = np.where(df["Close"] < df["close_target"], 1, 0)
df

Unnamed: 0.1,Unnamed: 0,Time,Period,Open,High,Low,Close,PriceAverage,TradeAverage,SellAverage,...,CandlePatterns STICKSANDWICH,CandlePatterns TAKURI,CandlePatterns TASUKIGAP,CandlePatterns THRUSTING,CandlePatterns TRISTAR,CandlePatterns UNIQUE3RIVER,CandlePatterns UPSIDEGAP2CROWS,CandlePatterns XSIDEGAP3METHODS,close_target,target
0,0.0,1.546322e+27,3.600000e+12,3689.3,3694.1,3687.4,3691.9,3691.438947,1328.853288,1199.975765,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,3706.2,1
1,1.0,1.546326e+27,3.600000e+12,3691.8,3719.4,3691.8,3706.2,3704.395622,1415.701715,985.041760,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,3714.4,1
2,2.0,1.546330e+27,3.600000e+12,3706.1,3725.3,3701.1,3714.4,3714.696651,2012.671204,2327.584749,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,3699.7,0
3,3.0,1.546333e+27,3.600000e+12,3714.0,3714.0,3695.1,3699.7,3706.429341,1563.477233,1410.126715,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,3696.9,0
4,0.0,1.546333e+27,3.600000e+12,3699.7,3699.7,3695.2,3696.9,3698.131183,843.513573,962.383404,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,3705.8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15145,0.0,1.577830e+27,3.600000e+12,7178.0,7178.0,7176.2,7176.2,7177.783333,2719.207747,4898.045592,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,7168.3,0
15146,1.0,1.577833e+27,3.600000e+12,7176.2,7176.2,7163.3,7168.3,7169.355844,1478.774692,2741.994702,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,7155.7,0
15147,2.0,1.577837e+27,3.600000e+12,7168.3,7170.5,7153.8,7155.7,7157.538202,1896.904483,2511.989348,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,7194.4,1
15148,3.0,1.577840e+27,3.600000e+12,7155.7,7200.0,7150.0,7194.4,7180.951515,3058.986555,3334.343040,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,7198.4,1


## Add Indicators

In [523]:
# Moving-average indicators, one per period; `ema_set` is reused for three input series below.
sma_set = [indicators.SMA(period) for period in (12, 24, 50)]
ema_set = [indicators.EMA(period) for period in (12, 24, 50)]

# Indicators that take price and volume columns.
df = indicators.ADOSC(3, 10, "ADSOC").concat(
    df, df["High"], df["Low"], df["Close"], df["Volume"]
)
df = indicators.OBV("OBV").concat(df, df["Close"], df["Volume"])

# Moving averages over the close and over the buy/sell averages.
df = indicators.Set(ema_set, "EMA").concat(df, df["Close"])
df = indicators.Set(sma_set, "SMA").concat(df, df["Close"])
df = indicators.Set(ema_set, "EMA Buy Avg").concat(df, df["BuyAverage"])
df = indicators.Set(ema_set, "EMA Sell Avg").concat(df, df["SellAverage"])

# Bollinger bands over the close with two different second arguments (2 and 3).
df = indicators.Set(
    [indicators.BBANDS(16, deviation) for deviation in (2, 3)],
    "BBANDS",
).concat(df, df["Close"])

# Indicators computed from high / low / close.
df = indicators.Set(
    [indicators.ATR(14), indicators.KelBbandDif()]
).concat(df, df["High"], df["Low"], df["Close"])

# Drop every row that contains a NaN in any column.
df = df.dropna()
df

Unnamed: 0.1,Unnamed: 0,Time,Period,Open,High,Low,Close,PriceAverage,TradeAverage,SellAverage,...,EMA Sell Avg p=50,"BBANDS UPPER p=16, d=2.0","BBANDS MID p=16, d=2.0","BBANDS LOWER p=16, d=2.0","BBANDS UPPER p=16, d=3.0","BBANDS MID p=16, d=3.0","BBANDS LOWER p=16, d=3.0",ATR p=14,"KelBbandDif Upper p=20, bd=2.0, km=2","KelBbandDif Lower p=20, bd=2.0, km=2"
49,1.0,1.546445e+27,3.600000e+12,3820.0,3889.9,3816.0,3883.9,3850.552047,2166.075787,1326.544134,...,1788.548474,3867.749540,3817.24375,3766.737960,3893.002435,3817.24375,3741.485065,42.469506,-49.558047,-36.776586
50,0.0,1.546445e+27,3.600000e+12,3883.9,3890.0,3851.9,3857.5,3869.426362,1190.805556,745.120903,...,1747.629746,3874.035050,3820.83750,3767.639950,3900.633826,3820.83750,3741.041174,42.157398,-46.327121,-31.930084
51,1.0,1.546448e+27,3.600000e+12,3857.5,3865.2,3835.0,3850.8,3849.711091,2413.965841,2458.647045,...,1775.512777,3875.850734,3825.26250,3774.674266,3901.144850,3825.26250,3749.380150,41.303299,-43.434968,-29.083363
52,0.0,1.546448e+27,3.600000e+12,3850.8,3852.0,3846.7,3849.9,3849.451667,2129.670273,2241.838688,...,1793.800068,3877.938362,3828.75000,3779.561638,3902.532544,3828.75000,3754.967456,38.731634,-38.558282,-25.012545
53,1.0,1.546452e+27,3.600000e+12,3849.9,3851.5,3832.9,3845.6,3844.052041,1471.380725,1158.860385,...,1768.900472,3879.262284,3831.46250,3783.662716,3903.162176,3831.46250,3759.762824,37.293661,-35.456898,-22.902184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15144,3.0,1.577830e+27,3.600000e+12,7157.8,7178.0,7155.6,7177.9,7167.840067,3161.733534,2171.929141,...,2254.598581,7243.180636,7179.40625,7115.631864,7275.067829,7179.40625,7083.744671,25.982908,16.421661,7.387889
15145,0.0,1.577830e+27,3.600000e+12,7178.0,7178.0,7176.2,7176.2,7177.783333,2719.207747,4898.045592,...,2358.263170,7233.953778,7175.98750,7118.021222,7262.936917,7175.98750,7089.038083,24.255557,17.433673,10.273594
15146,1.0,1.577833e+27,3.600000e+12,7176.2,7176.2,7163.3,7168.3,7169.355844,1478.774692,2741.994702,...,2373.311465,7222.456191,7172.03750,7121.618809,7247.665536,7172.03750,7096.409464,23.444446,16.937237,11.585260
15147,2.0,1.577837e+27,3.600000e+12,7168.3,7170.5,7153.8,7155.7,7157.538202,1896.904483,2511.989348,...,2378.749813,7211.061086,7167.79375,7124.526414,7232.694754,7167.79375,7102.892746,22.962700,15.547888,11.916100


## Create the dataset

In [524]:
from torch.utils.data import Dataset, random_split, DataLoader

In [525]:
def split_df_by_col(col_starts_with: list, source_df: pd.DataFrame):
    """Select the columns of ``source_df`` whose name starts with a given prefix.

    Args:
        col_starts_with: Column-name prefixes to match.
        source_df: Dataframe to select from.

    Returns:
        ``source_df`` restricted to the matching columns, in the order they
        appear in ``source_df`` and with each column listed once even when it
        matches several prefixes. ``None`` when no column matches.
    """
    prefixes = tuple(col_starts_with)

    # dict.fromkeys keeps the source order while dropping repeated names, so a
    # column matching more than one prefix is never selected twice.
    cols = list(dict.fromkeys(
        c for c in source_df.columns
        if isinstance(c, str) and c.startswith(prefixes)
    ))

    if not cols:
        return None

    return source_df[cols]

In [526]:
class CryptoChartDataset(Dataset):
    """Sliding-window dataset over an indicator dataframe.

    Item ``idx`` is built from the ``window`` consecutive rows starting at
    position ``idx`` and is a ``(features, target)`` pair of float32 tensors:

    * ``features`` has one row per window row. Its columns are the
      ``CandlePatterns*`` columns followed by the ``KelBbandDif*``, ``ATR*``
      and ``EMA*``/``SMA*``/``BBANDS*`` column groups, each group passed
      through ``normalize.min_max_dataframe`` using only the rows of the window.
    * ``target`` has shape ``(1,)`` and holds the ``target_col`` value of the
      last row of the window.

    Built items are cached in memory, so each window is processed only once.
    """

    # Column-prefix groups; each group is normalised together, per window.
    _NORMALIZED_GROUPS = (
        ["KelBbandDif"],
        ["ATR"],
        ["EMA", "SMA", "BBANDS"],  # ema and sma patterns
    )

    def __init__(self, dataframe: pd.DataFrame, window: int, target_col="target"):
        """
        Args:
            dataframe: Source rows, ordered oldest to newest.
            window: Number of consecutive rows per item.
            target_col: Name of the column that holds the label.
        """
        self._df: pd.DataFrame = dataframe
        self._window = window
        self._target_col = target_col
        # Clamp at zero so len() stays valid when the frame is shorter than one window.
        # NOTE(review): `len(dataframe) - window` leaves out the final full window
        # (rows len-window .. len-1). Kept unchanged because downstream split sizes
        # depend on it — confirm whether `+ 1` was intended.
        self._len = max(len(dataframe) - window, 0)
        self._loaded = {}

    def __len__(self):
        return self._len

    def load_idx(self, idx):
        """Build the item for window ``idx`` and store it in the cache."""
        window_df: pd.DataFrame = self._df.iloc[idx:idx + self._window]
        # Label of the last row in the window.
        target = window_df[self._target_col].tail(1)

        # Candle patterns are used as they are; the other groups are normalised.
        parts = [split_df_by_col(["CandlePatterns"], window_df)]
        for group in self._NORMALIZED_GROUPS:
            group_df = split_df_by_col(list(group), window_df)
            if group_df is None:
                # Nothing in the frame matches this group.
                continue
            parts.append(normalize.min_max_dataframe(group_df))

        # Single column-wise concat instead of re-concatenating after every group.
        features = pd.concat(parts, axis=1, join="outer")

        self._loaded[idx] = (torch.tensor(features.to_numpy(), dtype=torch.float32),
                             torch.tensor(target.to_numpy(), dtype=torch.float32))

    def __getitem__(self, idx):
        """Return the cached ``(features, target)`` pair for ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self._len
        if not 0 <= idx < self._len:
            # Without this check an out-of-range slice would silently produce a
            # short or empty window instead of failing.
            raise IndexError(f"index {idx} out of range for dataset of length {self._len}")

        if idx not in self._loaded:
            self.load_idx(idx)
        return self._loaded[idx]
        

In [527]:
# Windows of 100 consecutive rows, labelled by the `target` column.
dataset = CryptoChartDataset(dataframe=df, window=100, target_col="target")

In [528]:
# Fetch the last item of the dataset and show the feature tensor plus both tensor shapes.
example = dataset[len(dataset) - 1]
example[0], example[0].shape, example[1].shape

(tensor([[0.5000, 0.5000, 0.5000,  ..., 0.9728, 0.9649, 0.9571],
         [0.5000, 0.5000, 0.5000,  ..., 0.9727, 0.9650, 0.9573],
         [0.5000, 0.5000, 0.5000,  ..., 0.9723, 0.9651, 0.9580],
         ...,
         [0.5000, 0.5000, 0.5000,  ..., 0.9570, 0.9429, 0.9288],
         [0.5000, 0.5000, 0.5000,  ..., 0.9545, 0.9423, 0.9300],
         [0.5000, 0.5000, 0.5000,  ..., 0.9521, 0.9416, 0.9310]]),
 torch.Size([100, 82]),
 torch.Size([1]))

### Split the dataset

In [529]:
# Hold out 30% of the items for validation.
val30 = math.floor(len(dataset) * .30)

# random_split draws its permutation from torch's global RNG, and this notebook only
# seeds (pl.seed_everything) later in the training cell. Seed here so the
# train/validation membership is the same on every Restart & Run All.
torch.manual_seed(1)
# NOTE(review): neighbouring items share all but one row of their window, so a random
# split places near-identical windows in both sets — consider a chronological split.
train_set, val_set = random_split(dataset, [len(dataset) - val30, val30])
len(train_set), len(val_set)


(10500, 4500)

### configure cuda

In [530]:
# CUDA for PyTorch: use the first GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

### the GRUNet lightning model

In [531]:
class GRUNet(pl.LightningModule):
    """GRU network that maps a window of feature rows to a single value.

    The input is run through a multi-layer GRU (``batch_first=True``, so inputs
    are ``(batch, sequence, features)``). The GRU output at the last sequence
    position goes through a ReLU and a linear layer with one output. Training,
    validation and test all use the mean-squared error against the target.

    NOTE(review): the notebook's target is a 0/1 label while the loss is MSE on
    an unbounded output — confirm this is intended rather than a logit + BCE setup.
    """

    # Shared by the train and validation loaders.
    batch_size = 10
    # Initial learning rate for Adam.
    learning_rate = 1e-3

    def __init__(self, train_dataset: Dataset, val_dataset: Dataset, hidden_dim, n_layers, drop_prob=0.2):
        """
        Args:
            train_dataset: Dataset served by ``train_dataloader`` (shuffled).
            val_dataset: Dataset served by ``val_dataloader`` (not shuffled).
            hidden_dim: Size of the GRU hidden state.
            n_layers: Number of stacked GRU layers.
            drop_prob: Dropout probability between GRU layers.
        """
        super().__init__()

        self.train_loader = DataLoader(train_dataset, shuffle=True, batch_size=self.batch_size)
        self.val_loader = DataLoader(val_dataset, shuffle=False, batch_size=self.batch_size)

        # input dim pulled dynamically from the training dataset
        _input_dim = train_dataset[0][0].shape[1]

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # nn.GRU applies dropout only between layers and warns when it is set on a
        # single-layer network, so disable it in that case.
        gru_dropout = drop_prob if n_layers > 1 else 0.0
        self.gru = nn.GRU(_input_dim, hidden_dim, n_layers, batch_first=True, dropout=gru_dropout)
        self.fc = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()

    def val_dataloader(self) -> DataLoader:
        return self.val_loader

    def train_dataloader(self) -> DataLoader:
        return self.train_loader

    def forward(self, x):
        """Return one value per batch element, computed from the last sequence position."""
        out, _ = self.gru(x)
        out = self.fc(self.relu(out[:, -1]))
        return out

    def configure_optimizers(self):
        """Adam with an exponentially decaying learning rate (gamma = 0.95)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        lr_scheduler = {'scheduler': torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.95),
                        'name': 'expo_lr'}
        return [optimizer], [lr_scheduler]

    def _step(self, batch):
        """Shared forward pass + MSE loss for an ``(x, y)`` batch."""
        x, y = batch
        out = self.forward(x)
        return F.mse_loss(out, y)

    def training_step(self, batch, batch_idx):
        loss = self._step(batch)
        result = pl.TrainResult(loss)
        result.log('train_loss', loss)
        return result

    def validation_step(self, val_batch, batch_idx):
        loss = self._step(val_batch)
        # Expose the validation loss to both the checkpoint and the early-stopping
        # callbacks; `early_stop_on` is what EarlyStopping reads from a Result object.
        result = pl.EvalResult(checkpoint_on=loss, early_stop_on=loss)
        result.log('val_loss', loss)
        return result

    def test_step(self, test_batch, batch_idx):
        loss = self._step(test_batch)
        # Evaluation loops expect an EvalResult; TrainResult is for training_step only.
        result = pl.EvalResult()
        result.log('test_loss', loss)
        return result

## Training!

In [532]:
# Fix the random seeds through Lightning before the model weights are created.
pl.seed_everything(1)

model = GRUNet(
    train_dataset=train_set,
    val_dataset=val_set,
    hidden_dim=1000,
    n_layers=10,
)

# Callbacks: learning-rate logging, and early stopping on the validation loss.
lr_logger = LearningRateLogger()
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=5)

trainer = pl.Trainer(
    max_epochs=1,
    profiler=True,
    callbacks=[lr_logger],
    early_stop_callback=early_stopping,
    # checkpoint_callback=checkpoint_callback,
    default_root_dir="../models",
)  # gpus=1

trainer.fit(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name | Type   | Params
--------------------------------
0 | gru  | GRU    | 57 M  
1 | fc   | Linear | 1 K   
2 | relu | ReLU   | 0     
Saving latest checkpoint..


Profiler Report

Action              	|  Mean duration (s)	|  Total time (s) 
-----------------------------------------------------------------
on_validation_epoch_start	|  1.5475e-05     	|  1.5475e-05     
on_validation_epoch_end	|  1.846e-05      	|  1.846e-05      
on_train_start      	|  0.020851       	|  0.020851       
on_epoch_start      	|  0.0023782      	|  0.0023782      
on_train_epoch_start	|  1.3766e-05     	|  1.3766e-05     
get_train_batch     	|  0.085098       	|  3.8294         
on_batch_start      	|  1.878e-05      	|  0.00084508     
on_train_batch_start	|  6.705e-06      	|  0.00030172     
model_forward       	|  0.56313        	|  25.341         
model_backward      	|  5.9318         	|  266.93         
on_after_bac

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…




1