In [7]:
import os
import pandas as pd
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset

In [8]:
data_path = "../data/gbpusd"
files = list(os.walk(data_path))[0][2]
dfs = []
for file in files:
    if "_m15_" in file:
        _df = pd.read_csv(os.path.join(data_path,file),sep=";")
        _df["FILE"] = file
        dfs.append(_df)
df = pd.concat(dfs)
df.head()

Unnamed: 0,OPEN_DATE,OPEN_TIME,CLOSE_DATE,CLOSE_TIME,OPEN,HIGH,LOW,CLOSE,VOLUME,DELTA,ASK,BID,EXPIRATION,FILE,PRICE
0,02.01.2013,13:00,02.01.2013,13:15,1.6249,1.63,1.6278,1.6297,3284,-142,1571,1713,03-13,6b_m15_20130101_20131231.csv,
1,02.01.2013,13:15,02.01.2013,13:30,1.6297,1.6303,1.6282,1.6293,2070,86,1078,992,03-13,6b_m15_20130101_20131231.csv,
2,02.01.2013,13:30,02.01.2013,13:45,1.6291,1.6314,1.6292,1.6297,2951,129,1540,1411,03-13,6b_m15_20130101_20131231.csv,
3,02.01.2013,13:45,02.01.2013,14:00,1.6302,1.6302,1.6292,1.6292,1117,-77,520,597,03-13,6b_m15_20130101_20131231.csv,
4,02.01.2013,14:00,02.01.2013,14:15,1.6292,1.6307,1.629,1.6307,977,27,502,475,03-13,6b_m15_20130101_20131231.csv,


In [10]:
df["OPEN_DATE"] = pd.to_datetime(df["OPEN_DATE"])
df = df.sort_values(by=["OPEN_DATE","OPEN_TIME"]).reset_index(drop=True)
df.head()

Unnamed: 0,OPEN_DATE,OPEN_TIME,CLOSE_DATE,CLOSE_TIME,OPEN,HIGH,LOW,CLOSE,VOLUME,DELTA,ASK,BID,EXPIRATION,FILE,PRICE
0,2012-01-02,01:00,01.02.2012,01:15,1.5764,1.5764,1.5757,1.5764,371,201,286,85,03-12,6b_m15_20120101_20121231.csv,
1,2012-01-02,01:15,01.02.2012,01:30,1.5764,1.576,1.5755,1.5764,119,-19,50,69,03-12,6b_m15_20120101_20121231.csv,
2,2012-01-02,01:30,01.02.2012,01:45,1.5764,1.576,1.5754,1.5764,188,48,118,70,03-12,6b_m15_20120101_20121231.csv,
3,2012-01-02,01:45,01.02.2012,02:00,1.5764,1.576,1.5749,1.5764,243,23,133,110,03-12,6b_m15_20120101_20121231.csv,
4,2012-01-02,02:00,01.02.2012,02:15,1.5764,1.5754,1.5747,1.5764,251,-1,125,126,03-12,6b_m15_20120101_20121231.csv,


In [11]:
df = df[df["FILE"]!='6b_cluster_m5_20130101_20131231.csv']
len(df)

453057

In [None]:
def _create_data(df, input_len, output_len):
    inputs = list()
    outputs = list()
    for i in range(len(df)):
        try:
            X = df.iloc[i:i+input_len][["OPEN","HIGH","LOW","CLOSE","VOLUME","DELTA","ASK","BID"]].values
            y = df.iloc[i+input_len:i+input_len+output_len]["CLOSE"].values
            inputs.append(X)
            outputs.append(y)
        except IndexError:
            pass
    return inputs, outputs

def create_data(file_names):

    X = []
    y = []
    
    for file_id in tqdm(file_names):
        _X, _y = _create_data(df[df["FILE"]==file_id],32,8)
        X.extend(_X)
        y.extend(_y)
    
    return X,y

train_files = df["FILE"].unique()[-3:-1]
test_files = df["FILE"].unique()[-1:]
X_train, y_train = create_data(train_files)
X_test, y_test = create_data(test_files)

In [20]:
batch_size = 32

class GBPDataset(Dataset):
    
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])
    
    
train_dataset = GBPDataset(X_train,y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

val_dataset = GBPDataset(X_test,y_test)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

next(iter(val_dataset))

(tensor([[   1.4216,    1.4222,    1.4216,    1.4216,  303.0000,  171.0000,
           237.0000,   66.0000],
         [   1.4220,    1.4225,    1.4220,    1.4220,  154.0000,   40.0000,
            97.0000,   57.0000],
         [   1.4225,    1.4226,    1.4218,    1.4224,  912.0000,  232.0000,
           572.0000,  340.0000],
         [   1.4224,    1.4227,    1.4221,    1.4221,  110.0000,  -26.0000,
            42.0000,   68.0000],
         [   1.4222,    1.4235,    1.4220,    1.4234, 1107.0000,  -59.0000,
           524.0000,  583.0000],
         [   1.4233,    1.4233,    1.4224,    1.4226,  427.0000,  -73.0000,
           177.0000,  250.0000],
         [   1.4226,    1.4230,    1.4223,    1.4228,  292.0000,   12.0000,
           152.0000,  140.0000],
         [   1.4227,    1.4227,    1.4219,    1.4226,  506.0000,  -20.0000,
           243.0000,  263.0000],
         [   1.4227,    1.4227,    1.4216,    1.4218,  388.0000,  -26.0000,
           181.0000,  207.0000],
         [   1.4218

In [None]:
class StasNet(nn.Module):
    
    def __init__(self):
        super(self,StasNet).__init__()
        self.lstm1 = nn.LSTM(8, 10)
        self.lstm2 = nn.LSTM(10, 8)
                
    def forward(x):
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        return x[:,-1,:]