# Part A: Basic Map-Style Dataset

In [188]:
from torch.utils.data import Dataset
import pandas as pd
import torch

In [189]:
# Read the first 1000 pipe-separated rows of the raw BTCUSDT file, keeping
# only the first six (unnamed) columns and giving them explicit names.
df = pd.read_csv(
    '../../data/raw/BTCUSDT.csv',
    sep='|',
    header=None,
    usecols=[0, 1, 2, 3, 4, 5],
    names=['timestemp', 'open', 'high', 'low', 'close', 'volume'],
    nrows=1000,
)

# The first column is parsed as seconds and converted to datetimes.
df['timestemp'] = pd.to_datetime(df['timestemp'], unit='s')

# Index by time, sort chronologically, and keep the five remaining columns
# (open, high, low, close, volume) as a float32 tensor with one row per record.
tensor_data = torch.tensor(
    df.set_index('timestemp').sort_index().values,
    dtype=torch.float32,
)

In [190]:
class BasicMarketDataset(Dataset):
    """Map-style dataset pairing each row with a value from the following row.

    Sample ``i`` is ``(data[i], data[i + 1, target_col])``: the full row at
    position ``i`` as the input and a single column of the next row as the
    target. The last row has no successor, so the dataset holds
    ``len(data) - 1`` samples (0 when ``data`` has fewer than two rows).

    Args:
        data: 2-D indexable (e.g. a ``torch.Tensor``) with one record per row.
        target_col: Column of the next row used as the target. Defaults to 3,
            the column the original implementation hard-coded.
    """

    def __init__(self, data, target_col=3):
        super().__init__()

        self.data = data
        self.target_col = target_col

    def __len__(self):
        """Return the number of (row, next-row target) pairs."""
        # Clamp at zero: len() must never return a negative number.
        return max(len(self.data) - 1, 0)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``; negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Reject out-of-range indices explicitly. Without this check an index
        # below -len(self) stays negative after normalisation and silently
        # pairs the last row with a target taken from the first rows.
        if not -size <= idx < size:
            raise IndexError(
                f"index {idx} is out of range for dataset of length {size}"
            )
        if idx < 0:
            idx += size
        x = self.data[idx]
        y = self.data[idx + 1, self.target_col]
        return x, y

In [191]:
# Wrap the loaded tensor in the basic map-style dataset.
data = BasicMarketDataset(tensor_data)

In [192]:
# Number of samples: one fewer than the number of rows, because every sample
# needs a following row to supply its target.
len(data)

999

In [193]:
# Fetch the first sample and check the shapes: x is one full row,
# y is a single scalar taken from the next row.
x_first, y_first = data[0]
x_first.shape, y_first.shape

(torch.Size([5]), torch.Size([]))

In [194]:
# Display the values of the first sample.
x_first, y_first

(tensor([4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.7752e+00]),
 tensor(4261.4800))

In [195]:
# Fetch the last sample through a negative index and check its shapes.
x_last, y_last = data[-1]
x_last.shape, y_last.shape

(torch.Size([5]), torch.Size([]))

In [196]:
# Display the values of the last sample.
x_last, y_last

(tensor([4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01]),
 tensor(4303.1802))

# Part B: Sliding Window

In [222]:
class SlidingWindowDataset(Dataset):
    """Map-style dataset of consecutive (look-back, look-ahead) row windows.

    Sample ``i`` is ``(x, y)`` where ``x = data[i : i + look_back]`` is the
    history window and ``y = data[i + look_back : i + look_back + look_ahead]``
    holds the rows that immediately follow it. Only windows that fit entirely
    inside ``data`` are exposed, so the dataset has
    ``len(data) - look_back - look_ahead + 1`` samples (0 if ``data`` is
    shorter than one full window).

    Args:
        data: Indexable sequence of rows (e.g. a 2-D ``torch.Tensor``).
        look_back: Number of rows in the input window ``x``.
        look_ahead: Number of rows in the target window ``y``.
    """

    def __init__(self, data, look_back, look_ahead):
        super().__init__()

        self.data = data
        self.look_back = look_back
        self.look_ahead = look_ahead

    def __len__(self):
        """Return the number of complete windows that fit in the data."""
        # Clamp at zero: len() must never return a negative number.
        return max(len(self.data) - self.look_back - self.look_ahead + 1, 0)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` window pair starting at row ``idx``.

        Negative indices count from the end of the dataset.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Slicing past the end never raises, it just yields short or empty
        # windows. Raise IndexError explicitly so that out-of-range access is
        # not silent and so that plain iteration over the dataset (which
        # relies on IndexError to stop) terminates.
        if not -size <= idx < size:
            raise IndexError(
                f"index {idx} is out of range for dataset of length {size}"
            )
        if idx < 0:
            idx += size

        # First row after the look-back window.
        split = idx + self.look_back
        x = self.data[idx:split]
        y = self.data[split:split + self.look_ahead]
        return x, y

In [223]:
# Build sliding windows with look_back=5 input rows and look_ahead=3 target rows.
windows = SlidingWindowDataset(tensor_data, 5, 3)

In [224]:
# Number of windows: rows - look_back - look_ahead + 1.
len(windows)

993

In [228]:
# First 8 raw rows (look_back + look_ahead = 5 + 3), for comparison with windows[0].
tensor_data[:8]

tensor([[4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.7752e+00],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 0.0000e+00],
        [4.2806e+03, 4.2806e+03, 4.2806e+03, 4.2806e+03, 2.6107e-01],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.2008e-02],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.4080e-01],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 0.0000e+00],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 0.0000e+00],
        [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 0.0000e+00]])

In [229]:
# First window: x should be raw rows 0-4 and y raw rows 5-7.
windows[0]

(tensor([[4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.7752e+00],
         [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 0.0000e+00],
         [4.2806e+03, 4.2806e+03, 4.2806e+03, 4.2806e+03, 2.6107e-01],
         [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.2008e-02],
         [4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.4080e-01]]),
 tensor([[4261.4800, 4261.4800, 4261.4800, 4261.4800,    0.0000],
         [4261.4800, 4261.4800, 4261.4800, 4261.4800,    0.0000],
         [4261.4800, 4261.4800, 4261.4800, 4261.4800,    0.0000]]))

In [225]:
# Last 8 raw rows, for comparison with the final window.
tensor_data[-8:]

tensor([[4.2871e+03, 4.2871e+03, 4.2871e+03, 4.2871e+03, 8.5157e-02],
        [4.2871e+03, 4.3030e+03, 4.2871e+03, 4.3030e+03, 5.5204e-01],
        [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 4.2023e-02],
        [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.2740e-02],
        [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.8553e-01],
        [4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 4.4585e-01],
        [4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01],
        [4.3019e+03, 4.3032e+03, 4.2625e+03, 4.3032e+03, 1.1619e+00]])

In [226]:
# Last window through a negative index: x should be the first 5 of the last
# 8 raw rows and y the final 3.
windows[-1]

(tensor([[4.2871e+03, 4.2871e+03, 4.2871e+03, 4.2871e+03, 8.5157e-02],
         [4.2871e+03, 4.3030e+03, 4.2871e+03, 4.3030e+03, 5.5204e-01],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 4.2023e-02],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.2740e-02],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.8553e-01]]),
 tensor([[4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 4.4585e-01],
         [4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01],
         [4.3019e+03, 4.3032e+03, 4.2625e+03, 4.3032e+03, 1.1619e+00]]))

In [227]:
# Same window through its explicit positive index; should match windows[-1].
windows[len(windows) - 1]

(tensor([[4.2871e+03, 4.2871e+03, 4.2871e+03, 4.2871e+03, 8.5157e-02],
         [4.2871e+03, 4.3030e+03, 4.2871e+03, 4.3030e+03, 5.5204e-01],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 4.2023e-02],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.2740e-02],
         [4.3030e+03, 4.3030e+03, 4.3030e+03, 4.3030e+03, 5.8553e-01]]),
 tensor([[4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 4.4585e-01],
         [4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01],
         [4.3019e+03, 4.3032e+03, 4.2625e+03, 4.3032e+03, 1.1619e+00]]))