# Part A: Basic Map-Style Dataset

In [260]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch

In [261]:
# Read the first 1000 rows of the pipe-delimited, header-less BTCUSDT dump,
# keeping only its first six columns.
df = pd.read_csv(
    '../../data/raw/BTCUSDT.csv',
    sep='|',
    header=None,
    usecols=list(range(6)),
    names=['timestemp', 'open', 'high', 'low', 'close', 'volume'],
    nrows=1000,
)
# The raw time column is parsed as epoch seconds.
df['timestemp'] = pd.to_datetime(df['timestemp'], unit='s')

# Index by time and sort chronologically; the remaining columns
# (open, high, low, close, volume) become a float32 tensor, one row per record.
tensor_data = torch.tensor(
    df.set_index('timestemp').sort_index().values,
    dtype=torch.float32,
)

In [262]:
class BasicMarketDataset(Dataset):
    """Map-style dataset pairing each row with one value from the next row.

    Sample ``i`` is ``(data[i], data[i + 1, target_col])``, so the dataset
    holds one sample fewer than ``data`` has rows.

    Args:
        data: 2-D indexable (e.g. a tensor) supporting ``len(data)``,
            ``data[i]`` and ``data[i, j]``.
        target_col: Column of the following row returned as the target.
            Defaults to 3, the value that used to be hard-coded (the close
            price for an open/high/low/close/volume column layout).
    """

    def __init__(self, data, target_col=3):
        super().__init__()

        self.data = data
        self.target_col = target_col

    def __len__(self):
        # Clamp at zero: len() raises ValueError when __len__ returns a
        # negative number, which would happen for empty data.
        return max(len(self.data) - 1, 0)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``; negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n = len(self)
        # Without this check an index below -n would wrap only once and then
        # silently pair the last row with a target taken from the first rows.
        if not -n <= idx < n:
            raise IndexError(
                f"index {idx} is out of range for dataset of length {n}"
            )
        if idx < 0:
            idx += n
        x = self.data[idx]
        y = self.data[idx + 1, self.target_col]
        return x, y

In [263]:
data = BasicMarketDataset(tensor_data)

In [264]:
len(data)

999

In [265]:
x_first, y_first = data[0]
x_first.shape, y_first.shape

(torch.Size([5]), torch.Size([]))

In [266]:
x_first, y_first

(tensor([4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.7752e+00]),
 tensor(4261.4800))

In [267]:
x_last, y_last = data[-1]
x_last.shape, y_last.shape

(torch.Size([5]), torch.Size([]))

In [268]:
x_last, y_last

(tensor([4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01]),
 tensor(4303.1802))

# Part B: Sliding Window

In [269]:
class SlidingWindowDataset(Dataset):
    """Map-style dataset of consecutive (history, future) windows over ``data``.

    Sample ``i`` is ``(data[i : i + look_back],
    data[i + look_back : i + look_back + look_ahead])``.

    Args:
        data: Sliceable sequence (e.g. a tensor) indexed along its first axis.
        look_back: Number of rows in the input window ``x``.
        look_ahead: Number of rows in the target window ``y``.

    Raises:
        ValueError: if ``look_back`` or ``look_ahead`` is negative.
    """

    def __init__(self, data, look_back, look_ahead):
        super().__init__()

        if look_back < 0 or look_ahead < 0:
            raise ValueError(
                "look_back and look_ahead must be non-negative, got "
                f"look_back={look_back}, look_ahead={look_ahead}"
            )

        self.data = data
        self.look_back = look_back
        self.look_ahead = look_ahead

    def __len__(self):
        # Clamp at zero: when data is shorter than one full window the raw
        # count is negative and len() would raise ValueError.
        return max(len(self.data) - self.look_back - self.look_ahead + 1, 0)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for window ``idx``; negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n = len(self)
        # Slicing never raises, so without this check an out-of-range index
        # would return truncated or empty windows, and plain iteration over
        # the dataset (which stops on IndexError) would never terminate.
        if not -n <= idx < n:
            raise IndexError(
                f"index {idx} is out of range for dataset of length {n}"
            )
        if idx < 0:
            idx += n

        split = idx + self.look_back  # first row of the look-ahead window
        x = self.data[idx:split]
        y = self.data[split:split + self.look_ahead]
        return x, y

In [270]:
lb, la = 4, 3
windows = SlidingWindowDataset(tensor_data, look_back=lb, look_ahead=la)

In [271]:
len(windows)

994

In [272]:
torch.equal(tensor_data[:lb+la], torch.cat(windows[0]))

True

In [273]:
torch.equal(torch.cat(windows[len(windows)-1]), tensor_data[-(lb+la):])

True

In [274]:
torch.equal(torch.cat(windows[-1]), torch.cat(windows[len(windows)-1]))

True

# Part C: Data Loader

In [275]:
# Batch the windows in their original order, 32 windows per batch.
sequences = DataLoader(windows, batch_size=32, shuffle=False)

In [276]:
# Walk every batch once and print the shapes of its input and target tensors.
for sequence in sequences:
    # Each batch unpacks into the stacked look-back (x) and look-ahead (y) windows.
    x, y = sequence
    print(x.shape, y.shape)

torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size(