# Part A: Basic Map-Style Dataset

In [1]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch

In [2]:
# Read the first 1000 rows of the header-less, pipe-separated BTCUSDT file,
# keeping its first six fields and naming them explicitly.
# NOTE(review): 'timestemp' looks like a typo for 'timestamp'; the spelling is
# kept in case other cells reference the column by this name.
df = pd.read_csv(
    '../../data/raw/BTCUSDT.csv',
    sep='|',
    header=None,
    usecols=list(range(6)),
    names=['timestemp', 'open', 'high', 'low', 'close', 'volume'],
    nrows=1000,
)
# The raw time field is interpreted as epoch seconds and converted to datetimes.
df = df.assign(timestemp=pd.to_datetime(df['timestemp'], unit='s'))

# Index by time, order the rows by that index, and keep the remaining five
# columns (open, high, low, close, volume) as a float32 tensor.
tensor_data = torch.tensor(
    df.set_index('timestemp').sort_index().to_numpy(),
    dtype=torch.float32,
)

In [3]:
class BasicMarketDataset(Dataset):
    """Map-style dataset pairing each row with one column of the following row.

    Sample ``i`` is ``(data[i], data[i + 1, target_index])``. The last row of
    ``data`` is therefore only ever used as a target, and the dataset holds
    ``len(data) - 1`` samples (zero when ``data`` has fewer than two rows).

    Args:
        data: Tensor indexed as ``data[row, column]``.
        target_index: Column of the *next* row that is returned as the target.
            Defaults to 3, the column this class previously hard-coded (the
            ``close`` column of the tensor built in this notebook).
    """

    def __init__(self, data, target_index=3):
        super().__init__()

        self.data = data
        self.target_index = target_index

    def __len__(self):
        """Return the number of (row, next-row target) pairs, never negative."""
        # len() raises ValueError when __len__ returns a negative number, which
        # is what ``len(data) - 1`` would produce for an empty tensor.
        return max(len(self.data) - 1, 0)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``; negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Without this check an index below -size is shifted once, stays
        # negative, and silently returns a mismatched (x, y) pair.
        if not -size <= idx < size:
            raise IndexError(
                f"index {idx} is out of range for a dataset of size {size}"
            )
        if idx < 0:
            idx += size
        x = self.data[idx]
        y = self.data[idx + 1, self.target_index]
        return x, y

In [4]:
# Wrap the OHLCV tensor so that sample i pairs row i with a value from row i + 1.
data = BasicMarketDataset(data=tensor_data)

In [5]:
# Expect len(tensor_data) - 1 samples: the final row has no successor to act as its target.
len(data)

999

In [6]:
# First sample: x is row 0 of tensor_data (all five columns), y is a single value from row 1.
x_first, y_first = data[0]
x_first.shape, y_first.shape

(torch.Size([5]), torch.Size([]))

In [7]:
# Inspect the values: y_first should equal column 3 (close) of the row that follows x_first.
x_first, y_first

(tensor([4.2615e+03, 4.2615e+03, 4.2615e+03, 4.2615e+03, 1.7752e+00]),
 tensor(4261.4800))

In [8]:
# Negative indices count from the end, so -1 addresses sample len(data) - 1.
x_last, y_last = data[-1]
x_last.shape, y_last.shape

(torch.Size([5]), torch.Size([]))

In [9]:
# x_last is the second-to-last row of tensor_data; y_last is column 3 (close) of the final row.
x_last, y_last

(tensor([4.3019e+03, 4.3019e+03, 4.3019e+03, 4.3019e+03, 3.7240e-01]),
 tensor(4303.1802))

# Part B: Sliding Window

In [10]:
from torch.utils.data import Dataset
import torch


class SlidingWindowDataset(Dataset):
    """Map-style dataset of consecutive (history, future) windows over ``data``.

    For sample ``i`` and ``t = i + look_back``::

        x = data[i : t, input_indices]                 # look_back rows
        y = data[t : t + look_ahead, target_indices]   # look_ahead rows

    Windows advance one row at a time, so the dataset holds
    ``len(data) - look_back - look_ahead + 1`` samples (zero when ``data`` is
    too short to hold a single complete window).

    Args:
        data: Tensor indexed as ``data[row, column]``.
        look_back: Number of rows in each input window.
        look_ahead: Number of rows in each target window.
        input_indices: Column indices selected for ``x``.
        target_indices: Column indices selected for ``y``.

    Raises:
        ValueError: If ``look_back`` or ``look_ahead`` is negative.
    """

    def __init__(self, data: torch.Tensor, look_back: int, look_ahead: int,
                 input_indices: list, target_indices: list):
        super().__init__()

        if look_back < 0 or look_ahead < 0:
            raise ValueError(
                "look_back and look_ahead must be non-negative, got "
                f"look_back={look_back}, look_ahead={look_ahead}"
            )

        self.data = data
        self.look_back = look_back
        self.look_ahead = look_ahead
        self.input_indices = input_indices
        self.target_indices = target_indices

    def __len__(self):
        """Return the number of complete windows, never negative."""
        # len() raises ValueError on a negative result, which the raw formula
        # produces when data has fewer than look_back + look_ahead rows.
        return max(len(self.data) - self.look_back - self.look_ahead + 1, 0)

    def __getitem__(self, idx: int):
        """Return ``(x, y)`` for window ``idx``; negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Slicing never raises, so without this check an out-of-range index
        # returns truncated or empty windows, and plain iteration over the
        # dataset (which stops only on IndexError) would never terminate.
        if not -size <= idx < size:
            raise IndexError(
                f"index {idx} is out of range for a dataset of size {size}"
            )
        if idx < 0:
            idx += size

        t = idx + self.look_back  # first row of the target window
        x = self.data[idx:t, self.input_indices]
        y = self.data[t:t + self.look_ahead, self.target_indices]
        return x, y


In [11]:
# Window geometry: 4 past rows form the input, the 3 rows after them form the target.
lb = 4
la = 3

# Select all five columns of tensor_data on both the input and the target side.
input_indices = list(range(5))
target_indices = list(range(5))

windows = SlidingWindowDataset(
    data=tensor_data,
    look_back=lb,
    look_ahead=la,
    input_indices=input_indices,
    target_indices=target_indices,
)

In [12]:
# Expect len(tensor_data) - lb - la + 1 windows, one per valid starting row.
len(windows)

994

In [13]:
# Sanity check: x and y of the first window, stacked row-wise, must reproduce the first lb + la rows.
# This holds because both index lists select every column in its original order.
torch.equal(tensor_data[:lb+la], torch.cat(windows[0]))

True

In [14]:
# Sanity check: the last window must end exactly on the final row of tensor_data.
torch.equal(torch.cat(windows[len(windows)-1]), tensor_data[-(lb+la):])

True

In [15]:
# Sanity check: windows[-1] must be the same sample as windows[len(windows) - 1].
torch.equal(torch.cat(windows[-1]), torch.cat(windows[len(windows)-1]))

True

# Part C: Data Loader

In [16]:
# Batch the windows 32 at a time, in dataset order (no shuffling).
sequences = DataLoader(windows, batch_size=32, shuffle=False)

In [17]:
# Walk the loader once and print the shape of every batch:
# x is (batch, look_back, n_input_columns), y is (batch, look_ahead, n_target_columns).
# The number of windows is not a multiple of 32 and drop_last is not set,
# so the final batch is expected to be smaller than the others.
for sequence in sequences:
    x, y = sequence  # each batch is an (inputs, targets) pair
    print(x.shape, y.shape)

torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size([32, 3, 5])
torch.Size([32, 4, 5]) torch.Size(