# Import Modules

In [1]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Sample Dataset

- Sampel dataset dibuat menggunakan random number dengan ukuran 50 baris dan 5 kolom.
- Terdapat 50 observasi dan 5 kolom (4 kolom fitur dan 1 kolom target).

In [2]:
# Seed the global RNG first so the random sample below is reproducible
# across kernel restarts.
torch.manual_seed(42)

# 50 observations (rows) x 5 columns of random values.
dummy_data = torch.rand(50, 5)
dummy_data[:10]  # preview the first 10 rows

tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904],
        [0.6009, 0.2566, 0.7936, 0.9408, 0.1332],
        [0.9346, 0.5936, 0.8694, 0.5677, 0.7411],
        [0.4294, 0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317, 0.1053],
        [0.2695, 0.3588, 0.1994, 0.5472, 0.0062],
        [0.9516, 0.0753, 0.8860, 0.5832, 0.3376],
        [0.8090, 0.5779, 0.9040, 0.5547, 0.3423],
        [0.6343, 0.3644, 0.7104, 0.9464, 0.7890],
        [0.2814, 0.7886, 0.5895, 0.7539, 0.1952]])

# Split Dataset

In [3]:
# Columns 0-3 become the feature matrix X; column 4 becomes the 1-D target y.
X, y = dummy_data[:, :4], dummy_data[:, 4]

# Sanity-check the resulting shapes.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: torch.Size([50, 4])
y shape: torch.Size([50])


In [4]:
# Preview the first 10 rows of the feature tensor X.
X[:10]

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.6009, 0.2566, 0.7936, 0.9408],
        [0.9346, 0.5936, 0.8694, 0.5677],
        [0.4294, 0.8854, 0.5739, 0.2666],
        [0.2696, 0.4414, 0.2969, 0.8317],
        [0.2695, 0.3588, 0.1994, 0.5472],
        [0.9516, 0.0753, 0.8860, 0.5832],
        [0.8090, 0.5779, 0.9040, 0.5547],
        [0.6343, 0.3644, 0.7104, 0.9464],
        [0.2814, 0.7886, 0.5895, 0.7539]])

In [5]:
# Preview the first 10 values of the target tensor y.
y[:10]

tensor([0.3904, 0.1332, 0.7411, 0.6274, 0.1053, 0.0062, 0.3376, 0.3423, 0.7890,
        0.1952])

# Tensor Dataset

- Menggabungkan variabel fitur (X) dan target (y) menjadi suatu object iterable.
- Digunakan pada saat proses training, test, dan validation.

In [6]:
# TensorDataset is imported in the "Import Modules" cell at the top.
# Wrap features and targets together so that sample i is the pair (X[i], y[i]).
tensor_dataset = TensorDataset(X, y)
# .tensors exposes the wrapped tensors as a tuple: (X, y).
tensor_dataset.tensors

(tensor([[0.8823, 0.9150, 0.3829, 0.9593],
         [0.6009, 0.2566, 0.7936, 0.9408],
         [0.9346, 0.5936, 0.8694, 0.5677],
         [0.4294, 0.8854, 0.5739, 0.2666],
         [0.2696, 0.4414, 0.2969, 0.8317],
         [0.2695, 0.3588, 0.1994, 0.5472],
         [0.9516, 0.0753, 0.8860, 0.5832],
         [0.8090, 0.5779, 0.9040, 0.5547],
         [0.6343, 0.3644, 0.7104, 0.9464],
         [0.2814, 0.7886, 0.5895, 0.7539],
         [0.0050, 0.3068, 0.1165, 0.9103],
         [0.7071, 0.6581, 0.4913, 0.8913],
         [0.5315, 0.1587, 0.6542, 0.3278],
         [0.3958, 0.9147, 0.2036, 0.2018],
         [0.9497, 0.6666, 0.9811, 0.0874],
         [0.1088, 0.1637, 0.7025, 0.6790],
         [0.2418, 0.1591, 0.7653, 0.2979],
         [0.3813, 0.7860, 0.1115, 0.2477],
         [0.6057, 0.3725, 0.7980, 0.8399],
         [0.2331, 0.9578, 0.3313, 0.3227],
         [0.2137, 0.6249, 0.4340, 0.1371],
         [0.1585, 0.0758, 0.2247, 0.0624],
         [0.9998, 0.5944, 0.6541, 0.0337],
         [0

# Dataloader

- Digunakan untuk membuat dataset dapat diakses menggunakan batching. Batching adalah cara mengambil item dari kumpulan data secara bertahap, per kelompok (batch).
- Misalnya, suatu dataset terdiri dari 50 observasi. Jika diterapkan batching dengan ukuran 25, maka data akan diambil per 25 observasi untuk setiap batch. Pada kasus ini jumlah batch adalah 2, karena 50 / 25 = 2.

In [7]:
# DataLoader is imported in the "Import Modules" cell at the top.
# batch_size=25 serves the 50-sample dataset in 2 batches; shuffle is left at
# its default, so rows come out in their original order.
dataloader = DataLoader(tensor_dataset, batch_size=25)
# Stand-alone iterator over the loader. The loop below does not consume it:
# iterating the DataLoader directly starts from the first batch again.
data_iter = iter(dataloader)

# Each `data` is a [features, targets] pair for one batch; idx is 0-based,
# so it is printed as idx + 1.
for idx, data in enumerate(dataloader):
    print(f"Iteration:{idx + 1}\n{data}\n")

Iteration:1
[tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.6009, 0.2566, 0.7936, 0.9408],
        [0.9346, 0.5936, 0.8694, 0.5677],
        [0.4294, 0.8854, 0.5739, 0.2666],
        [0.2696, 0.4414, 0.2969, 0.8317],
        [0.2695, 0.3588, 0.1994, 0.5472],
        [0.9516, 0.0753, 0.8860, 0.5832],
        [0.8090, 0.5779, 0.9040, 0.5547],
        [0.6343, 0.3644, 0.7104, 0.9464],
        [0.2814, 0.7886, 0.5895, 0.7539],
        [0.0050, 0.3068, 0.1165, 0.9103],
        [0.7071, 0.6581, 0.4913, 0.8913],
        [0.5315, 0.1587, 0.6542, 0.3278],
        [0.3958, 0.9147, 0.2036, 0.2018],
        [0.9497, 0.6666, 0.9811, 0.0874],
        [0.1088, 0.1637, 0.7025, 0.6790],
        [0.2418, 0.1591, 0.7653, 0.2979],
        [0.3813, 0.7860, 0.1115, 0.2477],
        [0.6057, 0.3725, 0.7980, 0.8399],
        [0.2331, 0.9578, 0.3313, 0.3227],
        [0.2137, 0.6249, 0.4340, 0.1371],
        [0.1585, 0.0758, 0.2247, 0.0624],
        [0.9998, 0.5944, 0.6541, 0.0337],
        [0.3336, 0.57