# PyTorch Tutorial: 02 Dataset and Iterator
## Overview 
In this tutorial, we will cover the basics of constructing datasets and iterators so that we can train models using gradient descent.

The best tutorial can be found on the official PyTorch website (https://pytorch.org/tutorials/beginner/data_loading_tutorial.html).

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
class MyStupidDataset(Dataset):
    """Toy map-style dataset of random tensors, used to demonstrate the ``Dataset`` API.

    Args:
        num_samples: Number of samples to generate. Defaults to 1024.
        sample_shape: Shape of each individual sample. Defaults to ``(10, 10)``.
    """

    def __init__(self, num_samples=1024, sample_shape=(10, 10)):
        super(MyStupidDataset, self).__init__()
        # All samples live in one tensor, stacked along dim 0.
        self.data = torch.randn(num_samples, *sample_shape)

    def __len__(self):
        """Return the number of samples currently stored."""
        # Derive the length from the data itself so the two can never disagree.
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` (indexing along dim 0)."""
        return self.data[idx]

In [3]:
# Instantiate the toy dataset defined above.
my_stupid_dataset = MyStupidDataset()

In [4]:
# Serve the toy dataset in batches of 64, with the sample order reshuffled on every pass.
my_data_loader = DataLoader(dataset=my_stupid_dataset, batch_size=64, shuffle=True)

In [5]:
# Walk through the loader once and print every batch it yields.
for batch in my_data_loader:
    print(batch)

tensor([[[ 2.0829,  1.4157, -0.0795,  ..., -0.5246, -2.0926,  0.2937],
         [-1.0649,  0.0584, -0.9167,  ...,  1.1136, -0.3728,  0.4191],
         [ 0.5729, -1.2280, -0.5642,  ...,  1.6796,  0.1244, -0.2625],
         ...,
         [-0.0519, -0.8040, -0.3219,  ...,  0.1253,  0.5020,  0.2908],
         [-0.1032,  0.5154,  1.1972,  ...,  1.3615,  0.7099,  0.3611],
         [-0.5986,  0.1679,  1.4102,  ...,  0.6437,  0.2049, -0.5396]],

        [[ 0.1181, -0.6819,  1.1765,  ...,  0.7980,  0.0276, -0.2738],
         [ 1.4717, -0.7874,  0.6023,  ...,  0.4631,  0.6165, -1.2731],
         [ 1.1521,  1.1743, -0.7890,  ...,  0.3080, -1.1129,  0.5252],
         ...,
         [-2.1914,  0.7708,  1.0579,  ...,  0.5082, -0.4315,  0.8776],
         [-0.4624, -1.0397,  0.1381,  ...,  0.2732, -0.0526,  0.6940],
         [-0.5307,  2.2234,  0.8423,  ..., -0.4978,  1.3656,  0.2588]],

        [[ 0.7895, -0.1815, -1.7783,  ...,  0.2025,  0.6794,  1.1208],
         [-0.9830, -1.2404, -1.0413,  ...,  1

In [6]:
# A common pattern: return each sample as a dict of named fields.
class MyDictDataset(Dataset):
    """Toy map-style dataset whose samples are ``{'x': features, 'y': target}`` dicts.

    Args:
        num_samples: Number of samples to generate. Defaults to 1024.
        num_features: Length of each ``x`` feature vector. Defaults to 10.
    """

    def __init__(self, num_samples=1024, num_features=10):
        super(MyDictDataset, self).__init__()
        # Features and targets share dim 0, so row i of x pairs with element i of y.
        self.x = torch.randn(num_samples, num_features)
        self.y = torch.randn(num_samples)

    def __len__(self):
        """Return the number of samples currently stored."""
        # Derive the length from the data itself so the two can never disagree.
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict with keys ``'x'`` and ``'y'``."""
        return {'x': self.x[idx], 'y': self.y[idx]}

In [7]:
# Build the dict-style dataset and serve it in shuffled batches of 64.
my_dict_dataset = MyDictDataset()
my_data_loader = DataLoader(dataset=my_dict_dataset, batch_size=64, shuffle=True)

# Each batch is itself a dict with the same keys as a single sample.
for batch in my_data_loader:
    print(batch['x'], batch['y'], sep='\n')

tensor([[ 9.8082e-01, -2.8023e-01, -1.2525e+00, -3.6965e-01,  1.8818e-01,
         -1.6073e+00,  9.3212e-01, -6.9147e-01, -3.3545e-02,  2.2559e-01],
        [-2.1869e+00,  4.7473e-01,  3.7868e-03, -4.0867e-01,  1.6978e+00,
          2.4035e+00,  1.2119e+00,  1.6577e-01, -6.0310e-01, -2.3801e-01],
        [ 1.0599e-02, -3.6531e-01, -6.1027e-01,  9.3681e-01, -1.5954e-01,
          1.0503e+00,  2.0387e+00,  3.9710e-01, -1.1716e+00,  9.8480e-01],
        [ 2.4740e-01,  2.1640e-01,  6.1070e-01, -5.7585e-01,  2.1275e+00,
         -5.2992e-01, -4.9332e-01,  4.5006e-02,  4.5715e-02, -1.1354e+00],
        [-1.1164e+00,  5.4933e-01,  1.8641e+00,  1.8800e+00, -2.9633e-01,
          5.0617e-01, -8.1875e-02,  1.3482e+00,  1.6780e+00, -1.9506e-01],
        [ 9.7563e-01,  6.3292e-02,  7.3559e-01,  1.1803e+00,  1.6200e+00,
          3.4809e-01, -7.0261e-01, -9.4948e-01,  5.2532e-01,  7.3993e-01],
        [-4.5824e-01, -2.0280e-01, -3.5606e-01,  2.2650e-02, -4.9376e-01,
          2.2427e-02,  3.7425e-0

tensor([[-1.1948e-01, -3.3844e-01, -1.0990e+00, -1.3620e+00,  1.5962e+00,
          4.0207e-01,  1.7554e+00,  7.8194e-01,  2.3851e+00, -1.2792e+00],
        [ 6.7720e-01, -2.3028e-02,  2.8391e-01, -2.6081e-01, -3.0357e-01,
          3.0295e-01,  6.1715e-01, -1.2074e+00, -1.3609e+00,  5.9287e-01],
        [-1.3126e+00, -4.1483e-01, -5.0597e-01,  3.7362e-01, -1.5332e+00,
         -1.1719e+00,  9.1465e-01, -1.8734e+00, -5.8218e-01,  1.3103e+00],
        [-1.3817e-01, -2.3106e-01,  1.6226e+00, -1.0186e+00, -7.0677e-01,
         -1.1695e+00,  1.8919e+00, -8.8633e-01, -1.0983e+00,  1.5716e+00],
        [ 1.2546e-01, -3.1749e-03,  3.7619e-01, -1.2693e+00,  8.4616e-01,
         -3.2801e-01,  7.9962e-02, -2.6675e-01, -5.1432e-01,  7.6496e-01],
        [-1.2961e+00, -7.1657e-01, -6.0917e-01, -7.5575e-01, -6.5881e-01,
         -2.5216e-01, -5.5920e-01,  1.9096e-01,  2.0621e+00, -4.4936e-01],
        [ 2.9514e+00, -1.0681e+00, -1.4430e+00,  1.8782e-01, -3.0654e-01,
          5.3518e-01,  6.8356e-0

In [8]:
from torch.utils.data import TensorDataset
# Wrap pre-built tensors directly; TensorDataset indexes all of them along their first dimension.
x, y = torch.randn(10, 100), torch.randn(10)
tensor_dataset = TensorDataset(x, y)