<a href="https://colab.research.google.com/github/alwanfa/neural-network-exploration/blob/main/torch_dataloader_anatomy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading the dataset

In [2]:
# prompt: import numpy, pandas, torch dataloader and tensor dataset

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset


In [3]:

# Synthetic feature matrix: one row per observation, one column per feature,
# filled with uniform samples in [0, 1) from NumPy's global random generator.
# No seed is set in this cell, so the values differ from run to run.
n_obeservations, n_features = 100, 20
data = np.random.rand(n_obeservations, n_features)

In [6]:
# Copy the NumPy array into a PyTorch tensor (no dtype is requested here).
dataT = torch.tensor(data)

# Report the container type, shape and dtype of each object, one value per line,
# with a blank line separating the NumPy summary from the tensor summary.
print('Numpy data: ', type(data), data.shape, data.dtype, sep='\n')
print()
print('Torch tensor: ', type(dataT), dataT.shape, dataT.dtype, sep='\n')


Numpy data: 
<class 'numpy.ndarray'>
(100, 20)
float64

Torch tensor: 
<class 'torch.Tensor'>
torch.Size([100, 20])
torch.float64


In [23]:
# Show the tensor as the cell output; the repr abbreviates the middle rows and
# columns with "..." instead of printing every value.
dataT

tensor([[0.4075, 0.2740, 0.4280,  ..., 0.0764, 0.8179, 0.8171],
        [0.7095, 0.3158, 0.3363,  ..., 0.7028, 0.7733, 0.0552],
        [0.8561, 0.4519, 0.8760,  ..., 0.0839, 0.7371, 0.8539],
        ...,
        [0.8188, 0.4282, 0.1110,  ..., 0.5945, 0.5985, 0.0471],
        [0.5895, 0.0453, 0.5771,  ..., 0.2404, 0.3429, 0.1956],
        [0.0669, 0.0146, 0.8544,  ..., 0.3187, 0.3391, 0.9278]],
       dtype=torch.float64)

### Converting the data type

In [17]:
# Option 1: choose the dtype while building the tensor from the NumPy array.
dataT2 = torch.tensor(data, dtype=torch.float32)
dataT3 = torch.tensor(data, dtype=torch.long)

# Option 2: build the tensor first, then cast it.
# dataT2 = torch.tensor(data).float()
# dataT3 = torch.tensor(data).long()

# NOTE(review): `data` holds values produced by np.random.rand, so the integer
# cast presumably leaves only zeros in dataT3 - fine for a dtype demo, but
# confirm before reusing dataT3 for anything else.
print(f'data T2 : {dataT2.dtype}', f'data T3 : {dataT3.dtype}', sep='\n')

data T2 : torch.float32
data T3 : torch.int64


### Converting to a tensor *dataset*

In [28]:
dataset = TensorDataset(dataT) # TensorDataset accepts tensor-type data
# Only the feature tensor is wrapped here (no labels), so `dataset.tensors`
# holds a single element. The print below counts wrapped tensors, not samples.

print(f'len dataset without label : {len(dataset.tensors)}')

len dataset without label : 1


### Tensor Dataset

In [47]:
# Labels 1..4: evenly spaced values from 0.01 to 4 rounded up, then reshaped
# into a column so there is one label row per observation.
labels = torch.ceil(torch.linspace(.01, 4, n_obeservations)).reshape(-1, 1)

# Pair the features with the labels; `.tensors` now holds both tensors.
dataset = TensorDataset(dataT, labels)
print(
    f'len dataset with label : {len(dataset.tensors)}',
    dataset.tensors[0].shape,
    dataset.tensors[1].shape,
    sep='\n',
)

# Sample at index 99, shown as the cell output as a (features, label) tuple.
dataset[99]

len dataset with label : 2
torch.Size([100, 20])
torch.Size([100, 1])


(tensor([0.0669, 0.0146, 0.8544, 0.9757, 0.2134, 0.5268, 0.2855, 0.7775, 0.8331,
         0.4470, 0.1062, 0.1624, 0.6905, 0.9204, 0.0870, 0.7155, 0.8592, 0.3187,
         0.3391, 0.9278], dtype=torch.float64),
 tensor([4.]))

## Data Loaders

In [50]:
# Serve the dataset in shuffled mini-batches of `batchsize` samples.
batchsize = 25
dataloader = DataLoader(dataset=dataset, batch_size=batchsize, shuffle=True)

# The loader exposes the wrapped dataset; show the shape of its feature tensor.
dataloader.dataset.tensors[0].shape

torch.Size([100, 20])

In [55]:
# Walk through every batch and report the feature shape, the label shape, and
# the labels transposed into a single row for compact display.
for x, y in dataloader:
    print(x.shape, y.shape, y.T, sep='\n')


torch.Size([25, 20])
torch.Size([25, 1])
tensor([[1., 4., 2., 4., 1., 2., 1., 4., 3., 3., 1., 4., 3., 4., 2., 4., 3., 2.,
         3., 4., 4., 1., 2., 4., 2.]])
torch.Size([25, 20])
torch.Size([25, 1])
tensor([[1., 3., 1., 4., 2., 4., 4., 3., 3., 4., 1., 3., 1., 2., 3., 3., 2., 1.,
         3., 1., 2., 4., 2., 2., 1.]])
torch.Size([25, 20])
torch.Size([25, 1])
tensor([[2., 3., 1., 2., 4., 4., 3., 3., 2., 3., 1., 2., 1., 3., 2., 4., 2., 2.,
         1., 3., 4., 4., 2., 2., 1.]])
torch.Size([25, 20])
torch.Size([25, 1])
tensor([[3., 4., 4., 2., 3., 4., 1., 1., 3., 1., 1., 4., 1., 2., 3., 3., 3., 4.,
         4., 3., 2., 1., 1., 2., 1.]])


In [57]:
# Pull one batch without writing a loop: take the first item of a new iterator
# over the loader, then report the feature and label shapes.
x, y = next(iter(dataloader))
print(x.shape, y.shape, sep='\n')

torch.Size([25, 20])
torch.Size([25, 1])
