In [10]:
from sklearn.datasets import load_digits

data = load_digits()
X = data.data
y = data.target

print("Feature matrix shape:", X.shape)
print("Target vector shape:", y.shape)
print(f"Dataset Decsription: {data.DESCR}...")

Feature matrix shape: (1797, 64)
Target vector shape: (1797,)
Dataset Decsription: .. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 1797
:Number of Attributes: 64
:Attribute Information: 8x8 image of integer pixels in the range 0..16.
:Missing Attribute Values: None
:Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
:Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into nonoverlapping blocks

In [9]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader

In [11]:
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [12]:
class DigitsDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

In [13]:
dataset = DigitsDataset(X, y)

In [17]:
# find length
# len(dataset)
# access row
# dataset[0]

In [None]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
# shuffle
# True -> shuffle data while batching
# False -> give data in order

In [20]:
for batch_features, batch_targets in dataloader:
    print("Batch features shape:", batch_features.shape)
    print("Batch targets shape:", batch_targets.shape)
    break # printing first batch only

Batch features shape: torch.Size([2, 64])
Batch targets shape: torch.Size([2])
