# Project Sandbox

Space for testing code.

### Setup

In [11]:
import os

import pandas as pd
import torch
from torchvision.transforms import transforms

from util.pytorch_dataset import image_dataset

# path to MNIST image data directory
#   - set the MNIST_DIR environment variable to override the default location
#     (an unset or empty variable falls back to the default)
#   - os.path.join(..., "") guarantees a trailing path separator, which the
#     path templates in this notebook rely on (they append names directly to MNIST_DIR)
MNIST_DIR = os.path.join(
    os.environ.get("MNIST_DIR") or "/home/dylan/datasets/mnist_png/", "")

### Create an MNIST Image PyTorch Dataloader

The MNIST dataset was previously written to a local directory: each sample is saved as an image file under either the `train` or `test` subdirectory, and each split has an accompanying `csv` file (`train_labels.csv` / `test_labels.csv`) that maps image filenames (`Image Filename` column) to labels (`Label` column).

In [14]:
# load the per-split label tables (each row pairs an image filename with its label)
train_labels_df = pd.read_csv('{}train_labels.csv'.format(MNIST_DIR))
test_labels_df = pd.read_csv('{}test_labels.csv'.format(MNIST_DIR))

# pull the label column of each split out as a plain Python list
train_labels = train_labels_df['Label'].tolist()
test_labels = test_labels_df['Label'].tolist()

# prefix every filename with MNIST_DIR and the split's directory name
# (the templates add no separator after MNIST_DIR, so it must end with one)
train_files = [
    '{}train/{}'.format(MNIST_DIR, filename)
    for filename in train_labels_df['Image Filename']
]
test_files = [
    '{}test/{}'.format(MNIST_DIR, filename)
    for filename in test_labels_df['Image Filename']
]

# bundle the file and label lists of both splits into a single dictionary
data_lists = dict(
    train_files=train_files,
    train_labels=train_labels,
    test_files=test_files,
    test_labels=test_labels,
)

# the training/testing file and label lists are now built; to inspect them, e.g.: print(test_files[-10:])

# per-sample preprocessing chain, applied to each sample as it is passed to a batch:
#   1. Resize    - rescale the sample (image) to 32x32 (h, w)
#   2. ToTensor  - convert the resized sample to a PyTorch tensor; for 8-bit
#                  images (presumably the case for these PNGs - confirm) this
#                  also scales pixel values from [0, 255] to [0.0, 1.0]
#   3. Normalize - apply (x - mean) / std with mean 0.5 and stdev 0.5, which
#                  maps values in [0.0, 1.0] to [-1.0, 1.0]
transform = transforms.Compose([
    transforms.Resize(size=(32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [15]:
# sanity check: show the labels of the first 32 training samples
print(train_labels[:32])

[5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1, 1, 2, 4, 3, 2, 7, 3, 8]
