# Testing of food101 dataset

## Import

In [5]:
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split

## Parameter declaration

In [6]:
DATA_PATH = "../data/food101/"
VALIDATION_SPLIT = 0.2
BATCH_SIZE = 16
NUM_WORKERS = 1


## Message function

In [7]:


def msg(
    message: str,
):
    """
    Input:
        message (str): a message of type string, which will be printed to the terminal
            with some decoration.

    Description:
        This function takes a message and prints it nicely

    Output:
        This function has no output, it prints directly to the terminal
    """

    # word_list makes sure that the output of msg is more readable
    sentence_list = message.split(sep="\n")
    # the max-function can apparently be utilised like this:
    longest_sentence = max(sentence_list, key=len)

    n = len(longest_sentence)
    n2 = n // 2 - 1
    print(">" * n2 + "  " + "<" * n2)
    print(message)
    print(">" * n2 + "  " + "<" * n2 + "\n")



## Preparing data

In [15]:
trainval_set = datasets.Food101(
    root = DATA_PATH,
    split = "train",                         
    transform = ToTensor(), 
    download = True
    )

test_set = datasets.Food101(
    root = DATA_PATH, 
    split = "test", 
    transform = ToTensor(),
    download = True
    )

# Creating data indices for training and validation splits:
train_num = int(len(trainval_set) * (1 - VALIDATION_SPLIT))
train_set, val_set = random_split(trainval_set, [train_num, len(trainval_set) - train_num])
msg("Split train data into trainset and validation set.")

train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    )

val_loader = DataLoader(
    val_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    )

test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    )


>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<
Split train data into trainset and validation set.
>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<



### Iterable check
It looks like a normal data set, when I pick them out as Tensors:

In [18]:
blaBla = trainval_set[0]
print(blaBla)

(tensor([[[0.9725, 0.9686, 0.9608,  ..., 0.3216, 0.3490, 0.3686],
         [0.9725, 0.9686, 0.9608,  ..., 0.3412, 0.3608, 0.3725],
         [0.9765, 0.9686, 0.9647,  ..., 0.3647, 0.3765, 0.3804],
         ...,
         [0.5961, 0.5922, 0.5882,  ..., 0.5804, 0.5922, 0.6078],
         [0.5882, 0.5843, 0.5765,  ..., 0.5843, 0.5922, 0.6039],
         [0.5843, 0.5765, 0.5647,  ..., 0.5922, 0.5961, 0.6039]],

        [[0.9569, 0.9529, 0.9451,  ..., 0.0941, 0.1098, 0.1294],
         [0.9569, 0.9529, 0.9451,  ..., 0.1137, 0.1216, 0.1333],
         [0.9608, 0.9529, 0.9490,  ..., 0.1373, 0.1373, 0.1412],
         ...,
         [0.2980, 0.2941, 0.2902,  ..., 0.5451, 0.5647, 0.5804],
         [0.2902, 0.2863, 0.2784,  ..., 0.5490, 0.5647, 0.5765],
         [0.2863, 0.2784, 0.2667,  ..., 0.5569, 0.5686, 0.5765]],

        [[0.9608, 0.9569, 0.9490,  ..., 0.0157, 0.0275, 0.0471],
         [0.9608, 0.9569, 0.9490,  ..., 0.0353, 0.0471, 0.0510],
         [0.9647, 0.9569, 0.9529,  ..., 0.0588, 0.0627, 0

But this is funky beyond my comprehension. Why can I all of a sudden not iterate on the loaders??

In [17]:
train_iter = iter(train_loader)
bla = train_iter.next()

RuntimeError: stack expects each tensor to be equal size, but got [3, 512, 512] at entry 0 and [3, 384, 512] at entry 2