# Testing of food101 dataset

## Import

In [14]:
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Resize
from torch.utils.data import DataLoader, random_split

## Parameter declaration

In [15]:
DATA_PATH = "../data/food101/"
VALIDATION_SPLIT = 0.2
BATCH_SIZE = 16
NUM_WORKERS = 1


## Message function

In [16]:


def msg(
    message: str,
):
    """
    Input:
        message (str): a message of type string, which will be printed to the terminal
            with some decoration.

    Description:
        This function takes a message and prints it nicely

    Output:
        This function has no output, it prints directly to the terminal
    """

    # word_list makes sure that the output of msg is more readable
    sentence_list = message.split(sep="\n")
    # the max-function can apparently be utilised like this:
    longest_sentence = max(sentence_list, key=len)

    n = len(longest_sentence)
    n2 = n // 2 - 1
    print(">" * n2 + "  " + "<" * n2)
    print(message)
    print(">" * n2 + "  " + "<" * n2 + "\n")



## Preparing data

In [17]:
trainval_set = datasets.Food101(
    root = DATA_PATH,
    split = "train",                         
    transform = Compose([ToTensor(), Resize([512, 512])]), 
    download = True
    )

test_set = datasets.Food101(
    root = DATA_PATH, 
    split = "test", 
    transform =  Compose([ToTensor(), Resize([512, 512])]),
    download = True
    )

# Creating data indices for training and validation splits:
train_num = int(len(trainval_set) * (1 - VALIDATION_SPLIT))
train_set, val_set = random_split(trainval_set, [train_num, len(trainval_set) - train_num])
msg("Split train data into trainset and validation set.")

train_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    )

val_loader = DataLoader(
    val_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    )

test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    )


>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<
Split train data into trainset and validation set.
>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<



### Iterable check
It looks like a normal data set, when I pick them out as Tensors:

In [18]:
blaBla = trainval_set[1]
print(blaBla)

(tensor([[[0.9412, 0.9490, 0.9569,  ..., 0.8314, 0.8039, 0.7882],
         [0.9412, 0.9490, 0.9569,  ..., 0.8471, 0.8275, 0.8118],
         [0.9451, 0.9490, 0.9529,  ..., 0.8431, 0.8235, 0.8078],
         ...,
         [0.8000, 0.8118, 0.8235,  ..., 0.6039, 0.5922, 0.5843],
         [0.8000, 0.8157, 0.8275,  ..., 0.5725, 0.5686, 0.5647],
         [0.8039, 0.8196, 0.8314,  ..., 0.5647, 0.5725, 0.5804]],

        [[0.9451, 0.9529, 0.9608,  ..., 0.8824, 0.8549, 0.8392],
         [0.9451, 0.9529, 0.9608,  ..., 0.8980, 0.8784, 0.8627],
         [0.9490, 0.9529, 0.9569,  ..., 0.8941, 0.8745, 0.8588],
         ...,
         [0.8588, 0.8706, 0.8745,  ..., 0.6078, 0.5961, 0.5882],
         [0.8588, 0.8745, 0.8784,  ..., 0.5765, 0.5725, 0.5686],
         [0.8627, 0.8784, 0.8824,  ..., 0.5686, 0.5765, 0.5843]],

        [[0.9529, 0.9608, 0.9686,  ..., 0.9137, 0.8863, 0.8706],
         [0.9529, 0.9608, 0.9686,  ..., 0.9294, 0.9098, 0.8941],
         [0.9569, 0.9608, 0.9647,  ..., 0.9255, 0.9059, 0

But this is funky beyond my comprehension. Why can I all of a sudden not iterate on the loaders??

In [23]:
train_iter = iter(train_loader)
bla = train_iter.next()
print(bla[0].shape)
print(bla[1].shape)

torch.Size([16, 3, 512, 512])
torch.Size([16])
