# Split Dataset into Train/Val/Test Directories

**Data loading instructions:** 

First, unzip the dataset and store it in a folder named VireoFood172/.
VireoFood172/ should contain two sub-directories: ready_chinese_food/ and SplitAndIngreLabel/.

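This notebook (and its code) should be placed in the directory that directly contains VireoFood172/ (i.e. VireoFood172/ sits next to this notebook), because all paths used below are relative to the notebook's working directory.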

After running all the cells in this notebook, the image data will be stored in a new Dataset/ directory (created next to this notebook). Dataset/ contains three sub-directories, train/, val/, and test/, which hold the training, validation, and testing images respectively.

In [2]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import models, transforms, datasets
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader, sampler
# from skimage import io, transform 

# Set USE_GPU to False to force CPU execution even when CUDA is available.
USE_GPU = True
dtype = torch.float32
# Use CUDA only when it is both requested and actually available; otherwise CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

In [3]:
# Global constants
# All paths are relative to the notebook's working directory and use "/" separators.
DATA_DIR = "VireoFood172"
LABELS_DIR = "/".join((DATA_DIR, "SplitAndIngreLabel"))    # VireoFood172/SplitAndIngreLabel
EXAMPLES_DIR = "/".join((DATA_DIR, "ready_chinese_food"))  # VireoFood172/ready_chinese_food

# Label files stored inside LABELS_DIR.
FOOD_LIST_PATH = "/".join((LABELS_DIR, "FoodList.txt"))              # VireoFood172/SplitAndIngreLabel/FoodList.txt
INGREDIENT_LIST_PATH = "/".join((LABELS_DIR, "IngredientList.txt"))  # VireoFood172/SplitAndIngreLabel/IngredientList.txt
INGRE_LABEL_PATH = "/".join((LABELS_DIR, "IngreLabel.txt"))          # VireoFood172/SplitAndIngreLabel/IngreLabel.txt

# Feature switches.
NORMALIZE_ON = True  # Enable data normalization
TRANSFORM_ON = True  # Enable data transformations.

In [4]:
# Load the food names: FoodList.txt is read whole and split into one entry per line.
# NOTE(review): list indices are 0-based; confirm whether the dataset's food labels
# are 0- or 1-based before using an index as a label.
food_names = [] # List of length 172 where index is the food label, and value is the food name.
with open(FOOD_LIST_PATH) as fp:
    food_names = fp.read().splitlines()

# Load the ingredient names the same way from IngredientList.txt.
ingredient_names = [] # List of length 353 where index is the ingredient label, and value is the ingredient name.
with open(INGREDIENT_LIST_PATH) as fp:
    ingredient_names = fp.read().splitlines()

# Sanity check: the recorded run printed 172 food labels and 353 ingredient labels.
print("Number of food labels {}".format(len(food_names)))
print("Number of ingredient labels {}".format(len(ingredient_names)))

Number of food labels 172
Number of ingredient labels 353


In [5]:
def get_relative_image_path(path):
    ''' Prefix a dataset-internal image path with EXAMPLES_DIR.

        Input: path, a string with a leading slash such as "/100/xiachufang_1.jpg".
        Returns: EXAMPLES_DIR followed directly by path, e.g.
        "VireoFood172/ready_chinese_food/100/xiachufang_1.jpg".
        No separator is inserted, so the leading slash must already be part of path. '''
    pieces = (EXAMPLES_DIR, path)
    return "".join(pieces)

In [6]:
def get_file_annotations(mode):
    ''' Write one dataset split into ./Dataset/<mode>/ and return its image paths.

        Reads the split file for the given mode from LABELS_DIR (TR.txt for "train",
        VAL.txt for "val", TE.txt for "test"). Every non-blank line is treated as an
        image path of the form "/100/xiachufang_1.jpg"; the image is opened from
        EXAMPLES_DIR and re-saved to "./Dataset/" + mode + path, creating any missing
        directories on the way.

        Input: mode (one of "train", "val", or "test")
        Returns image_paths: a list of all the training, validation, or testing image
        paths (depending on the specified "mode"), in the order they appear in the
        split file. Blank lines in the split file are skipped.
        Raises: AssertionError if mode is not one of the three supported values.
    '''
    # Maps the public mode name to the stem of its split file inside LABELS_DIR.
    split_file_stems = {"train": "TR", "val": "VAL", "test": "TE"}
    assert mode in tuple(split_file_stems), \
        'mode must be one of "train", "val", or "test"'

    file = LABELS_DIR + "/{}.txt".format(split_file_stems[mode])
    image_paths = []
    with open(file) as fp:
        # Iterate lazily instead of materialising every line with readlines().
        for line in fp:
            image_path = line.strip()
            if not image_path:
                # A blank line (e.g. a trailing newline at the end of the file) would
                # otherwise resolve to EXAMPLES_DIR itself and fail inside Image.open.
                continue
            image_paths.append(image_path)
            # The context manager closes the source file handle even if saving fails.
            with Image.open(get_relative_image_path(image_path)) as image:
                # image_path is expected to start with "/", so plain concatenation
                # yields ./Dataset/<mode>/<class>/<file>.
                new_image_path = "./Dataset/" + mode + image_path
                os.makedirs(os.path.dirname(new_image_path), exist_ok=True)
                image.save(new_image_path)
    return image_paths

In [7]:
# Build the Dataset/ directory one split at a time. Each call reads that split's file
# from LABELS_DIR, saves every listed image under ./Dataset/<mode>/, and returns the
# list of image paths for the split.
train_image_paths = get_file_annotations("train")
val_image_paths = get_file_annotations("val")            
test_image_paths = get_file_annotations("test")