In [1]:
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
from mpl_toolkits.axes_grid1 import ImageGrid

### Validate Data Path

In [2]:
# Resolve the dataset location relative to the notebook's working directory.
cwd = os.getcwd()

## Place the data in Food-11 directory
data_in_dir = os.path.join(cwd, "Food-11")
assert os.path.isdir(data_in_dir), "Dataset directory not found: " + data_in_dir

# Short split key -> name of the sub-directory that holds that split's images.
subdirs = {
    'train' : 'training',
    'valid' : 'validation',
    'eval'  : 'evaluation'}
dirs = os.listdir(data_in_dir)

## Validate that the 3 split sub-directories exist.
## The earlier exact-match check against the directory listing was commented
## out; this only requires each split to be present, so extra entries in
## Food-11 are tolerated while a missing split still fails early here.
missing_subdirs = [name for name in subdirs.values()
                   if not os.path.isdir(os.path.join(data_in_dir, name))]
assert not missing_subdirs, (
    "Missing sub-directories in {}: {} (found: {})".format(
        data_in_dir, missing_subdirs, sorted(dirs)))

## Create output directory in current path to store images
image_dir = os.path.join(cwd, "food-classification-eda-images")
# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(image_dir, exist_ok=True)

## Create output directory to store the dataframes in pickle format
pickle_dir = os.path.join(cwd, "food-classification-pickle_data")
os.makedirs(pickle_dir, exist_ok=True)


### Collect the image paths from each data directory

In [3]:
## training, validation and evaluation data directories
training_data_dir = os.path.join(data_in_dir, subdirs['train'])
validation_data_dir = os.path.join(data_in_dir, subdirs['valid'])
evaluation_data_dir = os.path.join(data_in_dir, subdirs['eval'])

## training, validation and evaluation image paths (every entry in each directory).
## glob returns entries in a filesystem-dependent order, so sort them to keep
## the row order, and anything generated from it, reproducible across machines.
training_images = sorted(glob.glob(os.path.join(training_data_dir, "*")))
validation_images = sorted(glob.glob(os.path.join(validation_data_dir, "*")))
evaluation_images = sorted(glob.glob(os.path.join(evaluation_data_dir, "*")))

# Parallel lists, ordered training / validation / evaluation.
all_images = [training_images, validation_images, evaluation_images]
all_directories = [training_data_dir, validation_data_dir, evaluation_data_dir]

In [4]:
# Dictionary of target classes: integer class id (0-10) -> category name.
# The position of each name in the list below is its class id.
label_dict = dict(enumerate([
    'Bread',
    'Dairy product',
    'Dessert',
    'Egg',
    'Fried food',
    'Meat',
    'Noodles/Pasta',
    'Rice',
    'Seafood',
    'Soup',
    'Vegetable/Fruit',
]))

## Prepare Training Dataframe

In [21]:
# Build the training table: one row per image path, with the label taken from
# the file-name prefix before the first underscore (e.g. "0_1.jpg" -> "0").
# Labels are kept as strings. Building both columns up front also works when
# the image list is empty, where assigning `.columns` afterwards would raise.
training = pd.DataFrame({
    'Path': training_images,
    'Label': [os.path.basename(path).split('_')[0] for path in training_images],
})
trainingCount = len(training)
print(trainingCount)

# Peek at the second row as a sanity check on the path/label parsing.
# Positional access (.iloc) does not depend on the index labels; the guard
# keeps the cell runnable when fewer than two images were found.
if trainingCount > 1:
    sample_row = training.iloc[1]
    print(sample_row['Path'])
    print(sample_row['Label'])
    myString = sample_row['Path'] + ' ' + sample_row['Label']
    print(myString)

9866
C:\Users\WOLVCI10\Desktop\TensorFlow\AI-Food-Classification\Food-11\training\0_1.jpg
0
C:\Users\WOLVCI10\Desktop\TensorFlow\AI-Food-Classification\Food-11\training\0_1.jpg 0


In [24]:
# Create a .txt file which contains path names and labels:
# one "<image path> <label>" line per training image.

# os.curdir keeps the file in the current directory on every platform
# (a literal '.\\' prefix is only a directory separator on Windows; elsewhere
# it would become part of the file name).
train_list_path = os.path.join(os.curdir, 'trainPathAndLabel.txt')

# The context manager closes the file even if a write fails part-way through.
with open(train_list_path, 'w') as fid:
    fid.writelines(
        path + ' ' + label + '\n'
        for path, label in zip(training.Path, training.Label)
    )
    
    


In [25]:
# Build the validation table: one row per image path, with the label taken
# from the file-name prefix before the first underscore. Labels are kept as
# strings. Building both columns up front also works when the image list is
# empty, where assigning `.columns` afterwards would raise.
validation = pd.DataFrame({
    'Path': validation_images,
    'Label': [os.path.basename(path).split('_')[0] for path in validation_images],
})
validationCount = len(validation)
print(validationCount)

3430


In [26]:
# Create a .txt file which contains path names and labels:
# one "<image path> <label>" line per validation image.

# os.curdir keeps the file in the current directory on every platform
# (a literal '.\\' prefix is only a directory separator on Windows; elsewhere
# it would become part of the file name).
validation_list_path = os.path.join(os.curdir, 'validationPathAndLabel.txt')

# The context manager closes the file even if a write fails part-way through.
with open(validation_list_path, 'w') as fid:
    fid.writelines(
        path + ' ' + label + '\n'
        for path, label in zip(validation.Path, validation.Label)
    )

In [27]:
# Build the evaluation table: one row per image path, with the label taken
# from the file-name prefix before the first underscore. Labels are kept as
# strings. Building both columns up front also works when the image list is
# empty, where assigning `.columns` afterwards would raise.
evaluation = pd.DataFrame({
    'Path': evaluation_images,
    'Label': [os.path.basename(path).split('_')[0] for path in evaluation_images],
})
evaluationCount = len(evaluation)
print(evaluationCount)

3347


In [28]:
# Create a .txt file which contains path names and labels:
# one "<image path> <label>" line per evaluation image.

# os.curdir keeps the file in the current directory on every platform
# (a literal '.\\' prefix is only a directory separator on Windows; elsewhere
# it would become part of the file name).
evaluation_list_path = os.path.join(os.curdir, 'evaluationPathAndLabel.txt')

# The context manager closes the file even if a write fails part-way through.
with open(evaluation_list_path, 'w') as fid:
    fid.writelines(
        path + ' ' + label + '\n'
        for path, label in zip(evaluation.Path, evaluation.Label)
    )