<a href="https://colab.research.google.com/github/cxbxmxcx/EatNoEat/blob/master/Chapter_9_Data_Prep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports


In [0]:
import tensorflow as tf
import matplotlib.pyplot as plt

# Scikit-learn includes many helpful utilities
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import re
import numpy as np
import os
import time
import json
from glob import glob
from PIL import Image
import pickle

Download Recipe Data

In [0]:
# Download the Recipes5k archive into the Keras cache; extract=True also
# unpacks it there.
recipes_zip = tf.keras.utils.get_file('recipes.zip',
                                          origin = 'https://www.dropbox.com/s/i1hvs96mnahozq0/Recipes5k.zip?dl=1',
                                          extract = True)
print(recipes_zip)
# The folder that holds the downloaded archive is used as the data root.
data_folder = os.path.dirname(recipes_zip)
# Delete the archive once it has been fetched. Only remove it when the
# returned path is a regular file, so the cell does not raise otherwise.
if os.path.isfile(recipes_zip):
  os.remove(recipes_zip)
print(data_folder)

Downloading data from https://www.dropbox.com/s/i1hvs96mnahozq0/Recipes5k.zip?dl=1
/root/.keras/datasets/recipes.zip
/root/.keras/datasets


Setup Folder Paths

In [0]:
!dir /root/.keras/datasets
data_folder = data_folder + '/Recipes5k/'
images_folder = data_folder + 'images/'
print(images_folder)

Recipes5k
/root/.keras/datasets/Recipes5k/images/


In [0]:
# List the contents of the images folder; {images_folder} is expanded from
# the Python variable of the same name before the command runs.
%ls {images_folder}

[0m[01;34mapple_pie[0m/            [01;34mdumplings[0m/                [01;34momelette[0m/
[01;34mbaby_back_ribs[0m/       [01;34medamame[0m/                  [01;34monion_rings[0m/
[01;34mbaklava[0m/              [01;34meggs_benedict[0m/            [01;34moysters[0m/
[01;34mbeef_carpaccio[0m/       [01;34mescargots[0m/                [01;34mpad_thai[0m/
[01;34mbeef_tacos[0m/           [01;34mfalafel[0m/                  [01;34mpaella[0m/
[01;34mbeef_tartare[0m/         [01;34mfilet_mignon[0m/             [01;34mpancakes[0m/
[01;34mbeet_salad[0m/           [01;34mfish_and_chips[0m/           [01;34mpanna_cotta[0m/
[01;34mbeignets[0m/             [01;34mfoie_gras[0m/                [01;34mpeking_duck[0m/
[01;34mbibimbap[0m/             [01;34mfrench_fries[0m/             [01;34mpho[0m/
[01;34mbread_pudding[0m/        [01;34mfrench_onion_soup[0m/        [01;34mpizza[0m/
[01;34mbreakfast_burrito[0m/    [01;34mfrench_toast[0m

Download Food Nutrient Data

In [0]:
# Fetch the foods table into the Keras cache and show where it was saved.
foods_url = 'https://www.dropbox.com/s/6jjje7mhqo9ycyi/foods.txt?dl=1'
foods_txt = tf.keras.utils.get_file('foods.txt', origin=foods_url)
print(foods_txt)

Downloading data from https://www.dropbox.com/s/6jjje7mhqo9ycyi/foods.txt?dl=1
/root/.keras/datasets/foods.txt


In [0]:
def get_category_array(keto, carbs, health):
  """Build a 3-element float array from the three inputs, each shifted by -5.

  Every argument is converted with float() first, so numeric strings are
  accepted as well as numbers. The result keeps the argument order:
  [keto - 5, carbs - 5, health - 5].
  """
  shifted = [float(value) - 5 for value in (keto, carbs, health)]
  return np.array(shifted)

In [0]:
import csv

def get_food_nutrients(nutrient_file):
  """Read a comma-separated foods table into a dict keyed by food name.

  The first row is treated as a header and only printed. For every later
  row, column 0 is the food name and columns 1-3 are passed to
  get_category_array; the returned array becomes the value for that food.
  Completely blank rows are ignored.

  Args:
    nutrient_file: path of the CSV file to read.

  Returns:
    dict mapping food name -> array produced by get_category_array.
  """
  foods = {}
  # Open the file that was passed in (not a notebook global). newline='' is
  # what the csv module expects so it can handle line endings itself.
  with open(nutrient_file, newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    header = next(csv_reader, None)
    if header is not None:
      print(f'Column names are {", ".join(header)}')
      line_count += 1
    for row in csv_reader:
      if not row:
        # csv.reader yields [] for an empty line; nothing to parse there.
        continue
      foods[row[0]] = get_category_array(row[1], row[2], row[3])
      line_count += 1
    # The count includes the header row.
    print(f'Processed {line_count} lines.')
  return foods

In [0]:
# Parse the downloaded foods table into a {food name: array} lookup.
food_nutrients = get_food_nutrients(foods_txt)
# Prints the whole dictionary (see the long output below).
print(food_nutrients)

Column names are name, fat, protein, carbs
Processed 102 lines.
{'apple_pie': array([ 6. , -3.1, 29. ]), 'baby_back_ribs': array([15., 18.,  0.]), 'baklava': array([ 6., -2., 25.]), 'beef_carpaccio': array([ 3., 20., -5.]), 'beef_tacos': array([ 8.,  7., 27.]), 'beef_tartare': array([39. , 28. ,  0.7]), 'beet_salad': array([22. ,  0.9,  8. ]), 'beignets': array([ 3.5,  1. , 35. ]), 'bibimbap': array([ 17.,  31., 150.]), 'bread_pudding': array([ 2.3,  2.7, 67. ]), 'breakfast_burrito': array([23., 24., 67.]), 'bruschetta': array([-0.1, -3.7,  1.2]), 'caesar_salad': array([35.,  5., 18.]), 'cannoli': array([10.,  1., 13.]), 'caprese_salad': array([12.,  0.,  7.]), 'carrot_cake': array([25. ,  1.6, 68. ]), 'ceviche': array([-3.7, 10. , -2.4]), 'cheesecake': array([23. ,  1.9, 27. ]), 'cheese_plate': array([45. , 40. ,  3.3]), 'chicken_curry': array([ 6. , 23. ,  2.5]), 'chicken quesadilla': array([45., 56., 87.]), 'chicken_wings': array([ 1.5, -0.5, -2.4]), 'chocolate_cake': array([17. , -

In [0]:
from fastprogress.fastprogress import master_bar, progress_bar
from IPython.display import Image
from os import listdir
from pickle import dump

In [0]:
def load_image(image_path):
  """Read one JPEG file and prepare it as InceptionV3 input.

  The file is decoded to 3 channels, resized to 299x299 and passed through
  InceptionV3's preprocess_input.

  Returns:
    (image, image_path): the preprocessed image and the unchanged input path.
  """
  raw_bytes = tf.io.read_file(image_path)
  decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
  resized = tf.image.resize(decoded, (299, 299))
  prepared = tf.keras.applications.inception_v3.preprocess_input(resized)
  return prepared, image_path

In [0]:
def load_images(food_w_nutrients, directory):
  """Load every image under `directory` together with its food-group label.

  Each entry of `directory` is treated as a food-group folder. Its name is
  looked up in `food_w_nutrients`, and that value is attached to every image
  loaded from the folder. Loading is best-effort: a group with no label, or
  one that fails while being read, is skipped and reported at the end. Images
  of a group that were loaded before a failure stay in the result.

  Args:
    food_w_nutrients: mapping from food-group name to its label value.
    directory: folder holding one sub-folder of images per food group.

  Returns:
    (X, Y): two equally long lists, X with the images returned by
    load_image and Y with the matching labels.
  """
  X = []
  Y = []
  skipped = []
  mb = master_bar(listdir(directory))
  for food_group in mb:
    # Resolve the label first so a group without one is skipped before any
    # image is read and decoded.
    try:
      label = food_w_nutrients[food_group]
    except KeyError:
      skipped.append(food_group)
      continue
    # os.path.join works whether or not `directory` ends with a separator.
    group_dir = os.path.join(directory, food_group)
    try:
      for pic in progress_bar(listdir(group_dir),
                              parent=mb, comment='food = ' + food_group):
        image, _ = load_image(os.path.join(group_dir, pic))
        # Append label and image together so X and Y stay aligned.
        Y.append(label)
        X.append(image)
    except Exception:
      # Catch Exception rather than using a bare except, so KeyboardInterrupt
      # still stops the run.
      skipped.append(food_group)
      continue
  if skipped:
    print('Skipped food groups: ' + ', '.join(skipped))
  return X,Y


In [0]:
X, Y = load_images(food_nutrients, images_folder)
print(len(X), len(Y))

4776 4776
