In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import ast

In [12]:
# Read the ImageNet index-to-label file and parse its text as a Python literal.
with open("/content/drive/MyDrive/ML_Project/Food_not_food/imagenet1000_clsidx_to_labels.txt", "r") as labels_file:
    labels_text = labels_file.read()
imagenet_classes = ast.literal_eval(labels_text)

In [14]:
# Print the key of every ImageNet class whose label text contains "banana".
for class_idx in (idx for idx, label in imagenet_classes.items() if "banana" in label):
    print(class_idx)

954


## Import NLTK to get a list of food names

In [15]:
import nltk 
# Download the WordNet corpus; the cell output reports where it is unpacked.
nltk.download('wordnet') 

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [16]:
from nltk.corpus import wordnet as wn

In [17]:
# Start from the 'food.n.02' synset and gather the lemma names of every synset
# reached by repeatedly following hyponym links; the set removes duplicates.
food = wn.synset('food.n.02')
food_lemma_names = set()
for hyponym_synset in food.closure(lambda synset: synset.hyponyms()):
    food_lemma_names.update(hyponym_synset.lemma_names())
food_list = list(food_lemma_names)

In [18]:
# Number of distinct lemma names collected in food_list.
len(food_list)

1621

In [19]:
# Peek at the first ten entries of food_list.
food_list[:10]

['perch',
 'Mexican_husk_tomato',
 'takeaway',
 'sea_squab',
 'side_of_beef',
 'black_cherry',
 'pie_crust',
 'lasagna',
 'Chinese_gooseberry',
 'milk_chocolate']

In [20]:
# Sanity check: is the lemma 'banana' among the collected names?
'banana' in food_list

True

## **Filter food items out of ImageNet list**

In [21]:
# Lower-case each lemma name and split it into its words at the underscores.
# NOTE: this rebinds food_list from a list of strings to a list of word lists,
# so running this cell a second time fails (lists have no .lower()).
food_list = [lemma.lower().split("_") for lemma in food_list]

In [22]:
# Flatten the per-lemma word lists into one list of single words.
flat_food_list = []
for lemma_words in food_list:
    flat_food_list.extend(lemma_words)
flat_food_list[:10]

['perch',
 'mexican',
 'husk',
 'tomato',
 'takeaway',
 'sea',
 'squab',
 'side',
 'of',
 'beef']

In [23]:
# Check ImageNet classes for foods
# NOTE: the name imagenent_food_classes (sic) is kept because later cells use it.
imagenent_food_classes = {}

# Build the lookup set once: intersecting against the raw list would re-scan
# the whole word list for every ImageNet class.
food_word_set = set(flat_food_list)

for class_idx, class_label in imagenet_classes.items():
    # A label is a comma-separated string of synonyms,
    # e.g. 'hotdog, hot dog, red hot' -> {'hotdog', 'hot dog', 'red hot'}
    synonyms = {synonym.strip() for synonym in class_label.lower().split(",")}

    # Keep the class when at least one whole synonym equals one of the food words.
    if not synonyms.isdisjoint(food_word_set):
        imagenent_food_classes[class_idx] = class_label

imagenent_food_classes

{8: 'hen',
 82: 'ruffed grouse, partridge, Bonasa umbellus',
 85: 'quail',
 86: 'partridge',
 99: 'goose',
 113: 'snail',
 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
 124: 'crayfish, crawfish, crawdad, crawdaddy',
 331: 'hare',
 339: 'sorrel',
 341: 'hog, pig, grunter, squealer, Sus scrofa',
 390: 'eel',
 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
 397: 'puffer, pufferfish, blowfish, globefish',
 457: 'bow tie, bow-tie, bowtie',
 494: 'chime, bell, gong',
 567: 'frying pan, frypan, skillet',
 599: 'honeycomb',
 600: 'hook, claw',
 626: 'lighter, light, igniter, ignitor',
 723: 'pinwheel',
 738: 'pot, flowerpot',
 750: 'quilt, comforter, comfort, puff',
 760: 'refrigerator, icebox',
 923: 'plate',
 931: 'bagel, beigel',
 932: 'pretzel',
 934: 'hotdog, hot dog, red hot',
 937: 'broccoli',
 938: 'cauliflower',
 939: 'zucchini, courgette',
 943: 'cucumber, cuke',
 944: 'artichoke, globe artichoke',
 946: 'cardoon',

In [24]:
# Number of ImageNet classes that matched at least one food word.
len(imagenent_food_classes) 

49

In [25]:
# ImageNet class keys picked out by hand as not being food; the next cell
# removes these keys from imagenent_food_classes.
non_food_classes_manual_sort = [
    457, 494, 567, 626, 723,
    738, 760, 923, 972, 976,
]

## Remove the manually identified non-food classes from the ImageNet food classes
Some classes in `imagenent_food_classes` aren't actually food, so we filter them out here.

In [26]:
# Keep only the matched classes whose key was not flagged by hand as non-food.
imagenet_manual_filtered_food_classes = {
    class_idx: class_label
    for class_idx, class_label in imagenent_food_classes.items()
    if class_idx not in non_food_classes_manual_sort
}

imagenet_manual_filtered_food_classes

{8: 'hen',
 82: 'ruffed grouse, partridge, Bonasa umbellus',
 85: 'quail',
 86: 'partridge',
 99: 'goose',
 113: 'snail',
 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
 124: 'crayfish, crawfish, crawdad, crawdaddy',
 331: 'hare',
 339: 'sorrel',
 341: 'hog, pig, grunter, squealer, Sus scrofa',
 390: 'eel',
 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
 397: 'puffer, pufferfish, blowfish, globefish',
 599: 'honeycomb',
 600: 'hook, claw',
 750: 'quilt, comforter, comfort, puff',
 931: 'bagel, beigel',
 932: 'pretzel',
 934: 'hotdog, hot dog, red hot',
 937: 'broccoli',
 938: 'cauliflower',
 939: 'zucchini, courgette',
 943: 'cucumber, cuke',
 944: 'artichoke, globe artichoke',
 946: 'cardoon',
 947: 'mushroom',
 949: 'strawberry',
 950: 'orange',
 951: 'lemon',
 952: 'fig',
 953: 'pineapple, ananas',
 954: 'banana',
 955: 'jackfruit, jak, jack',
 957: 'pomegranate',
 962: 'meat loaf, meatloaf',
 968: 'cup',
 987: 'c

In [27]:
# Number of classes left after dropping the manually flagged keys.
len(imagenet_manual_filtered_food_classes)

39

In [28]:
# Keys of the remaining food classes (iterating a dict yields its keys).
food_class_keys = list(imagenet_manual_filtered_food_classes)
food_class_keys[:10]

[8, 82, 85, 86, 99, 113, 123, 124, 331, 339]

## Get list of food and non-food classes from ImageNet

In [29]:
# Every ImageNet class whose key is not one of the food class keys.
imagenet_non_food_classes = {
    class_idx: class_label
    for class_idx, class_label in imagenet_classes.items()
    if class_idx not in food_class_keys
}

len(imagenet_non_food_classes) # we have 39 food classes 

961

## Need ImageNet Keys
Turns out the download script downloads using ImageNet class keys, for example: `n09858165 n01539573 n03405111`

So we need to map the class keys to the classes that we want.

ImageNet data downloader script - https://github.com/mf1024/ImageNet-Datasets-Downloader/blob/master/classes_in_imagenet.csv

In [30]:
# Load the downloader's class table and lower-case the class_name column
# so it can be compared with the lower-cased food words.
df = pd.read_csv(
    "https://raw.githubusercontent.com/mf1024/ImageNet-Datasets-Downloader/master/classes_in_imagenet.csv"
).assign(class_name=lambda frame: frame["class_name"].str.lower())
df.head()


Unnamed: 0,synid,class_name,urls,flickr_urls
0,n00004475,organism,8,6
1,n00005787,benthos,1264,626
2,n00006024,heterotroph,1,0
3,n00006484,cell,1251,628
4,n00007846,person,1242,1138


In [31]:
# Row count of the class table.
len(df)

21841

In [32]:
# Discard every row that has a missing value in any column.
df = df.dropna()

In [33]:
# Words to exclude from the food classes even though they occur in flat_food_list.
# The list was put together by reading through flat_food_list by hand.
not_food_list = [
    "ball", "puppy", "game", "bar", "blade", "garden",
    "hand", "head", "jacket", "key", "junk", "leg",
    "oven", "pin", "pinwheel", "plate", "pot", "rack",
    "refrigerator", "saddle", "shank", "spring", "steamer", "stick",
    "temple", "truck", "turban", "ring", "cup", "rock",
    "shell", "pilot", "runner", "smith", "ash", "sand",
]

not_food_list[:5]

['ball', 'puppy', 'game', 'bar', 'blade']

In [35]:
# Split the class table into non-food and food rows.
# A row is treated as food-like when its class_name is one of the food words.
is_food_word = df["class_name"].isin(flat_food_list)
df_non_food = df[~is_food_word]
df_food = df[is_food_word]

# Drop the hand-picked false positives. The mask is built from df_food itself so
# that its index matches the frame being filtered; a mask taken from df has a
# different index and makes pandas warn that the boolean key is being reindexed.
df_food = df_food[~df_food["class_name"].isin(not_food_list)]

# NOTE(review): rows whose class_name is in both flat_food_list and not_food_list
# end up in neither frame -- confirm that this is intended.
len(df_non_food), len(df_food)

  after removing the cwd from sys.path.


(20898, 861)

## Get list of non-food and food class keys

In [36]:
# Map each food synset ID to its class name.
imagenet_food_class_ids = df_food.synid.tolist()
imagenet_food_class_names = df_food.class_name.tolist()
imagenet_food_class_ids_and_names_dict = {
    synid: class_name
    for synid, class_name in zip(imagenet_food_class_ids, imagenet_food_class_names)
}
len(imagenet_food_class_ids_and_names_dict)

861

In [37]:
# Map each non-food synset ID to its class name.
imagenet_non_food_class_ids = df_non_food.synid.tolist()
imagenet_non_food_class_names = df_non_food.class_name.tolist()
imagenet_non_food_class_ids_and_names_dict = {
    synid: class_name
    for synid, class_name in zip(imagenet_non_food_class_ids, imagenet_non_food_class_names)
}
len(imagenet_non_food_class_ids_and_names_dict)

20898

In [38]:
# Show every synset ID that ended up in the food dictionary.
list(imagenet_food_class_ids_and_names_dict.keys())

['n00017222',
 'n00021265',
 'n00479887',
 'n01321123',
 'n01323261',
 'n01439514',
 'n01503061',
 'n01514859',
 'n01520576',
 'n01662784',
 'n01698434',
 'n01790398',
 'n01791625',
 'n01792530',
 'n01792640',
 'n01793159',
 'n01793249',
 'n01794158',
 'n01795088',
 'n01803078',
 'n01806567',
 'n01807496',
 'n01811909',
 'n01812337',
 'n01814549',
 'n01846331',
 'n01847170',
 'n01855672',
 'n01906749',
 'n01915811',
 'n01944390',
 'n01947396',
 'n01948446',
 'n01948573',
 'n01956481',
 'n01958038',
 'n01958346',
 'n01958435',
 'n01959985',
 'n01960459',
 'n01963571',
 'n01965889',
 'n01970164',
 'n01971280',
 'n01976957',
 'n01982650',
 'n01985128',
 'n01986806',
 'n01987545',
 'n02084071',
 'n02118333',
 'n02131653',
 'n02135220',
 'n02282257',
 'n02301935',
 'n02324045',
 'n02326432',
 'n02331046',
 'n02381364',
 'n02388917',
 'n02389026',
 'n02389261',
 'n02389943',
 'n02395406',
 'n02404186',
 'n02405302',
 'n02412440',
 'n02412787',
 'n02416519',
 'n02484322',
 'n02512053',
 'n025

# Try to download images

In [39]:
# Build a space-separated ID string from a two-item sample list.
test_list_images = ['n13918387', 'n13919547']
empty_string = " ".join(test_list_images)
empty_string

'n13918387 n13919547'

# Get strings to download images

In [40]:
# Synset IDs (the dictionary keys) of the food and non-food classes.
food_class_id_list = list(imagenet_food_class_ids_and_names_dict)
non_food_class_id_list = list(imagenet_non_food_class_ids_and_names_dict)

food_class_id_list[:5], non_food_class_id_list[:5]  # not the last expression of the cell, so not displayed
len(food_class_id_list), len(non_food_class_id_list) # I removed "game" while Daniel Bourke didn't (he had 862)

(861, 20898)

In [41]:
# Write function to turn list of strings into a single long string (to be executed on command line)
def convert_list_to_long_string(targ_list):
    """Join the strings in ``targ_list`` into one space-separated string.

    Args:
        targ_list: Iterable of strings to join, in order.

    Returns:
        One string with the items separated by a single space and no leading
        or trailing space; ``""`` when ``targ_list`` is empty.

    Raises:
        TypeError: If an item is not a string.
    """
    # str.join builds the result in one pass instead of repeated concatenation.
    return " ".join(targ_list)

# Space-separated synset-ID strings for the food and the non-food classes.
food_class_id_string = convert_list_to_long_string(food_class_id_list)
non_food_class_id_string = convert_list_to_long_string(non_food_class_id_list)

In [42]:
import os
# Top-level entries of the ImageNet download folder.
image_dirs = os.listdir("/content/drive/MyDrive/ML_Project/Food_not_food/data/imagenet_images")
# Bare file name (no folder part) of every file found anywhere below that folder.
image_files = [
    file_name
    for _, _, file_names in os.walk("/content/drive/MyDrive/ML_Project/Food_not_food/data/imagenet_images")
    for file_name in file_names
]

len(image_files)

55339

In [43]:
# Peek at the first five file names and the first five top-level folder entries.
image_files[:5], image_dirs[:5]

(['386460634_f488b1b40e.jpg',
  '386460478_b43b1dfa95.jpg',
  '1680334209_f9e54e540b.jpg',
  '88818548_17f95d5cd4.jpg',
  '261931770_a18953b562.jpg'],
 ['power drill', 'open-air market', 'pigeon hawk', 'preceptor', 'dustpan'])

In [44]:
# Number of top-level entries in the ImageNet download folder.
len(image_dirs)

992

# Filter downloaded images

In [45]:
# Preview the first rows of the non-food class table.
df_non_food.head()

Unnamed: 0,synid,class_name,urls,flickr_urls
0,n00004475,organism,8,6
1,n00005787,benthos,1264,626
2,n00006024,heterotroph,1,0
3,n00006484,cell,1251,628
4,n00007846,person,1242,1138


In [46]:
# Plain Python lists of the food and non-food class names, used for folder-name lookups.
food_list_filter = df_food["class_name"].tolist()
non_food_list_filter = df_non_food["class_name"].tolist()

In [47]:
# Preview the first five class names of each list.
food_list_filter[:5], non_food_list_filter[:5]

(['plant', 'food', 'squash', 'hen', 'suckling'],
 ['organism', 'benthos', 'heterotroph', 'cell', 'person'])

In [48]:
# Lower-cased names of the entries in the ImageNet download folder.
imagenet_downloaded_image_folder = list(
    map(str.lower, os.listdir("/content/drive/MyDrive/ML_Project/Food_not_food/data/imagenet_images"))
)
imagenet_downloaded_image_folder[:5]

['power drill', 'open-air market', 'pigeon hawk', 'preceptor', 'dustpan']

# Copying downloaded food images to `food_images`

In [None]:
# Copy food images from the ImageNet download folders to data/food_images
# (the originals stay in place: copy2 copies, it does not move).
from shutil import copy2
start_dir = "/content/drive/MyDrive/ML_Project/Food_not_food/data/imagenet_images"
dest_dir = "/content/drive/MyDrive/ML_Project/Food_not_food/data/food_images"
for image_folder in os.listdir(start_dir):
    print(f"Image folder: ... {image_folder}...")
    # The folder name is lower-cased before the lookup in food_list_filter.
    if image_folder.lower() not in food_list_filter:
        continue

    # Make new target dir
    new_dest_dir = os.path.join(dest_dir, image_folder)
    print(f"Making folder: {new_dest_dir}...")
    os.makedirs(new_dest_dir, exist_ok=True)

    # Folder whose images get copied
    target_image_folder = os.path.join(start_dir, image_folder)
    print(f"Target image folder: {target_image_folder}...")
    for image_filename in os.listdir(target_image_folder):
        # Use the directory entry as is. Splitting it on whitespace and keeping the
        # last piece would turn "my photo.jpg" into "photo.jpg", a file that does
        # not exist in the source folder.
        start_path = os.path.join(target_image_folder, image_filename)
        dest_path = os.path.join(new_dest_dir, image_filename)
        print(f"Copying: {start_path} to {dest_path}...")
        copy2(start_path, dest_path)


# Copying non-food images to `non_food_images`

In [None]:
# Copy non-food images from the ImageNet download folders to data/non_food_images
# (the originals stay in place: copy2 copies, it does not move).
from shutil import copy2
start_dir = "/content/drive/MyDrive/ML_Project/Food_not_food/data/imagenet_images"
dest_dir = "/content/drive/MyDrive/ML_Project/Food_not_food/data/non_food_images"
for image_folder in os.listdir(start_dir):
    print(f"Image folder: ... {image_folder}...")
    # The folder name is lower-cased before the lookup in non_food_list_filter.
    if image_folder.lower() not in non_food_list_filter:
        continue

    # Make new target dir
    new_dest_dir = os.path.join(dest_dir, image_folder)
    print(f"Making folder: {new_dest_dir}...")
    os.makedirs(new_dest_dir, exist_ok=True)

    # Folder whose images get copied
    target_image_folder = os.path.join(start_dir, image_folder)
    print(f"Target image folder: {target_image_folder}...")
    for image_filename in os.listdir(target_image_folder):
        # Use the directory entry as is. Splitting it on whitespace and keeping the
        # last piece would turn "my photo.jpg" into "photo.jpg", a file that does
        # not exist in the source folder.
        start_path = os.path.join(target_image_folder, image_filename)
        dest_path = os.path.join(new_dest_dir, image_filename)
        print(f"Copying: {start_path} to {dest_path}...")
        copy2(start_path, dest_path)