In [1]:
import turicreate as tc
import shutil # copy, move file
import os # miscellaneous operation system interfaces
import pathlib
import pandas as pd
from tqdm import tqdm
import cv2


In [2]:
import tensorflow as tf
print(tf.__version__)
num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
print("Num GPUs Available: ",num_gpus)
print(tf.test.gpu_device_name())

2.2.0
Num GPUs Available:  1
/device:GPU:0


In [3]:
tc.config.set_num_gpus(num_gpus)

In [4]:
!pip install pandarallel
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True,verbose=0,nb_workers=4,use_memory_fs=True)




In [5]:
!pwd
%cd ../

/home/jupyter/FoodDetector/FoodClassification
/home/jupyter/FoodDetector


# Remove dataset folder

In [6]:
# Remove dataset folder
root_path = os.getcwd()
datasets_path = os.path.join(root_path,'FoodClassification/datasets')
turicreate_dataset_path = os.path.join(datasets_path,'image_classifier')
try:
    shutil.rmtree(datasets_path)
except OSError as e:
    print("Error: %s : %s" % (datasets_path, e.strerror))

if not os.path.isdir(datasets_path):
    os.mkdir(datasets_path)
if not os.path.isdir(turicreate_dataset_path):
    os.mkdir(turicreate_dataset_path)

# Load data

In [7]:
labels_paths_path = os.path.join(root_path,'FoodClassification/food_labels_paths.csv')
print(labels_paths_path)

/home/jupyter/FoodDetector/FoodClassification/food_labels_paths.csv


In [8]:
df = pd.read_csv(labels_paths_path)
df.shape[0]

55457

In [9]:
df['Label'].value_counts()

beer         3399
cocktail     1973
cake         1910
bread        1708
ice cream    1066
             ... 
peach          99
burrito        82
cream          79
cabbage        76
coconut        75
Name: Label, Length: 291, dtype: int64

In [10]:
df.sample(5)

Unnamed: 0,Label,Path
31986,sukiyaki,/home/jupyter/FoodDetector/datasets/UECFOOD256...
47886,beef in oyster sauce,/home/jupyter/FoodDetector/datasets/UECFOOD256...
48619,stewed pork leg,/home/jupyter/FoodDetector/datasets/UECFOOD256...
54494,bean curd family style,/home/jupyter/FoodDetector/datasets/UECFOOD256...
51254,laulau,/home/jupyter/FoodDetector/datasets/UECFOOD256...


In [11]:
df["Label"] = df["Label"].apply(lambda x: 'sandwich' if x == 'sandwiches' else x)
seleted_labels = ['french bread','croissant',
                  'sandwiches','hamburger','sandwich',
                  'apple','banana','pork bun',
                  'fried chicken','french fries','rice ball',
                  'roast chicken','sauteed vegetables','spaghetti']
# filter_df = df

# filter_df = df.groupby("Label").filter(lambda x: len(x) > 150 and len(x) < 2000)
filter_df = df[df["Label"].isin(seleted_labels)]
filter_df['Label'].value_counts()
# filter_df['Label'].nunique()
# filter_df = df[df['Label'] == 'rice']

# for idx,row in filter_df.sample(5).iterrows():
#     annotation_str = row[2]
#     annotation_str = annotation_str.replace('[','') \
#                                    .replace(']','') \
#                                    .replace('\'','') \
#                                    .split(',')
#     print(annotation_str)

hamburger             676
banana                589
sandwich              498
apple                 393
french fries          297
fried chicken         154
spaghetti             151
croissant             120
sauteed vegetables    120
pork bun              114
roast chicken         110
rice ball             108
french bread          105
Name: Label, dtype: int64

In [12]:
# def str_to_annotations(content):
#     content = content.lstrip()
#     return [float(x) for x in content.split()]

# def convert_to_turicreate_format(row):
#     # {'label':'pineapple','type':'rectangle','coordinates':{'x':410,'y':171,'width':138,'height':273}}
#     label = row[0]
#     path = row[1]
#     annotation_strs = row[2]
#     annotation_strs = annotation_strs.replace('[','') \
#                                .replace(']','') \
#                                .replace('\'','') \
#                                .split(',')
#     img = cv2.imread(path)
#     dh = img.shape[0]
#     dw = img.shape[1]
#     annotations = []
#     for annotation_str in annotation_strs:
#         annotation = str_to_annotations(annotation_str)   
#         x = round(annotation[0] * dw)
#         y = round(annotation[1] * dh)
#         w = round(annotation[2] * dw)
#         h = round(annotation[3] * dh)
#         coordinates = {'x':x,'y':y,'width':w,'height':h}
#         annotation = {'label':label,'type':'rectangle','coordinates':coordinates}
#         annotations.append(annotation)
#     return annotations

# filter_df.loc[:,'Annotation'] = filter_df.parallel_apply(convert_to_turicreate_format,axis=1)
# filter_df.sample(5)

In [13]:
# def copy_to_dataset(row):
#     path = row[1]
#     new_path = shutil.copy(path,str(turicreate_dataset_path))
#     return new_path

# filter_df.loc[:,'Path'] = filter_df.parallel_apply(copy_to_dataset,axis=1)
# filter_df.sample(5)

In [14]:
import imageio
from imgaug import augmenters as iaa 

def image_augmentation(image):
    seq = iaa.Sequential([
        iaa.Fliplr(0.5), # horizontal flips
        iaa.Crop(percent=(0, 0.1)), # random crops
        # Small gaussian blur with random sigma between 0 and 0.5.
        # But we only blur about 50% of all images.
        iaa.Sometimes(
            0.5,
            iaa.GaussianBlur(sigma=(0, 0.5))
        ),
        # Strengthen or weaken the contrast in each image.
        iaa.LinearContrast((0.75, 1.5)),
        # Add gaussian noise.
        # For 50% of all images, we sample the noise once per pixel.
        # For the other 50% of all images, we sample the noise per pixel AND
        # channel. This can change the color (not only brightness) of the
        # pixels.
        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
        # Make some images brighter and some darker.
        # In 20% of all cases, we sample the multiplier once per channel,
        # which can end up changing the color of the images.
        iaa.Multiply((0.8, 1.2), per_channel=0.2),
        # Apply affine transformations to each image.
        # Scale/zoom them, translate/move them, rotate them and shear them.
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-25, 25),
            shear=(-8, 8)
        )
    ], random_order=True)
    return seq(image=image)
    
def image_augmentation_and_copy_to_dataset(path):
    newImagePaths = []
    for i in range(10):
        image = cv2.imread(path)
        if i > 0:
            image = image_augmentation(image)
    
        name = path.split('/')[-1].replace('.jpg','')
        name = name + '_' + str(i) +'.jpg'
        new_path = os.path.join(turicreate_dataset_path,name)
        if not os.path.isfile(new_path):
            cv2.imwrite(new_path,image)    
            newImagePaths.append(new_path)
        
    return newImagePaths

filter_df['AugPaths'] = filter_df['Path'].parallel_apply(image_augmentation_and_copy_to_dataset)
filter_df.sample(5)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=430), Label(value='0 / 430'))), HB…

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Label,Path,AugPaths
23052,apple,/home/jupyter/FoodDetector/datasets/images/app...,[/home/jupyter/FoodDetector/FoodClassification...
35638,roast chicken,/home/jupyter/FoodDetector/datasets/UECFOOD256...,[/home/jupyter/FoodDetector/FoodClassification...
23351,apple,/home/jupyter/FoodDetector/datasets/images/app...,[/home/jupyter/FoodDetector/FoodClassification...
32491,fried chicken,/home/jupyter/FoodDetector/datasets/UECFOOD256...,[/home/jupyter/FoodDetector/FoodClassification...
11652,banana,/home/jupyter/FoodDetector/datasets/images/ban...,[/home/jupyter/FoodDetector/FoodClassification...


In [15]:
filter_df = filter_df.explode('AugPaths')
filter_df.sample(5)
filter_df.rename(columns={'Path':'path', 'Label':'label'},inplace=True)

In [16]:

filter_df.dropna(axis=0,inplace=True)
print(filter_df.info())
filter_df.drop(columns=['path'],inplace=True)
filter_df.rename(columns={'AugPaths':'path'},inplace=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 33560 entries, 4491 to 50590
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   label     33560 non-null  object
 1   path      33560 non-null  object
 2   AugPaths  33560 non-null  object
dtypes: object(3)
memory usage: 1.0+ MB
None


In [17]:
images = tc.image_analysis.load_images(turicreate_dataset_path)

In [18]:
annotations = tc.SFrame(filter_df)
annotations.head(5)
# annotations.explore()

label,path
sandwich,/home/jupyter/FoodDetecto r/FoodClassification/ ...
sandwich,/home/jupyter/FoodDetecto r/FoodClassification/ ...
sandwich,/home/jupyter/FoodDetecto r/FoodClassification/ ...
sandwich,/home/jupyter/FoodDetecto r/FoodClassification/ ...
sandwich,/home/jupyter/FoodDetecto r/FoodClassification/ ...


In [19]:
joined_sframe = images.join(annotations)
joined_sframe.head(5)

path,image,label
/home/jupyter/FoodDetecto r/FoodClassification/ ...,Height: 680 Width: 1024,sandwich
/home/jupyter/FoodDetecto r/FoodClassification/ ...,Height: 680 Width: 1024,sandwich
/home/jupyter/FoodDetecto r/FoodClassification/ ...,Height: 680 Width: 1024,sandwich
/home/jupyter/FoodDetecto r/FoodClassification/ ...,Height: 680 Width: 1024,sandwich
/home/jupyter/FoodDetecto r/FoodClassification/ ...,Height: 680 Width: 1024,sandwich


In [20]:
sample = joined_sframe.sample(0.01,seed=5)
# sample.explore()

In [21]:
joined_sframe.save(os.path.join(turicreate_dataset_path,'image_classifier.sframe'))