<a href="https://colab.research.google.com/github/andreidore/aicrowd_blitz_may_2020/blob/master/foodc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb
!pip install tqdm

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/d1/c7/8bf2c62c3f133f45e135a8a116e4e0f162043248e3db54de30996eaf1a8a/wandb-0.8.36-py2.py3-none-any.whl (1.4MB)
[K     |████████████████████████████████| 1.4MB 2.8MB/s 
[?25hCollecting gql==0.2.0
  Downloading https://files.pythonhosted.org/packages/c4/6f/cf9a3056045518f06184e804bae89390eb706168349daa9dff8ac609962a/gql-0.2.0.tar.gz
Collecting configparser>=3.8.1
  Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl
Collecting subprocess32>=3.5.3
[?25l  Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz (97kB)
[K     |████████████████████████████████| 102kB 8.9MB/s 
Collecting GitPython>=1.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/44/33/917e6fde1cad13daa7053f39b7c8af3be287314f75f1b1ea8d3fe37a857

In [0]:
import os
import shutil
from collections import Counter
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.applications import vgg16,resnet50,vgg19
from tensorflow.keras.applications import mobilenet
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.metrics import categorical_crossentropy
import wandb
from wandb.keras import WandbCallback
import pandas as pd
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split

In [0]:
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/foodc/v0.1/train_images.zip
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/foodc/v0.1/test_images.zip
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/foodc/v0.1/train.csv
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/foodc/v0.1/test.csv

In [0]:
!mkdir data
!mkdir data/test
!mkdir data/train
!unzip train_images -d data/train
!unzip test_images -d data/test

In [0]:
BATCH_SIZE=64


In [0]:
classes=pd.read_csv('train.csv')
test_classes=pd.read_csv("test.csv")  

In [7]:
classes.head()

Unnamed: 0,ImageId,ClassName
0,f27632d7e5.jpg,water
1,efa87919ed.jpg,pizza-margherita-baked
2,4f169e8c8d.jpg,broccoli
3,a6956654bf.jpg,salad-leaf-salad-green
4,d99ce8c3bf.jpg,egg


In [59]:
NUM_CLASSES=len(classes["ClassName"].unique())

print("Count classes:",NUM_CLASSES)

Count classes: 61


In [8]:
test_classes.head()

Unnamed: 0,ImageId
0,90e63a2fde.jpg
1,a554d1ca8d.jpg
2,48317e8ee8.jpg
3,79528df667.jpg
4,6d2f2f63f5.jpg


In [61]:
classes_list=[(x,y) for x, y in zip(classes["ImageId"],classes["ClassName"])]
classes_list[0:10]

[('f27632d7e5.jpg', 'water'),
 ('efa87919ed.jpg', 'pizza-margherita-baked'),
 ('4f169e8c8d.jpg', 'broccoli'),
 ('a6956654bf.jpg', 'salad-leaf-salad-green'),
 ('d99ce8c3bf.jpg', 'egg'),
 ('0c2b1641a8.jpg', 'butter'),
 ('3f7e5ed3a9.jpg', 'bread-white'),
 ('ffcfba255c.jpg', 'butter'),
 ('b0687e0bfc.jpg', 'bread-white'),
 ('d6dc2e4278.jpg', 'bread-white')]

In [66]:


shutil.rmtree("images")

os.makedirs("images",exist_ok=True)
for c in tqdm(classes["ClassName"].unique()):
  #print(c)
  os.makedirs(os.path.join("images",c))

for r in tqdm(classes_list):
  shutil.copyfile(os.path.join("data","train","train_images",r[0]),os.path.join("images",r[1],r[0]))

  

100%|██████████| 61/61 [00:00<00:00, 16263.19it/s]
100%|██████████| 9323/9323 [00:02<00:00, 3439.94it/s]


In [67]:
! ls -l images/ | wc -l

62


In [73]:


image_generator = ImageDataGenerator(
        preprocessing_function = vgg19.preprocess_input,
        horizontal_flip = True, 
        validation_split = 0.1,
)

train_dataset = image_generator.flow_from_directory(
        "images/",
        target_size = (224, 224), 
        batch_size = BATCH_SIZE,
        subset = 'training', 
    )

val_dataset = image_generator.flow_from_directory(
        "images/",
        target_size = (224, 224), 
        batch_size = BATCH_SIZE,
        subset = 'validation'
    )

counter = Counter(train_dataset.classes)       
print(counter)                   
print(len(counter))
print(list(counter.keys()))
max_val = float(max(counter.values()))       
class_weights = {class_id : max_val/num_images for class_id, num_images in counter.items()}    

print(class_weights)




Found 8421 images belonging to 61 classes.
Found 902 images belonging to 61 classes.
Counter({55: 777, 9: 536, 44: 482, 11: 359, 18: 340, 53: 323, 13: 312, 14: 268, 43: 231, 58: 217, 36: 214, 23: 189, 24: 176, 1: 172, 3: 163, 51: 160, 35: 153, 30: 147, 42: 141, 59: 136, 15: 124, 17: 123, 57: 110, 28: 101, 41: 99, 54: 99, 21: 97, 16: 96, 39: 88, 60: 81, 2: 80, 22: 80, 31: 79, 49: 79, 56: 78, 6: 77, 12: 76, 50: 76, 34: 73, 4: 72, 20: 68, 25: 68, 10: 67, 26: 64, 0: 63, 32: 63, 52: 63, 5: 62, 38: 62, 27: 60, 29: 59, 8: 57, 7: 55, 47: 55, 46: 46, 45: 45, 48: 44, 33: 39, 37: 37, 19: 34, 40: 26})
61
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60]
{0: 12.333333333333334, 1: 4.517441860465116, 2: 9.7125, 3: 4.766871165644171, 4: 10.791666666666666, 5: 12.53225806451613, 6: 10.090909090909092, 7: 14.127272727272

In [81]:
base_model = resnet50.ResNet50(weights = "imagenet", include_top=False, input_shape = (224,224, 3))
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [82]:
# iterate through its layers and lock them to make them not trainable with this code
for layer in base_model.layers:
    layer.trainable = False

base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [83]:


# use “get_layer” method to save the last layer of the network
# save the output of the last layer to be the input of the next layer
#last_layer = base_model.get_layer('block5_pool')
#last_output = last_layer.output

#print(last_output)

# flatten the classifier input which is output of the last layer of VGG16 model
x = Flatten()(base_model.output)

# add 2 FC layers, each has 4096 units and relu activation 
#x = Dense(100, activation='relu', name='FC_3')(x)
#x = BatchNormalization()(x)
#x = Dropout(0.5)(x)
# add 2 FC layers, each has 4096 units and relu activation 
#x = Dense(100, activation='relu', name='FC_2')(x)
#x = BatchNormalization()(x)
#x = Dropout(0.5)(x)
x = Dense(256, activation='relu', name='FC_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
# add our new softmax layer with 3 hidden units
x = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)

# instantiate a new_model using keras’s Model class
model = Model(inputs=base_model.input, outputs=x)

# print the new_model summary
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
____________________________________________________________________________________________

In [84]:
wandb.init(name='CNN-transfer-6', project="foodc",config={"hyper": "parameter"})

W&B Run: https://app.wandb.ai/andrei-dore/foodc/runs/3qtkbm8i

In [0]:
optimizer = Adam(lr=0.0001,decay=1e-6)
optimizer= tf.keras.optimizers.RMSprop(learning_rate=0.001)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, 
              metrics=['accuracy'])

In [0]:
# compile the model


lrr= ReduceLROnPlateau(
                      monitor='val_loss', #Metric to be measured
                       factor=.5, #Factor by which learning rate will be reduced
                       patience=5,  #No. of epochs after which if there is no improvement in the val_acc, the learning rate is reduced
                       min_lr=1e-7) #The minimum learning rate 

model_checkpoint=ModelCheckpoint(filepath="best_model.hdf5",verbose=1,save_best_only=True)
wand_callback=WandbCallback()
early_stop_callback = EarlyStopping(monitor='val_loss',verbose=2,patience=15)

model.fit(train_dataset, steps_per_epoch=270,
                   validation_data=val_dataset, validation_steps=40, epochs=50, verbose=2,callbacks=[model_checkpoint,wand_callback,lrr,early_stop_callback],class_weight=class_weights)



Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.95730, saving model to best_model.hdf5
270/270 - 157s - loss: 11.4467 - accuracy: 0.4595 - val_loss: 1.9573 - val_accuracy: 0.4947 - lr: 0.0010
Epoch 2/50

Epoch 00002: val_loss improved from 1.95730 to 1.91539, saving model to best_model.hdf5
270/270 - 156s - loss: 4.0563 - accuracy: 0.7598 - val_loss: 1.9154 - val_accuracy: 0.5050 - lr: 0.0010
Epoch 3/50

Epoch 00003: val_loss did not improve from 1.91539
270/270 - 155s - loss: 1.9377 - accuracy: 0.8819 - val_loss: 2.0417 - val_accuracy: 0.5013 - lr: 0.0010
Epoch 4/50


In [19]:
model

<tensorflow.python.keras.engine.training.Model at 0x7f516005f5f8>