<a href="https://colab.research.google.com/github/hw-tan/Capstone-Project/blob/main/2_Image_Embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the project files can be read and written from this runtime
drive.mount('/content/drive',  force_remount=True) 

Mounted at /content/drive


# Image Embedding

In this notebook we will generate image embeddings from [EfficientNet](https://arxiv.org/abs/1905.11946). EfficientNet is a Convolutional Neural Network pre-trained on ImageNet data. It has proven effective on a wide variety of tasks with transfer learning.

We generate the image embeddings by taking the output of the final layer before the output (classification) layer of the EfficientNet model. In this notebook, we experiment with different levels of re-training of the EfficientNetB3 model.

In [None]:
!nvidia-smi

Wed Aug  4 09:17:49 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P0    58W / 149W |    378MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install efficientnet



In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import gc


# Neural Network
import tensorflow as tf

from efficientnet.tfkeras import EfficientNetB3
from tensorflow import keras
# Sequential and Dense are taken from tensorflow.keras so that they come from the
# same Keras implementation as the efficientnet.tfkeras backbone. Mixing the
# standalone `keras` package with tf.keras models makes the backbone show up as a
# ModuleWrapper layer, which cannot be summarised or saved to HDF5.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [None]:
# Preprocess dataset
directory = '/content/drive/MyDrive/Capstone/'
train = pd.read_csv(directory + 'Data/train.csv')

# Full path of every training image. The result of apply() is assigned as a
# Series so it stays aligned with train's index.
train['filepath'] = train['image'].apply(lambda x: f'{directory}Data/train_images/{x}')

# Create dictionary of items by label group
label_dict = train.groupby('label_group')['posting_id'].unique().to_dict()

# Create list of matching products
train['matches'] = train['label_group'].map(label_dict)

# Create dictionary of items by image_phash. It gets its own name because it
# maps perceptual hashes, not label groups, to posting ids.
phash_dict = train.groupby('image_phash')['posting_id'].unique().to_dict()

# Create list of image duplicates by phash
train['image_duplicates'] = train['image_phash'].map(phash_dict)

In [None]:
# Preview the first rows, including the derived filepath, matches and image_duplicates columns
train.head()

Unnamed: 0,posting_id,image,image_phash,title,label_group,filepath,matches,image_duplicates
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_129225211, train_2278313361]",[train_129225211]
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_3386243561, train_3423213080]",[train_3386243561]
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_2288590299, train_3803689425]",[train_2288590299]
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_2406599165, train_3342059966]",[train_2406599165]
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_3369186413, train_921438619]",[train_3369186413]


In [None]:
# (number of rows, number of columns) of the training table
train.shape

(34250, 8)

Make the pipeline for loading and resizing the images

Reference: https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/

In [None]:
# Set input shape for EfficientNetB3
im_size = 300

# Set image paths of all images
image_paths = train['filepath']

# Set batch size
batch = 32

# To deal with the dataset size, we run the model in groups.
# The group count is cast to int so that the group indices are integers rather
# than floats (a float index shows up as "Group 0.0" in the progress messages).
group_size = 5000
groups = np.arange(int(np.ceil(len(train) / group_size)))


In [None]:
# Pipeline to process images
def process_image(image_path):
  
  # Read image from file path
  image = tf.io.read_file(image_path)

  # Decode a JPEG-encoded image to a uint8 tensor
  image = tf.image.decode_jpeg(image, channels = 3)

  # Resize image to efficientnet size
  image = tf.image.resize(image, (im_size,im_size))

  # Scale image vector
  image = tf.cast(image, tf.float32) / 255.0
  return image

In [None]:
# Create tensorflow dataset from image paths
def get_data(image_paths):
  dataset = tf.data.Dataset.from_tensor_slices(image_paths)

  # Process dataset with the image processing function created above. Set parallel calls to autotune
  dataset = dataset.map(process_image, num_parallel_calls = tf.data.AUTOTUNE)

  # Set batch size
  dataset = dataset.batch(batch_size = batch)

  # Set prefetch to autotune - allows later elements to be prepared while the current element is being processed
  dataset = dataset.prefetch(tf.data.AUTOTUNE)
  return dataset

In [None]:
# Initialize the EfficientNetB3 architecture with pre-trained ImageNet weights
# include_top=False removes the final classification layer; with pooling="avg" the model outputs
# one vector of 1536 numerical values per image (see the (None, 1536) output shape in the model summary further down)
B3_model = EfficientNetB3(include_top=False, weights='imagenet',  pooling="avg")

In [None]:
# Generate image embeddings from EfficientNetB3
# Create empty list for embeddings
embeddings = []

# Walk over image_paths in slices of group_size rows. Taking the slice bounds
# from range() keeps every index a plain int, so no float-to-int casts are
# needed and the progress message reads "Group 0" instead of "Group 0.0".
for group_number, start in enumerate(range(0, len(image_paths), group_size)):
  # Get image dataset for this slice
  image_dataset = get_data(image_paths[start:start + group_size])

  # Generate embeddings
  image_embeddings = B3_model.predict(image_dataset, verbose=2)

  # Append to embeddings list
  embeddings.append(image_embeddings)

  # Print status
  print(f'Group {group_number} completed')

# Stack the per-group arrays into one array with one row per image
train_image_embeddings = np.concatenate(embeddings)

# Delete temporary variables to free memory
del embeddings
del image_dataset
del image_embeddings

157/157 - 1540s
Group 0.0 completed
157/157 - 1631s
Group 1.0 completed
157/157 - 1702s
Group 2.0 completed
157/157 - 1613s
Group 3.0 completed
157/157 - 1593s
Group 4.0 completed
157/157 - 1619s
Group 5.0 completed
133/133 - 1435s
Group 6.0 completed


In [None]:
# Save default embeddings as npy file
# Save the embeddings of the ImageNet-pretrained model (no re-training) as an npy file
np.save(f'{directory}Data/image_embedding_1.npy', train_image_embeddings)

In [None]:
# Number of distinct label groups in the training data
len(train.label_group.unique())

### Transfer Learning


In [None]:
# Distinct label groups, in order of first appearance
unique_groups = train['label_group'].unique()

# Number of unique labels
num_labels = len(unique_groups)

# Dictionary from original label group to a consecutive integer id
label_dict = {group: index for group, index in zip(unique_groups, list(range(num_labels)))}

# Add new column for renamed labels (for one-hot encoding)
train['label'] = train['label_group'].map(label_dict)
train.head()

Unnamed: 0,posting_id,image,image_phash,title,label_group,filepath,matches,image_duplicates,label
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_129225211, train_2278313361]",[train_129225211],0
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_3386243561, train_3423213080]",[train_3386243561],1
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_2288590299, train_3803689425]",[train_2288590299],2
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_2406599165, train_3342059966]",[train_2406599165],3
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,/content/drive/MyDrive/Capstone/Data/train_ima...,"[train_3369186413, train_921438619]",[train_3369186413],4


In [None]:
# Set input shape for EfficientNetB3
im_size = 300

# Set image paths of all images
image_paths = train['filepath']

# Set batch size
batch = 32

# To deal with the dataset size, we run the model in groups.
# The group count is cast to int so that the group indices are integers rather
# than floats (a float index shows up as "Group 0.0" in the progress messages).
group_size = 5000
groups = np.arange(int(np.ceil(len(train) / group_size)))

# Set number of classes
num_class = len(train['label_group'].unique())

# List of labels (the consecutive integer ids stored in the 'label' column)
label_list = train['label'].to_list()

In [None]:
# Pipeline to process images together with their labels
def process_image_wlabels(image_path, label):
  """Load one image and one-hot encode its label.

  Args:
    image_path: Path of the image file to read.
    label: Integer class id of the image.

  Returns:
    A tuple (image, label): the image as produced by process_image and the
    label as a one-hot vector of length num_class.
  """
  # Reuse the unlabeled pipeline so that training images and the images used
  # for embedding generation are always preprocessed the same way
  image = process_image(image_path)

  # One-hot encode the label for categorical cross-entropy
  label = tf.one_hot(label, num_class)
  return image, label

In [None]:
# Create tensorflow dataset from image paths
def get_data_wlabels(image_paths, labels):
  dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

  # Process dataset with the image processing function created above. Set parallel calls to autotune
  dataset = dataset.map(process_image_wlabels, num_parallel_calls = tf.data.AUTOTUNE)

  # Set batch size
  dataset = dataset.batch(batch_size = batch)

  # Set prefetch to autotune - allows later elements to be prepared while the current element is being processed
  dataset = dataset.prefetch(tf.data.AUTOTUNE)
  return dataset

In [None]:
# Create tensorflow dataset with train data
# Create tensorflow dataset with train data: batches of (image, one-hot label) pairs for all training images
train_data = get_data_wlabels(image_paths,label_list)

Embeddings 2:

Re-training on top_conv (Conv2D)

In [None]:
# Initialize the EfficientNetB3 architecture with pre-trained ImageNet weight
B3_model = EfficientNetB3(include_top=False, weights='imagenet',  pooling="avg")
B3_model.trainable = True

# Freeze every layer of the backbone...
for layer in B3_model.layers:
  layer.trainable = False

# ...then unfreeze only top_conv. Looking the layer up by name instead of by
# position (-4) states the intent and raises an error if the layer is missing,
# rather than silently unfreezing a different layer.
B3_model.get_layer('top_conv').trainable = True

In [None]:
# List the layers of the backbone with their output shapes and parameter counts
B3_model.summary()

Model: "efficientnet-b3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, None, None, 4 1080        input_8[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, None, None, 4 160         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activation (Activation)    (None, None, None, 4 0           stem_bn[0][0]                    
____________________________________________________________________________________

In [None]:
# Classifier used for re-training: EfficientNetB3 backbone followed by a softmax
# layer with one unit per class
model_2 = Sequential()
model_2.add(B3_model)
# The number of units is tied to num_class, the same value used as the one-hot
# depth of the labels, so the output layer and the targets cannot get out of sync
model_2.add(Dense(num_class, activation='softmax'))
model_2.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["acc"],
)

In [None]:
# Train for 5 epochs. train_data is already batched by get_data_wlabels, so
# batch_size is not passed to fit(); it must not be specified for dataset inputs.
history = model_2.fit(
    train_data,
    epochs = 5,
    )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Keep only the first layer of model_2 (the re-trained backbone, without the
# softmax layer) and wrap it in its own model for embedding generation
backbone_2 = model_2.layers[0]
model_trained_2 = Sequential([backbone_2])

In [None]:
# Save the trained classifier under the project folder on Drive. A relative path
# such as 'new model' is written to the Colab runtime's local disk instead.
model_2.save(f'{directory}Models/model_2')



INFO:tensorflow:Assets written to: new model/assets


In [None]:
# Generate image embeddings from the re-trained backbone of model_2
# Create empty list for embeddings
embeddings = []

# Walk over image_paths in slices of group_size rows. Taking the slice bounds
# from range() keeps every index a plain int, so no float-to-int casts are
# needed and the progress message reads "Group 0" instead of "Group 0.0".
for group_number, start in enumerate(range(0, len(image_paths), group_size)):
  # Get image dataset for this slice
  image_dataset = get_data(image_paths[start:start + group_size])

  # Generate embeddings
  image_embeddings = model_trained_2.predict(image_dataset, verbose=2)

  # Append to embeddings list
  embeddings.append(image_embeddings)

  # Print status
  print(f'Group {group_number} completed')

# Stack the per-group arrays into one array with one row per image
train_image_embeddings = np.concatenate(embeddings)

# Delete temporary variables to free memory
del embeddings
del image_dataset
del image_embeddings

In [None]:
# Save default embeddings as npy file
# Save the embeddings produced by model_trained_2 (backbone re-trained on top_conv) as an npy file
np.save(f'{directory}Data/image_embedding_2.npy', train_image_embeddings)

Embeddings 3:

Re-training on top_conv (Conv2D), block7b layers

In [None]:
# Initialize the EfficientNetB3 architecture with pre-trained ImageNet weight
B3_model = EfficientNetB3(include_top=False, weights='imagenet',  pooling="avg")
B3_model.trainable = True

# Only the last 19 layers are left trainable; every layer before them is frozen.
# Per the section heading these are meant to be top_conv and the block7b layers.
first_trainable = len(B3_model.layers) - 19
for position, layer in enumerate(B3_model.layers):
  layer.trainable = position >= first_trainable

In [None]:
# Classifier used for re-training: EfficientNetB3 backbone followed by a softmax
# layer with one unit per class
model_3 = Sequential()
model_3.add(B3_model)
# The number of units is tied to num_class, the same value used as the one-hot
# depth of the labels, so the output layer and the targets cannot get out of sync
model_3.add(Dense(num_class, activation='softmax'))
model_3.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["acc"],
)

In [None]:
# Show the two layers of the classifier and the trainable / non-trainable parameter counts
model_3.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
module_wrapper (ModuleWrappe (None, 1536)              10783528  
_________________________________________________________________
dense (Dense)                (None, 11014)             16928518  
Total params: 27,712,046
Trainable params: 19,766,374
Non-trainable params: 7,945,672
_________________________________________________________________


In [None]:
# Train for 5 epochs. train_data is already batched by get_data_wlabels, so
# batch_size is not passed to fit(); it must not be specified for dataset inputs.
history = model_3.fit(
    train_data,
    epochs = 5,
    )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Save the trained classifier to the 'models' folder of the project directory
model_3.save(f'{directory}models')



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Capstone/models/assets


In [None]:
# Keep only the first layer of model_3 (the re-trained backbone, without the
# softmax layer) and wrap it in its own model for embedding generation
backbone_3 = model_3.layers[0]
model_trained_3 = Sequential([backbone_3])

In [None]:
# NOTE(review): the recorded output of this cell is a ValueError. Presumably the
# wrapper model has not been built yet because it has not seen any input - confirm,
# and either build the model first or drop this cell.
model_trained_3.summary()

ValueError: ignored

In [None]:
!pip install pyyaml h5py



In [None]:
# The original cell read from `model_train`, a name that is never defined in
# this notebook and therefore raises NameError on a fresh run. model_3 is the
# classifier trained just above, so its backbone layer is used instead.
# NOTE(review): confirm model_3 is the intended source; `model` is not used afterwards.
model = Sequential(model_3.layers[0])

In [None]:
# NOTE(review): the recorded output of this cell is a ValueError. The ".h5py"
# suffix is presumably meant to be the ".h5" file extension (h5py is the name of
# the Python package) - confirm the intended format before re-running.
model_trained_3.save(directory + "model.h5py")



ValueError: ignored

In [None]:
# NOTE(review): the recorded output of this cell is a NotImplementedError, so no
# HDF5 file is written here. model_3 is already saved to directory + 'models' in
# an earlier cell; confirm whether this second copy is still needed.
model_3.save(directory + 'Models/my_h5_model.h5')

NotImplementedError: ignored

In [None]:
# Generate image embeddings from the re-trained backbone of model_3
# Create empty list for embeddings
embeddings = []

# Walk over image_paths in slices of group_size rows. Taking the slice bounds
# from range() keeps every index a plain int, so no float-to-int casts are
# needed and the progress message reads "Group 0" instead of "Group 0.0".
for group_number, start in enumerate(range(0, len(image_paths), group_size)):
  # Get image dataset for this slice
  image_dataset = get_data(image_paths[start:start + group_size])

  # Generate embeddings
  image_embeddings = model_trained_3.predict(image_dataset, verbose=2)

  # Append to embeddings list
  embeddings.append(image_embeddings)

  # Print status
  print(f'Group {group_number} completed')

# Stack the per-group arrays into one array with one row per image
train_image_embeddings = np.concatenate(embeddings)

# Delete temporary variables to free memory
del embeddings
del image_dataset
del image_embeddings

In [None]:
# Save default embeddings as npy file
# Save the embeddings produced by model_trained_3 (backbone with its last 19 layers re-trained) as an npy file
np.save(f'{directory}Data/image_embedding_3.npy', train_image_embeddings)