In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout,BatchNormalization 
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator



In [2]:

data_dir='C:\\Users\\suriy\\OneDrive\\Desktop\\fer\\train'

train_datagen = ImageDataGenerator(rescale = 1./255,
                                   validation_split = 0.2,
                                  
        rotation_range=5,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        #zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest')

valid_datagen = ImageDataGenerator(rescale = 1./255,
                                  validation_split = 0.2)


In [3]:


train_dataset  = train_datagen.flow_from_directory(directory = data_dir,
                                                   target_size = (48,48),
                                                   class_mode = 'categorical',
                                                   subset = 'training',
                                                   batch_size = 64,
                                                   color_mode='grayscale')
valid_dataset = valid_datagen.flow_from_directory(directory = data_dir,
                                                  target_size = (48,48),
                                                  class_mode = 'categorical',
                                                  subset = 'validation',
                                                  batch_size = 64,
                                                  color_mode='grayscale')


Found 22968 images belonging to 7 classes.
Found 5741 images belonging to 7 classes.


In [5]:
vgg_model = VGG16(include_top=False, weights='imagenet')
print(vgg_model.summary())

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [6]:
from tensorflow.keras.models import Model

#Get the dictionary of config for vgg16
vgg_config = vgg_model.get_config()

# Change the input shape to new desired shape
h, w, c = 48, 48, 1
vgg_config["layers"][0]["config"]["batch_input_shape"] = (None, h, w, c)
#Create new model with the updated configuration
vgg_updated = Model.from_config(vgg_config)
print(vgg_updated.summary())

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 48, 48, 1)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        640       
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0     

In [7]:
# Check Weights of first conv layer in the original model...
orig_model_conv1_block1_wts = vgg_model.layers[1].get_weights()[0]

print(orig_model_conv1_block1_wts[:,:,0,0])
print(' ')
print(orig_model_conv1_block1_wts[:,:,1,0])
print(' ')
print(orig_model_conv1_block1_wts[:,:,2,0])

[[ 0.42947057  0.373467   -0.06136011]
 [ 0.27476987  0.03868078 -0.36722335]
 [-0.05746817 -0.26224968 -0.35009676]]
 
[[ 0.55037946  0.44007453 -0.08138704]
 [ 0.34573907  0.04063221 -0.4535013 ]
 [-0.05863491 -0.33066967 -0.4850302 ]]
 
[[ 0.4800154   0.4085474  -0.06514555]
 [ 0.31047726  0.05020237 -0.40338343]
 [-0.05087169 -0.2852275  -0.41851634]]


In [8]:
# Check Weights of first conv layer in the new model...
new_model_conv1_block1_wts = vgg_updated.layers[1].get_weights()[0]
print(new_model_conv1_block1_wts[:,:,0,0])
#Notice the Random weights....

[[-0.02031171 -0.09611569  0.07441065]
 [ 0.03286584  0.01689783 -0.00397398]
 [ 0.02323868 -0.0769002  -0.09419598]]


In [9]:
#Let us average weights for all RGB channels for the first convolutional layer
#and assign it to the first conv layer in our new model. 

# Function that calculates average of weights along the channel axis  
def avg_wts(weights):  
  average_weights = np.mean(weights, axis=-2).reshape(weights[:,:,-1:,:].shape)  #Find mean along the channel axis (second to last axis)
  return(average_weights)

In [10]:
#Get the configuration for the updated model and extract layer names. 
#We will use these names to copy over weights from the original model. 
vgg_updated_config = vgg_updated.get_config()
vgg_updated_layer_names = [vgg_updated_config['layers'][x]['name'] for x in range(len(vgg_updated_config['layers']))]


In [11]:
#Name of the first convolutional layer.
#Remember that this is the only layer with new additional weights. All other layers
#will have same weights as the original model. 
first_conv_name = vgg_updated_layer_names[1]

In [12]:
#Update weights for all layers. And for the first conv layer replace weights with average of all 3 channels. 
for layer in vgg_model.layers:
    if layer.name in vgg_updated_layer_names:
     
      if layer.get_weights() != []:  #All convolutional layers and layers with weights (no input layer or any pool layers)
        target_layer = vgg_updated.get_layer(layer.name)
    
        if layer.name in first_conv_name:    #For the first convolutionl layer
          weights = layer.get_weights()[0]
          biases  = layer.get_weights()[1]
          
          weights_single_channel = avg_wts(weights)
                                                    
          target_layer.set_weights([weights_single_channel, biases])  #Now set weights for the first conv. layer
          target_layer.trainable = False   #You can make this trainable if you want. 
    
        else:
          target_layer.set_weights(layer.get_weights())   #Set weights to all other layers. 
          target_layer.trainable = False  #You can make this trainable if you want.

In [13]:
# Check Weights of first conv layer in the new model...
#Compare against the original model weights
new_model_conv1_block1_wts_updated = vgg_updated.layers[1].get_weights()[0]
print(new_model_conv1_block1_wts_updated[:,:,0,0])


[[ 0.48662183  0.40736297 -0.06929757]
 [ 0.31032872  0.04317178 -0.40803602]
 [-0.05565825 -0.29271564 -0.4178811 ]]


In [14]:
vgg_updated.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 48, 48, 1)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        640       
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0     

In [15]:
model=Sequential()

for layer in vgg_updated.layers:
        layer.trainable=False

model.add(vgg_updated)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(32,kernel_initializer='he_uniform',activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(32,kernel_initializer='he_uniform',activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(32,kernel_initializer='he_uniform',activation='relu'))
model.add(BatchNormalization())
model.add(Dense(7, activation='softmax'))

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 1, 1, 512)         14713536  
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               262656    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 512)               2048      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1

In [17]:
model.compile(optimizer=Adam(learning_rate=0.001),loss='categorical_crossentropy',metrics=['accuracy'])


In [None]:
history = model.fit(train_dataset, validation_data=valid_dataset, epochs=50,verbose=2)

Epoch 1/50
359/359 - 297s - loss: 2.0526 - accuracy: 0.1949 - val_loss: 1.8073 - val_accuracy: 0.2639
Epoch 2/50
359/359 - 287s - loss: 1.8408 - accuracy: 0.2392 - val_loss: 1.7737 - val_accuracy: 0.2695
Epoch 3/50
359/359 - 279s - loss: 1.8099 - accuracy: 0.2471 - val_loss: 1.7587 - val_accuracy: 0.2707
Epoch 4/50
359/359 - 296s - loss: 1.7929 - accuracy: 0.2594 - val_loss: 1.7514 - val_accuracy: 0.2747
Epoch 5/50
359/359 - 286s - loss: 1.7856 - accuracy: 0.2621 - val_loss: 1.7402 - val_accuracy: 0.2770
Epoch 6/50
359/359 - 290s - loss: 1.7768 - accuracy: 0.2698 - val_loss: 1.7318 - val_accuracy: 0.2851
Epoch 7/50
359/359 - 295s - loss: 1.7705 - accuracy: 0.2693 - val_loss: 1.7290 - val_accuracy: 0.2904
Epoch 8/50
