In [154]:
import keras
from keras import layers

# Symbolic input for a whole sentence; shape (32, 9, 11, 276, 1) excludes the batch axis.
# NOTE(review): presumably 32 word slots, each a 9 x 11 x 276 volume with one channel -- confirm.
SentenceInput = keras.Input(
    name='SentenceInput',
    shape=(32, 9, 11, 276, 1),
)

# Symbolic input with shape (9, 11, 276, 16), i.e. one volume with 16 channels.
# It is used below to try out the attention models on their own.
WordInput = keras.Input(
    name='WordInput',
    shape=(9, 11, 276, 16),
)

In [157]:
#Create TimeAttention class
class WordTimeAttention(keras.Model):
    """Attention gate computed from max- and average-pooled features.

    The 5-D input is pooled with a (9, 11, 1) window by a max-pooling and an
    average-pooling layer. Each pooled tensor goes through the same two Dense
    layers (the layer objects are reused, so the weights are shared), the two
    results are added, and a sigmoid is applied to the sum.

    Args:
        dense1_units: Number of units of the first shared Dense layer.
        dense2_units: Number of units of the second shared Dense layer; this
            is also the size of the last axis of the output.
        i: Index appended (as text) to the names of the internal layers.
        name: Name of this model.
    """

    def __init__(self, dense1_units:int, dense2_units:int, i:int, name: str):
        super().__init__(name = name)
        i = str(i)
        self.maxpool = layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool'+i)
        self.avgpool = layers.AveragePooling3D(pool_size = (9, 11, 1), name = 'TimeAvgPool'+i)
        self.dense1 = layers.Dense(dense1_units, activation='relu', name = 'TimeDense1'+i)
        self.dense2 = layers.Dense(dense2_units, activation='relu', name = 'TimeDense2'+i)
        self.add = layers.Add(name = 'FreqTimeAdd'+i)
        self.sigmoid = layers.Activation('sigmoid', name = 'FreqTimeSigmoid'+i)

    def call(self, inputs):
        """Return the sigmoid attention weights for `inputs`."""
        xmax = self.maxpool(inputs)
        xavg = self.avgpool(inputs)
        # Both branches reuse dense1/dense2, so they share the same weights.
        xmax = self.dense1(xmax)
        xmax = self.dense2(xmax)
        xavg = self.dense1(xavg)
        xavg = self.dense2(xavg)
        x = self.add([xmax, xavg])
        return self.sigmoid(x)

    def compute_output_shape(self, input_shape):
        """Return the output shape for a 5-D `input_shape`.

        `compute_output_shape` is the hook name used by Keras layers; the
        previous `calculate_output_shape` name is not part of the Keras API,
        so the framework never called it.

        The pooling layers use the default 'valid' padding, so each pooled
        axis has size ``(size - pool) // stride + 1``. The last axis is set by
        the second Dense layer, not by the input channel count.
        """
        batch, *spatial, _channels = tuple(input_shape)
        pooled = tuple(
            None if size is None else (size - pool) // stride + 1
            for size, pool, stride in zip(
                spatial, self.maxpool.pool_size, self.maxpool.strides
            )
        )
        return (batch, *pooled, self.dense2.units)

    def calculate_output_shape(self, input_shape):
        """Backward-compatible alias of `compute_output_shape`."""
        return self.compute_output_shape(input_shape)

In [158]:
# Two time-attention models with different Dense widths (32 -> 16 and 64 -> 32).
FreqWordTimeAttention1 = WordTimeAttention(
    dense1_units=32,
    dense2_units=16,
    i=1,
    name='FreqWordTimeAttention1',
)
FreqWordTimeAttention2 = WordTimeAttention(
    dense1_units=64,
    dense2_units=32,
    i=2,
    name='FreqWordTimeAttention2',
)

In [159]:
# Apply the first time-attention model to the symbolic word input.
# Note: `WordOutput` is reassigned by later cells in this notebook.
WordOutput = FreqWordTimeAttention1(WordInput)

In [160]:
# Print the layer and parameter summary of the time-attention model.
FreqWordTimeAttention1.summary()

In [179]:
#Create SpaceAttention class
class WordSpaceAttention(keras.Model):
    """Attention gate computed by pooling along the third spatial axis.

    The 5-D input is pooled with a (1, 1, 276) window ('same' padding) by a
    max-pooling and an average-pooling layer. The two pooled tensors are
    concatenated along axis 3, and a 16-filter Conv3D with kernel (1, 1, 2)
    and stride (1, 1, 2) followed by a sigmoid merges each max/avg pair.

    Args:
        i: Index appended (as text) to the names of the internal layers.
        name: Name of this model.
    """

    def __init__(self, i: int, name: str):
        super().__init__(name = name)
        i = str(i)
        self.maxpool = layers.MaxPooling3D(pool_size = (1, 1, 276), padding='same', name = 'SpaceMaxPool'+i)
        self.avgpool = layers.AveragePooling3D(pool_size = (1, 1, 276), padding = 'same', name = 'SpaceAvgPool'+i)
        self.concat = layers.Concatenate(axis = 3, name = 'SpaceConcat'+i)
        self.Conv3D = layers.Conv3D(filters = 16, kernel_size = (1,1,2), strides = (1,1,2), padding= 'same', activation='sigmoid', name = 'SpaceConvAttn'+i)

    def call(self, inputs):
        """Return the sigmoid attention weights for `inputs`."""
        xmax = self.maxpool(inputs)
        xavg = self.avgpool(inputs)
        x = self.concat([xmax, xavg])
        return self.Conv3D(x)

    def compute_output_shape(self, input_shape):
        """Return the output shape for a 5-D `input_shape`.

        `compute_output_shape` is the hook name used by Keras layers; the
        previous `calculate_output_shape` name is not part of the Keras API,
        so the framework never called it. The shape it returned (input size
        on axis 3, half the input channels) also did not match what the
        layers above produce.

        With 'same' padding every pooling/convolution axis has size
        ``ceil(size / stride)``; the concatenation doubles axis 3 in between,
        and the last axis is the number of convolution filters.
        """
        def same_padding_size(size, stride):
            # Ceiling division; unknown (None) sizes stay unknown.
            return None if size is None else -(-size // stride)

        batch, *spatial, _channels = tuple(input_shape)
        pooled = [
            same_padding_size(size, stride)
            for size, stride in zip(spatial, self.maxpool.strides)
        ]
        # The concat axis counts the batch axis, hence the -1 for `pooled`.
        concat_index = self.concat.axis - 1
        if pooled[concat_index] is not None:
            pooled[concat_index] *= 2
        convolved = tuple(
            same_padding_size(size, stride)
            for size, stride in zip(pooled, self.Conv3D.strides)
        )
        return (batch, *convolved, self.Conv3D.filters)

    def calculate_output_shape(self, input_shape):
        """Backward-compatible alias of `compute_output_shape`."""
        return self.compute_output_shape(input_shape)

In [180]:
# Two space-attention models that differ only in their name suffix.
FreqWordSpaceAttention1 = WordSpaceAttention(
    i=1,
    name='FreqWordSpaceAttention1',
)
FreqWordSpaceAttention2 = WordSpaceAttention(
    i=2,
    name='FreqWordSpaceAttention2',
)

In [181]:
# Apply the first space-attention model to the symbolic word input, then print its summary.
# Note: this overwrites the `WordOutput` assigned by an earlier cell.
WordOutput = FreqWordSpaceAttention1(WordInput)
FreqWordSpaceAttention1.summary()

In [182]:
# Create a class that inherits from keras.Model 
class CNN(keras.Model):
    """Conv3D block gated by a time-attention and a space-attention model.

    Args:
        filters: Number of filters of the Conv3D layer.
        kernel_size: Kernel size of the Conv3D layer.
        time_attn_dense1: Units of the first Dense layer of the time attention.
        time_attn_dense2: Units of the second Dense layer of the time attention.
        i: Index forwarded to the two attention sub-models for layer naming.
        name: Name of this model; also the prefix of every sub-layer name.
    """

    def __init__(self, filters: int, kernel_size: tuple, time_attn_dense1:int, time_attn_dense2:int, i:int, name:str):
        super().__init__(name=name)
        suffix = str(i)
        self.conv1 = layers.Conv3D(
            filters,
            kernel_size,
            padding='same',
            activation='relu',
            name=name + 'Conv',
        )
        self.TimeAttention = WordTimeAttention(
            time_attn_dense1,
            time_attn_dense2,
            suffix,
            name=name + 'TimeAttention',
        )
        self.Multiply1 = layers.Multiply(name=name + 'Multiply1')
        self.SpaceAttention = WordSpaceAttention(suffix, name=name + 'SpaceAttention')
        self.Multiply2 = layers.Multiply(name=name + 'Multiply2')
        self.Multiply3 = layers.Multiply(name=name + 'Multiply3')

    def call(self, inputs):
        """Convolve `inputs` and multiply the result by both attention maps."""
        features = self.conv1(inputs)

        # Time attention is computed from, and applied to, the conv output.
        time_weights = self.TimeAttention(features)
        time_weighted = self.Multiply1([features, time_weights])

        # Space attention is also computed from the raw conv output and is
        # applied on top of the time-weighted features.
        space_weights = self.SpaceAttention(features)
        attended = self.Multiply2([time_weighted, space_weights])

        # The attended features are multiplied with the conv output once more.
        return self.Multiply3([features, attended])

In [183]:
# Attention CNN block: 16 filters, (1, 1, 2) kernel, time-attention Dense widths 32 -> 16.
FreqCNN1 = CNN(
    filters=16,
    kernel_size=(1, 1, 2),
    time_attn_dense1=32,
    time_attn_dense2=16,
    i=1,
    name='FreqCNN1',
)

In [184]:
# Apply the CNN block to the symbolic word input, then print its summary.
# Note: this overwrites the `WordOutput` assigned by earlier cells.
WordOutput = FreqCNN1(WordInput)
FreqCNN1.summary()

In [None]:
# TODO: Figure out why summary() shows the output shape as a question mark even though the number of parameters is correct.


Word Frequency Module

In [7]:
# Conv block 1: batch-normalise the input, then apply a 16-filter Conv3D (5x5x1 kernel,
# 'same' padding, ReLU). Both layers are wrapped in TimeDistributed.
# NOTE(review): `CNN_input` is not defined in any cell shown here. The printed summary lists
# the input layer as 'SentenceInput' with shape (None, 32, 9, 11, 276, 1), so it is presumably
# that tensor -- confirm, otherwise this cell fails on a fresh kernel.
freqx = layers.TimeDistributed(layers.BatchNormalization(name = 'FreqBatchNorm1'))(CNN_input) 
freqx = layers.TimeDistributed(layers.Conv3D(16, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'FreqConv1'))(freqx)

# Time attention. The two Dense layers are created once and applied to both pooled
# branches below, so the max and average paths share their weights.
freqtime_dense1 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'FreqTimeDense1_1'))
freqtime_dense2 = layers.TimeDistributed(layers.Dense(16, activation = 'relu', name = 'FreqTimeDense1_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
ffreqx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'FreqTimeMaxPool1'))(freqx)
ffreqx_max = freqtime_dense1(ffreqx_max)
ffreqx_max = freqtime_dense2(ffreqx_max)

# Same path for the average-pooled features.
ffreqx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'FreqTimeAvgPool1'))(freqx)
ffreqx_avg = freqtime_dense1(ffreqx_avg)
ffreqx_avg = freqtime_dense2(ffreqx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
freqtime_attention = layers.Add(name = 'FreqTimeAdd1')([ffreqx_avg, ffreqx_max])
freqtime_attention = layers.Activation('sigmoid', name = 'FreqTimeSigmoid1')(freqtime_attention)

# Scale the conv features by the time-attention weights.
ffreqx = layers.Multiply(name = 'FreqTimeAttnCreate1')([freqtime_attention, freqx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
ffreqx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'FreqSpaceMaxPool1'))(ffreqx)
ffreqx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'FreqSpaceAvgPool1'))(ffreqx)
# NOTE(review): the pooled tensors are 6-D here (batch and TimeDistributed axes come first),
# so with the shapes in the printed summary axis=3 appears to join them along the size-11
# axis (11 -> 22), not along the pooled axis or the channels. The stride-2 Conv3D below
# brings that axis back to 11, but each output position then combines two adjacent entries
# of the concatenated axis rather than the avg/max values of one position -- confirm intent.
freqspace_attention = layers.Concatenate(axis = 3, name = 'FreqSpaceConcat1')([ffreqx_avg_space, ffreqx_max_space])
freqspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,1), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'FreqSpaceConvAttn1'))(freqspace_attention)

# Scale the time-attended features by the space-attention weights.
ffreqx = layers.Multiply(name = 'FreqSpaceAttnCreate1')([freqspace_attention, ffreqx])

# Residual connection: add the attended features back onto the conv output.
freqblock1out = layers.Add(name = 'FreqAttnApply1')([ffreqx, freqx])

# #Conv Block 2

# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm2'))(block1out)
# x = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv2'))(x)

# #Find time attention for each channel
# time_dense3 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense2_1'))
# time_dense4 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense2_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool2'))(x)
# fx_max = time_dense3(fx_max)
# fx_max = time_dense4(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool2'))(x)
# fx_avg = time_dense3(fx_avg)
# fx_avg = time_dense4(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd2')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid2')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate2')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool2'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SpaceAvgPool2'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat2')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConvAttn2'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate2')([space_attention, fx])
# block2out = layers.Add(name = 'TimeSpaceAttnApply2')([fx, x])

# #Conv Block 3
# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm3'))(block2out)
# x = layers.TimeDistributed(layers.Conv3D(64, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv3'))(x)

# time_dense5 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense3_1'))
# time_dense6 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense3_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool3'))(x)
# fx_max = time_dense5(fx_max)
# fx_max = time_dense6(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool3'))(x)
# fx_avg = time_dense5(fx_avg)
# fx_avg = time_dense6(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd3')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid3')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate3')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool3'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SapceAvgPool3'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat3')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConv3'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate3')([space_attention, fx])
# block3out = layers.Add(name = 'TimeSpaceAttnApply3')([fx, x])

# Conv block 4: consumes the output of block 1 directly (blocks 2 and 3 are commented out above).
# Batch normalisation followed by a 32-filter Conv3D (5x5x1 kernel, 'same' padding, ReLU).

freqx = layers.TimeDistributed(layers.BatchNormalization(name = 'freqBatchNorm4'))(freqblock1out)
freqx = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'freqConv4'))(freqx)

# Time attention: Dense layers (64 -> 32 units) shared by the max and average branches.
freqtime_dense7 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'freqTimeDense4_1'))
freqtime_dense8 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'freqTimeDense4_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
ffreqx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name='freqTimeMaxPool4'))(freqx)
ffreqx_max = freqtime_dense7(ffreqx_max)
ffreqx_max = freqtime_dense8(ffreqx_max)

# Same path for the average-pooled features.
ffreqx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'freqTimeAvgPool4'))(freqx)
ffreqx_avg = freqtime_dense7(ffreqx_avg)
ffreqx_avg = freqtime_dense8(ffreqx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
freqtime_attention = layers.Add(name = 'freqTimeAdd4')([ffreqx_avg, ffreqx_max])
freqtime_attention = layers.Activation('sigmoid', name = 'freqTimeSigmoid4')(freqtime_attention)

# Scale the conv features by the time-attention weights.
ffreqx = layers.Multiply(name = 'freqTimeAttnCreate4')([freqtime_attention, freqx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
ffreqx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'freqSpaceMaxPool4'))(ffreqx)
ffreqx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'freqSpaceAvgPool4'))(ffreqx)
# NOTE(review): on these 6-D tensors axis=3 appears to be the size-11 axis, so the two pooled
# maps are stacked along it (11 -> 22) and halved again by the stride-2 Conv3D; avg/max values
# of the same position are therefore not combined with each other -- confirm this is intended.
# The kernel here is (2,2,2), whereas block 1 uses (2,2,1).
freqspace_attention = layers.Concatenate(axis = 3, name = 'freqSpaceConcat4')([ffreqx_avg_space, ffreqx_max_space])
freqspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'freqSpaceConv4'))(freqspace_attention)

# Apply the space attention, then add the attended features back onto the conv output.
ffreqx = layers.Multiply(name = 'freqSpaceAttnCreate4')([freqspace_attention, ffreqx])
freqblock4out = layers.Add(name = 'freqTimeSpaceAttnApply4')([ffreqx, freqx])

# Final word vector maker: max-pool with a (3, 3, 25) window, flatten, then a Dense stack
# (64 -> 32 -> 64, ReLU) applied through TimeDistributed.
freqx = layers.TimeDistributed(layers.MaxPool3D(pool_size = (3,3,25), name = 'freqFinalMaxPool'))(freqblock4out)
freqx = layers.TimeDistributed(layers.Flatten(name = 'freqFlatten'))(freqx)
freqx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'freqFinalDense1'))(freqx)
freqx = layers.TimeDistributed(layers.Dense(32, activation='relu', name = 'freqFinalDense2'))(freqx)
freqx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'freqFinalDense3'))(freqx)
# 16-unit layer without activation, followed by a single-unit (unnamed) output layer.
freq_output = layers.TimeDistributed(layers.Dense(16, name = 'freqOutput'))(freqx)
freq = layers.TimeDistributed(layers.Dense(1))(freq_output)

# Functional model from the shared input to the single-unit output.
# NOTE(review): `CNN_input` is not defined in the cells shown here -- confirm where it comes from.
wordfreq_model = keras.Model(CNN_input, freq)
wordfreq_model.summary()

# Compile for regression: Adadelta optimiser, mean-squared-error loss, mean absolute error
# as the reported metric. `metrics` is passed as a list, which is the form the Keras
# `Model.compile` documentation describes; a bare metric object is not accepted by every
# Keras version.
wordfreq_model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()]
)

# Save the compiled (still untrained) model to disk, then print its summary.
wordfreq_model.save('FrequencyModel.keras')
wordfreq_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 time_distributed_82 (TimeDistr  (None, 32, 9, 11, 2  4          ['SentenceInput[0][0]']          
 ibuted)                        76, 1)                                                            
                                                                                                  
 time_distributed_83 (TimeDistr  (None, 32, 9, 11, 2  416        ['time_distributed_82[0][0]']    
 ibuted)                        76, 16)                                                     

Word Length Module

In [9]:

# Conv block 1: batch-normalise the input, then apply a 16-filter Conv3D (5x5x1 kernel,
# 'same' padding, ReLU). Both layers are wrapped in TimeDistributed.
# NOTE(review): `CNN_input` is not defined in any cell shown here. The printed summary lists
# the input layer as 'SentenceInput' with shape (None, 32, 9, 11, 276, 1), so it is presumably
# that tensor -- confirm, otherwise this cell fails on a fresh kernel.
lenx = layers.TimeDistributed(layers.BatchNormalization(name = 'lenBatchNorm1'))(CNN_input)
lenx = layers.TimeDistributed(layers.Conv3D(16, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'lenConv1'))(lenx)

# Time attention. The two Dense layers are created once and applied to both pooled
# branches below, so the max and average paths share their weights.
lentime_dense1 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'lenTimeDense1_1'))
lentime_dense2 = layers.TimeDistributed(layers.Dense(16, activation = 'relu', name = 'lenTimeDense1_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
flenx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'lenTimeMaxPool1'))(lenx)
flenx_max = lentime_dense1(flenx_max)
flenx_max = lentime_dense2(flenx_max)

# Same path for the average-pooled features.
flenx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'lenTimeAvgPool1'))(lenx)
flenx_avg = lentime_dense1(flenx_avg)
flenx_avg = lentime_dense2(flenx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
lentime_attention = layers.Add(name = 'lenTimeAdd1')([flenx_avg, flenx_max])
lentime_attention = layers.Activation('sigmoid', name = 'lenTimeSigmoid1')(lentime_attention)

# Scale the conv features by the time-attention weights.
flenx = layers.Multiply(name = 'lenTimeAttnCreate1')([lentime_attention, lenx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
flenx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'lenSpaceMaxPool1'))(flenx)
flenx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'lenSpaceAvgPool1'))(flenx)
# NOTE(review): the pooled tensors are 6-D here (batch and TimeDistributed axes come first),
# so with the shapes in the printed summary axis=3 appears to join them along the size-11
# axis (11 -> 22), not along the pooled axis or the channels. The stride-2 Conv3D below
# brings that axis back to 11, but each output position then combines two adjacent entries
# of the concatenated axis rather than the avg/max values of one position -- confirm intent.
lenspace_attention = layers.Concatenate(axis = 3, name = 'lenSpaceConcat1')([flenx_avg_space, flenx_max_space])
lenspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,1), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'lenSpaceConvAttn1'))(lenspace_attention)

# Scale the time-attended features by the space-attention weights.
flenx = layers.Multiply(name = 'lenSpaceAttnCreate1')([lenspace_attention, flenx])

# Residual connection: add the attended features back onto the conv output.
lenblock1out = layers.Add(name = 'lenAttnApply1')([flenx, lenx])

# #Conv Block 2

# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm2'))(block1out)
# x = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv2'))(x)

# #Find time attention for each channel
# time_dense3 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense2_1'))
# time_dense4 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense2_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool2'))(x)
# fx_max = time_dense3(fx_max)
# fx_max = time_dense4(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool2'))(x)
# fx_avg = time_dense3(fx_avg)
# fx_avg = time_dense4(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd2')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid2')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate2')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool2'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SpaceAvgPool2'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat2')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConvAttn2'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate2')([space_attention, fx])
# block2out = layers.Add(name = 'TimeSpaceAttnApply2')([fx, x])

# #Conv Block 3
# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm3'))(block2out)
# x = layers.TimeDistributed(layers.Conv3D(64, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv3'))(x)

# time_dense5 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense3_1'))
# time_dense6 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense3_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool3'))(x)
# fx_max = time_dense5(fx_max)
# fx_max = time_dense6(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool3'))(x)
# fx_avg = time_dense5(fx_avg)
# fx_avg = time_dense6(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd3')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid3')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate3')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool3'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SapceAvgPool3'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat3')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConv3'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate3')([space_attention, fx])
# block3out = layers.Add(name = 'TimeSpaceAttnApply3')([fx, x])

# Conv block 4: consumes the output of block 1 directly (blocks 2 and 3 are commented out above).
# Batch normalisation followed by a 32-filter Conv3D (5x5x1 kernel, 'same' padding, ReLU).

# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm4'))(block3out)
lenx = layers.TimeDistributed(layers.BatchNormalization(name = 'lenBatchNorm4'))(lenblock1out)
lenx = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'lenConv4'))(lenx)

# Time attention: Dense layers (64 -> 32 units) shared by the max and average branches.
lentime_dense7 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'lenTimeDense4_1'))
lentime_dense8 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'lenTimeDense4_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
flenx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name='lenTimeMaxPool4'))(lenx)
flenx_max = lentime_dense7(flenx_max)
flenx_max = lentime_dense8(flenx_max)

# Same path for the average-pooled features.
flenx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'lenTimeAvgPool4'))(lenx)
flenx_avg = lentime_dense7(flenx_avg)
flenx_avg = lentime_dense8(flenx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
lentime_attention = layers.Add(name = 'lenTimeAdd4')([flenx_avg, flenx_max])
lentime_attention = layers.Activation('sigmoid', name = 'lenTimeSigmoid4')(lentime_attention)

# Scale the conv features by the time-attention weights.
flenx = layers.Multiply(name = 'lenTimeAttnCreate4')([lentime_attention, lenx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
flenx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'lenSpaceMaxPool4'))(flenx)
flenx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'lenSpaceAvgPool4'))(flenx)
# NOTE(review): on these 6-D tensors axis=3 appears to be the size-11 axis, so the two pooled
# maps are stacked along it (11 -> 22) and halved again by the stride-2 Conv3D; avg/max values
# of the same position are therefore not combined with each other -- confirm this is intended.
# The kernel here is (2,2,2), whereas block 1 uses (2,2,1).
lenspace_attention = layers.Concatenate(axis = 3, name = 'lenSpaceConcat4')([flenx_avg_space, flenx_max_space])
lenspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'lenSpaceConv4'))(lenspace_attention)

# Apply the space attention, then add the attended features back onto the conv output.
flenx = layers.Multiply(name = 'lenSpaceAttnCreate4')([lenspace_attention, flenx])
lenblock4out = layers.Add(name = 'lenTimeSpaceAttnApply4')([flenx, lenx])

# Final word vector maker: max-pool with a (3, 3, 25) window, flatten, then a Dense stack
# (64 -> 32 -> 64, ReLU) applied through TimeDistributed.
lenx = layers.TimeDistributed(layers.MaxPool3D(pool_size = (3,3,25), name = 'lenFinalMaxPool'))(lenblock4out)
lenx = layers.TimeDistributed(layers.Flatten(name = 'lenFlatten'))(lenx)
lenx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'lenFinalDense1'))(lenx)
lenx = layers.TimeDistributed(layers.Dense(32, activation='relu', name = 'lenFinalDense2'))(lenx)
lenx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'lenFinalDense3'))(lenx)
# 16-unit ReLU layer followed by a single-unit (unnamed) output layer.
wf_output = layers.TimeDistributed(layers.Dense(16, activation='relu', name = 'lenOutput'))(lenx)
wf = layers.TimeDistributed(layers.Dense(1))(wf_output)

# Functional model from the shared input to the single-unit output.
# NOTE(review): `CNN_input` is not defined in the cells shown here -- confirm where it comes from.
wordform_model = keras.Model(CNN_input, wf)

# Compile for regression: Adadelta optimiser, mean-squared-error loss, mean absolute error
# as the reported metric. `metrics` is passed as a list, which is the form the Keras
# `Model.compile` documentation describes; a bare metric object is not accepted by every
# Keras version.
wordform_model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()]
)

# Save the compiled (still untrained) model to disk, then print its summary.
wordform_model.save('WordFormModel.keras')
wordform_model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 time_distributed_112 (TimeDist  (None, 32, 9, 11, 2  4          ['SentenceInput[0][0]']          
 ributed)                       76, 1)                                                            
                                                                                                  
 time_distributed_113 (TimeDist  (None, 32, 9, 11, 2  416        ['time_distributed_112[0][0]']   
 ributed)                       76, 16)                                                     

Semantic Vector Module

In [50]:

# Conv block 1: batch-normalise the input, then apply a 16-filter Conv3D (5x5x1 kernel,
# 'same' padding, ReLU). Here the names are given to the TimeDistributed wrappers rather
# than to the wrapped layers.
# NOTE(review): `CNN_input` is not defined in any cell shown here. The printed summary lists
# the input layer as 'SentenceInput' with shape (None, 32, 9, 11, 276, 1), so it is presumably
# that tensor -- confirm, otherwise this cell fails on a fresh kernel.
semx = layers.TimeDistributed(layers.BatchNormalization(), name = 'semBatchNorm1')(CNN_input)
semx = layers.TimeDistributed(layers.Conv3D(16, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu'), name = 'semConv1')(semx)

# Time attention. The two Dense layers are created once and applied to both pooled
# branches below, so the max and average paths share their weights.
# (The first one names the wrapper, the second one names the wrapped Dense layer.)
semtime_dense1 = layers.TimeDistributed(layers.Dense(32, activation = 'relu'), name = 'semTimeDense1_1')
semtime_dense2 = layers.TimeDistributed(layers.Dense(16, activation = 'relu', name = 'semTimeDense1_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
fsemx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'semTimeMaxPool1'))(semx)
fsemx_max = semtime_dense1(fsemx_max)
fsemx_max = semtime_dense2(fsemx_max)

# Same path for the average-pooled features.
fsemx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'semTimeAvgPool1'))(semx)
fsemx_avg = semtime_dense1(fsemx_avg)
fsemx_avg = semtime_dense2(fsemx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
semtime_attention = layers.Add(name = 'semTimeAdd1')([fsemx_avg, fsemx_max])
semtime_attention = layers.Activation('sigmoid', name = 'semTimeSigmoid1')(semtime_attention)

# Scale the conv features by the time-attention weights.
fsemx = layers.Multiply(name = 'semTimeAttnCreate1')([semtime_attention, semx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
fsemx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'semSpaceMaxPool1'))(fsemx)
fsemx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'semSpaceAvgPool1'))(fsemx)
# NOTE(review): the pooled tensors are 6-D here (batch and TimeDistributed axes come first),
# so with the shapes in the printed summary axis=3 appears to join them along the size-11
# axis (11 -> 22), not along the pooled axis or the channels. The stride-2 Conv3D below
# brings that axis back to 11, but each output position then combines two adjacent entries
# of the concatenated axis rather than the avg/max values of one position -- confirm intent.
semspace_attention = layers.Concatenate(axis = 3, name = 'semSpaceConcat1')([fsemx_avg_space, fsemx_max_space])
semspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,1), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'semSpaceConvAttn1'))(semspace_attention)

# Scale the time-attended features by the space-attention weights.
fsemx = layers.Multiply(name = 'semSpaceAttnCreate1')([semspace_attention, fsemx])

# Residual connection: add the attended features back onto the conv output.
semblock1out = layers.Add(name = 'semAttnApply1')([fsemx, semx])

# #Conv Block 2

# semx = layers.TimeDistributed(layers.BatchNormalization(name = 'semBatchNorm2'))(semblock1out)
# semx = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'semConv2'))(semx)

# #Find time attention for each channel
# semtime_dense3 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'semTimeDense2_1'))
# semtime_dense4 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'semTimeDense2_2'))

# fsemx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'semTimeMaxPool2'))(semx)
# fsemx_max = time_dense3(fsemx_max)
# fsemx_max = time_dense4(fsemx_max)

# fsemx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'semTimeAvgPool2'))(semx)
# fsemx_avg = time_dense3(fsemx_avg)
# fsemx_avg = time_dense4(fsemx_avg)

# semtime_attention = layers.Add(name = 'semTimeAdd2')([fsemx_avg, fsemx_max])
# semtime_attention = layers.Activation('sigmoid', name = 'semTimeSigmoid2')(semtime_attention)

# fsemx = layers.Multiply(name = 'semTimeAttnCreate2')([semtime_attention, semx])

# #Find space attention for each channel
# fsemx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'semSpaceMaxPool2'))(fsemx)
# fsemx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'semSpaceAvgPool2'))(fsemx)
# semspace_attention = layers.Concatenate(axis = 3, name = 'semSpaceConcat2')([fsemx_avg_space, fsemx_max_space])
# semspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'semSpaceConvAttn2'))(semspace_attention)

# fsemx = layers.Multiply(name = 'semSpaceAttnCreate2')([semspace_attention, fsemx])
# semblock2out = layers.Add(name = 'semTimeSpaceAttnApply2')([fsemx, semx])

# #Conv Block 3
# semx = layers.TimeDistributed(layers.BatchNormalization(name = 'semBatchNorm3'))(semblock2out)
# semx = layers.TimeDistributed(layers.Conv3D(64, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'semConv3'))(semx)

# semtime_dense5 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'semTimeDense3_1'))
# semtime_dense6 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'semTimeDense3_2'))

# fsemx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'semTimeMaxPool3'))(semx)
# fsemx_max = time_dense5(fsemx_max)
# fsemx_max = time_dense6(fsemx_max)

# fsemx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'semTimeAvgPool3'))(semx)
# fsemx_avg = time_dense5(fsemx_avg)
# fsemx_avg = time_dense6(fsemx_avg)

# semtime_attention = layers.Add(name = 'semTimeAdd3')([fsemx_avg, fsemx_max])
# semtime_attention = layers.Activation('sigmoid', name = 'semTimeSigmoid3')(semtime_attention)

# fsemx = layers.Multiply(name = 'semTimeAttnCreate3')([semtime_attention, semx])

# #Find space attention for each channel
# fsemx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'semSpaceMaxPool3'))(fsemx)
# fsemx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'semSpaceAvgPool3'))(fsemx)
# semspace_attention = layers.Concatenate(axis = 3, name = 'semSpaceConcat3')([fsemx_avg_space, fsemx_max_space])
# semspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'semSpaceConv3'))(semspace_attention)

# fsemx = layers.Multiply(name = 'semSpaceAttnCreate3')([semspace_attention, fsemx])
# semblock3out = layers.Add(name = 'semTimeSpaceAttnApply3')([fsemx, semx])

# Conv block 4: consumes the output of block 1 directly (blocks 2 and 3 are commented out above).
# Batch normalisation followed by a 32-filter Conv3D (5x5x1 kernel, 'same' padding, ReLU).

semx = layers.TimeDistributed(layers.BatchNormalization(name = 'semBatchNorm4'))(semblock1out)
semx = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'semConv4'))(semx)

# Time attention: Dense layers (64 -> 32 units) shared by the max and average branches.
semtime_dense7 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'semTimeDense4_1'))
semtime_dense8 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'semTimeDense4_2'))

# Max-pool with a (9, 11, 1) window, then run the shared Dense stack.
fsemx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name='semTimeMaxPool4'))(semx)
fsemx_max = semtime_dense7(fsemx_max)
fsemx_max = semtime_dense8(fsemx_max)

# Same path for the average-pooled features.
fsemx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'semTimeAvgPool4'))(semx)
fsemx_avg = semtime_dense7(fsemx_avg)
fsemx_avg = semtime_dense8(fsemx_avg)

# Sum the two branches and apply a sigmoid to obtain the time-attention weights.
semtime_attention = layers.Add(name = 'semTimeAdd4')([fsemx_avg, fsemx_max])
semtime_attention = layers.Activation('sigmoid', name = 'semTimeSigmoid4')(semtime_attention)

# Scale the conv features by the time-attention weights.
fsemx = layers.Multiply(name = 'semTimeAttnCreate4')([semtime_attention, semx])

# Space attention: pool the time-attended features with a (1, 1, 276) window (max and average).
fsemx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'semSpaceMaxPool4'))(fsemx)
fsemx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'semSpaceAvgPool4'))(fsemx)
# NOTE(review): on these 6-D tensors axis=3 appears to be the size-11 axis, so the two pooled
# maps are stacked along it (11 -> 22) and halved again by the stride-2 Conv3D; avg/max values
# of the same position are therefore not combined with each other -- confirm this is intended.
# The kernel here is (2,2,2), whereas block 1 uses (2,2,1).
semspace_attention = layers.Concatenate(axis = 3, name = 'semSpaceConcat4')([fsemx_avg_space, fsemx_max_space])
semspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'semSpaceConv4'))(semspace_attention)

# Apply the space attention, then add the attended features back onto the conv output.
fsemx = layers.Multiply(name = 'semSpaceAttnCreate4')([semspace_attention, fsemx])
semblock4out = layers.Add(name = 'semTimeSpaceAttnApply4')([fsemx, semx])

# Final word vector maker: max-pool with a (3, 3, 25) window, flatten, then a Dense stack
# (64 -> 32 -> 64, ReLU) applied through TimeDistributed.
semx = layers.TimeDistributed(layers.MaxPool3D(pool_size = (3,3,25), name = 'semFinalMaxPool'))(semblock4out)
semx = layers.TimeDistributed(layers.Flatten(name = 'semFlatten'))(semx)
semx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'semFinalDense1'))(semx)
semx = layers.TimeDistributed(layers.Dense(32, activation='relu', name = 'semFinalDense2'))(semx)
semx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'semFinalDense3'))(semx)
# 300-unit output layer without activation.
sem_output = layers.TimeDistributed(layers.Dense(300, name = 'semOutput'))(semx)

# Functional model from the shared input to the 300-unit output.
# NOTE(review): `CNN_input` is not defined in the cells shown here -- confirm where it comes from.
semantic_encoder = keras.Model(CNN_input, sem_output)

# Compilation and saving are currently disabled; only the summary is printed.
# semantic_encoder.compile(
#     optimizer=keras.optimizers.Adadelta(),
#     loss=keras.losses.CosineSimilarity(),
#     metrics=[keras.metrics.CosineSimilarity(), keras.metrics.MeanAbsoluteError()]
# )

#semantic_encoder.save('SemanticModel.keras')
semantic_encoder.summary()



Model: "model_17"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 time_distributed_209 (TimeDist  (None, 32, 9, 11, 2  4          ['SentenceInput[0][0]']          
 ributed)                       76, 1)                                                            
                                                                                                  
 time_distributed_210 (TimeDist  (None, 32, 9, 11, 2  416        ['time_distributed_209[0][0]']   
 ributed)                       76, 16)                                                    

Part of Speech Module

In [12]:

#Conv Block 1
# Batch-normalise the input and extract 16 feature channels. Each layer is
# wrapped in TimeDistributed so it is applied to every slice along axis 1.
pos_batchnorm1 = layers.BatchNormalization(name='posBatchNorm1')
posx = layers.TimeDistributed(pos_batchnorm1)(CNN_input)

pos_conv1 = layers.Conv3D(
    16,
    kernel_size=(5, 5, 1),
    strides=1,
    padding='same',
    activation='relu',
    name='posConv1',
)
posx = layers.TimeDistributed(pos_conv1)(posx)

#Find time attention for each channel
# The two dense layers are built once and applied to both pooled tensors, so
# the max-pooled and average-pooled paths share their weights.
postime_dense1 = layers.TimeDistributed(
    layers.Dense(32, activation='relu', name='posTimeDense1_1')
)
postime_dense2 = layers.TimeDistributed(
    layers.Dense(16, activation='relu', name='posTimeDense1_2')
)

pos_time_maxpool1 = layers.TimeDistributed(
    layers.MaxPooling3D(pool_size=(9, 11, 1), name='posTimeMaxPool1')
)
fposx_max = postime_dense2(postime_dense1(pos_time_maxpool1(posx)))

pos_time_avgpool1 = layers.TimeDistributed(
    layers.AveragePooling3D(pool_size=(9, 11, 1), name='posTimeAvgPool1')
)
fposx_avg = postime_dense2(postime_dense1(pos_time_avgpool1(posx)))

# Sum both paths and squash to (0, 1) to obtain the attention weights.
pos_time_sum1 = layers.Add(name='posTimeAdd1')([fposx_avg, fposx_max])
postime_attention = layers.Activation('sigmoid', name='posTimeSigmoid1')(pos_time_sum1)

fposx = layers.Multiply(name='posTimeAttnCreate1')([postime_attention, posx])

#Find space attention for each channel
pos_space_maxpool1 = layers.TimeDistributed(
    layers.MaxPooling3D(pool_size=(1, 1, 276), name='posSpaceMaxPool1')
)
fposx_max_space = pos_space_maxpool1(fposx)

pos_space_avgpool1 = layers.TimeDistributed(
    layers.AveragePooling3D(pool_size=(1, 1, 276), name='posSpaceAvgPool1')
)
fposx_avg_space = pos_space_avgpool1(fposx)

# Stack the two pooled maps along axis 3; the stride-2 convolution over that
# axis then reduces them to a single-filter sigmoid map.
pos_space_stacked1 = layers.Concatenate(axis=3, name='posSpaceConcat1')(
    [fposx_avg_space, fposx_max_space]
)
pos_space_conv1 = layers.Conv3D(
    filters=1,
    kernel_size=(2, 2, 1),
    strides=(1, 2, 1),
    padding='same',
    activation='sigmoid',
    name='posSpaceConvAttn1',
)
posspace_attention = layers.TimeDistributed(pos_space_conv1)(pos_space_stacked1)

fposx = layers.Multiply(name='posSpaceAttnCreate1')([posspace_attention, fposx])

#Apply Time and Space attention
# Residual connection: attended features plus the un-attended conv output.
posblock1out = layers.Add(name='posAttnApply1')([fposx, posx])

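# NOTE: Conv Blocks 2 and 3 below are disabled (commented out); Conv Block 4 is fed directly from posblock1out.
# #Conv Block 2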

# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm2'))(block1out)
# x = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv2'))(x)

# #Find time attention for each channel
# time_dense3 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense2_1'))
# time_dense4 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense2_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool2'))(x)
# fx_max = time_dense3(fx_max)
# fx_max = time_dense4(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool2'))(x)
# fx_avg = time_dense3(fx_avg)
# fx_avg = time_dense4(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd2')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid2')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate2')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool2'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SpaceAvgPool2'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat2')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConvAttn2'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate2')([space_attention, fx])
# block2out = layers.Add(name = 'TimeSpaceAttnApply2')([fx, x])

# #Conv Block 3
# x = layers.TimeDistributed(layers.BatchNormalization(name = 'BatchNorm3'))(block2out)
# x = layers.TimeDistributed(layers.Conv3D(64, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'Conv3'))(x)

# time_dense5 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'TimeDense3_1'))
# time_dense6 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'TimeDense3_2'))

# fx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name = 'TimeMaxPool3'))(x)
# fx_max = time_dense5(fx_max)
# fx_max = time_dense6(fx_max)

# fx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'TimeAvgPool3'))(x)
# fx_avg = time_dense5(fx_avg)
# fx_avg = time_dense6(fx_avg)

# time_attention = layers.Add(name = 'TimeAdd3')([fx_avg, fx_max])
# time_attention = layers.Activation('sigmoid', name = 'TimeSigmoid3')(time_attention)

# fx = layers.Multiply(name = 'TimeAttnCreate3')([time_attention, x])

# #Find space attention for each channel
# fx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'SpaceMaxPool3'))(fx)
# fx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'SpaceAvgPool3'))(fx)
# space_attention = layers.Concatenate(axis = 3, name = 'SpaceConcat3')([fx_avg_space, fx_max_space])
# space_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'SpaceConv3'))(space_attention)

# fx = layers.Multiply(name = 'SpaceAttnCreate3')([space_attention, fx])
# block3out = layers.Add(name = 'TimeSpaceAttnApply3')([fx, x])

#Conv Block 4
# Second active convolution + attention stage of the POS branch; it is built
# directly on posblock1out. Every layer is wrapped in TimeDistributed so it is
# applied to each slice along axis 1.
posx = layers.TimeDistributed(layers.BatchNormalization(name = 'posBatchNorm4'))(posblock1out)
# The layer name carries the 'pos' prefix like every other layer in this module.
posx = layers.TimeDistributed(layers.Conv3D(32, kernel_size=(5,5, 1), strides=1, padding = 'same', activation='relu', name = 'posConv4'))(posx)

# Time attention: the two dense layers are created once and applied to both
# pooled tensors, so the max and average paths share weights.
postime_dense7 = layers.TimeDistributed(layers.Dense(64, activation = 'relu', name = 'posTimeDense4_1'))
postime_dense8 = layers.TimeDistributed(layers.Dense(32, activation = 'relu', name = 'posTimeDense4_2'))

fposx_max = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (9, 11, 1), name='posTimeMaxPool4'))(posx)
fposx_max = postime_dense7(fposx_max)
fposx_max = postime_dense8(fposx_max)

fposx_avg = layers.TimeDistributed(layers.AveragePooling3D(pool_size= (9, 11, 1), name = 'posTimeAvgPool4'))(posx)
fposx_avg = postime_dense7(fposx_avg)
fposx_avg = postime_dense8(fposx_avg)

# Sum both paths and squash to (0, 1) to obtain the attention weights.
postime_attention = layers.Add(name = 'posTimeAdd4')([fposx_avg, fposx_max])
postime_attention = layers.Activation('sigmoid', name = 'posTimeSigmoid4')(postime_attention)

fposx = layers.Multiply(name = 'posTimeAttnCreate4')([postime_attention, posx])

#Find space attention for each channel
fposx_max_space = layers.TimeDistributed(layers.MaxPooling3D(pool_size = (1,1,276), name = 'posSpaceMaxPool4'))(fposx)
fposx_avg_space = layers.TimeDistributed(layers.AveragePooling3D(pool_size = (1,1,276), name = 'posSpaceAvgPool4'))(fposx)
# Stack the two pooled maps along axis 3; the stride-2 convolution over that
# axis then reduces them to a single-filter sigmoid map.
posspace_attention = layers.Concatenate(axis = 3, name = 'posSpaceConcat4')([fposx_avg_space, fposx_max_space])
# NOTE(review): Conv Block 1 uses kernel_size (2,2,1) for this convolution while
# this block uses (2,2,2); left unchanged because it alters the weight shape —
# confirm which one is intended.
posspace_attention = layers.TimeDistributed(layers.Conv3D(filters = 1, kernel_size = (2,2,2), strides = (1,2,1), padding= 'same', activation='sigmoid', name = 'posSpaceConv4'))(posspace_attention)

fposx = layers.Multiply(name = 'posSpaceAttnCreate4')([posspace_attention, fposx])
# Residual connection: attended features plus the un-attended conv output.
posblock4out = layers.Add(name = 'posTimeSpaceAttnApply4')([fposx, posx])

# Final word vector maker
# Pool and flatten each slice's feature volume, then pass it through a small
# dense stack. Every layer is wrapped in TimeDistributed, so the same weights
# are applied to each slice along axis 1.
posx = layers.TimeDistributed(layers.MaxPool3D(pool_size = (3,3,25), name = 'posFinalMaxPool'))(posblock4out)
posx = layers.TimeDistributed(layers.Flatten(name = 'posFlatten'))(posx)
posx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'posFinalDense1'))(posx)
posx = layers.TimeDistributed(layers.Dense(32, activation='relu', name = 'posFinalDense2'))(posx)
posx = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'posFinalDense3'))(posx)
# 18-way softmax per slice; class index 0 is the PAD class.
POS_output = layers.TimeDistributed(layers.Dense(18, name = 'posOutput', activation='softmax'))(posx)

pos_model = keras.Model(CNN_input, POS_output)

# Label index used for padding positions.
POS_PAD_CLASS = 0

pos_model.compile(
    optimizer=keras.optimizers.Adadelta(),
    # `loss_weights` in compile() weights the losses of different model
    # *outputs*, not the classes of one output, so an 18-element per-class list
    # cannot zero out PAD there. PAD targets are excluded from the loss with
    # `ignore_class` instead (available from Keras/TF 2.10).
    loss=keras.losses.SparseCategoricalCrossentropy(ignore_class=POS_PAD_CLASS),
    # weighted_metrics only honour the `sample_weight` given to fit()/evaluate();
    # pass a weight of 0 for PAD positions there to exclude them from the metrics.
    weighted_metrics = [keras.metrics.SparseCategoricalAccuracy(), keras.metrics.SparseTopKCategoricalAccuracy(k=3)],
    run_eagerly=True
)

# Persist the compiled model and print its layer table.
pos_model.save('PosModel.keras')
pos_model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 time_distributed_137 (TimeDist  (None, 32, 9, 11, 2  4          ['SentenceInput[0][0]']          
 ributed)                       76, 1)                                                            
                                                                                                  
 time_distributed_138 (TimeDist  (None, 32, 9, 11, 2  416        ['time_distributed_137[0][0]']   
 ributed)                       76, 16)                                                     

LSTM 

In [1]:
from gensim import downloader
import pickle as pkl
from keras import layers

2024-12-13 22:17:52.633277: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-12-13 22:17:57.883755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-12-13 22:17:57.883779: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-12-13 22:18:12.819323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-

In [10]:
# Join the outputs of the four branch models along the last (feature) axis.
branch_outputs = [sem_output, wf_output, POS_output, freq_output]
LSTMin = layers.Concatenate(axis=-1)(branch_outputs)

# LSTM over axis 1 that emits a 25-unit vector at every step.
sequence_lstm = layers.LSTM(units=25, return_sequences=True)
LSTMout = sequence_lstm(LSTMin)

# End-to-end model from the shared input to the LSTM sequence output.
full = keras.Model(CNN_input, LSTMout)

full.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.CosineSimilarity(),
    metrics=keras.metrics.CosineSimilarity(),
)


In [11]:
# Write the combined model held in `full` to disk in the .keras format.
full.save('Full.keras')

In [15]:
# Reload the four previously saved branch models from the Models/ directory.
_load_saved_model = keras.models.load_model

FrequencyModel = _load_saved_model('Models/FrequencyModelv1.1.keras')
POSModel = _load_saved_model('Models/PosModelv1.1.keras')
SemanticModel = _load_saved_model('Models/SemanticModelv1.1.keras')
WordFormModel = _load_saved_model('Models/WordFormModelv1.1.keras')


In [16]:
# Freeze every loaded branch so its weights are not updated during training.
for loaded_branch in (FrequencyModel, POSModel, SemanticModel, WordFormModel):
    loaded_branch.trainable = False

In [None]:
# Run the frequency branch on the shared input; training=False keeps the
# branch in inference mode when it is called.
# NOTE(review): this cell has no execution count and looks like scratch code
# adapted from a transfer-learning example — confirm it is still needed.
x = FrequencyModel(CNN_input, training=False)

# Single-unit Dense head applied to the last axis of the branch output.
outputs = keras.layers.Dense(1)(x)
# `x` is computed from CNN_input, so the Model has to start from that tensor
# (the previous `inputs` name is not defined in this cell).
model = keras.Model(CNN_input, outputs)

In [52]:
# Show the symbolic output tensor of the layer later used as the semantic
# feature tap. The trailing comma was dropped so the tensor is displayed
# directly instead of inside a 1-tuple.
# NOTE(review): 'time_distributed_132' is an auto-generated layer name — verify
# it against the loaded model's summary.
SemanticModel.get_layer('time_distributed_132').output

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 time_distributed_91 (TimeDistr  (None, 32, 9, 11, 2  4          ['SentenceInput[0][0]']          
 ibuted)                        76, 1)                                                            
                                                                                                  
 time_distributed_92 (TimeDistr  (None, 32, 9, 11, 2  416        ['time_distributed_91[0][0]']    
 ibuted)                        76, 16)                                                     

In [61]:

def _frozen_extractor(trained_model, layer_name):
    """Return a non-trainable model mapping `trained_model`'s input to the output of `layer_name`."""
    tap_output = trained_model.get_layer(layer_name).output
    extractor = keras.Model(trained_model.input, tap_output)
    extractor.trainable = False
    return extractor


# Truncate each loaded branch at the chosen intermediate layer.
# NOTE(review): the 'time_distributed_N' names are auto-generated by Keras —
# verify them against each loaded model's summary.
wf = _frozen_extractor(WordFormModel, 'time_distributed_345')
freq = _frozen_extractor(FrequencyModel, 'time_distributed_320')
pos = _frozen_extractor(POSModel, 'time_distributed_47')
sem = _frozen_extractor(SemanticModel, 'time_distributed_132')

# Run all four frozen extractors on the same input tensor.
length = wf(CNN_input)
frequency = freq(CNN_input)
POS = pos(CNN_input)
Semantic = sem(CNN_input)

# Join the branch features along the last axis and feed them to the LSTM.
branch_features = [length, frequency, POS, Semantic]
LSTMin = layers.Concatenate(axis=-1)(branch_features)

sequence_lstm = layers.LSTM(units=26, return_sequences=True)
LSTMout = sequence_lstm(LSTMin)

full = keras.Model(CNN_input, LSTMout)

full.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.CosineSimilarity(),
    metrics=keras.metrics.CosineSimilarity(),
)

In [62]:
# Print the layer table of the combined model held in `full`.
full.summary()

Model: "model_36"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 SentenceInput (InputLayer)     [(None, 32, 9, 11,   0           []                               
                                276, 1)]                                                          
                                                                                                  
 model_32 (Functional)          (None, 32, 16)       226950      ['SentenceInput[0][0]']          
                                                                                                  
 model_33 (Functional)          (None, 32, 16)       226950      ['SentenceInput[0][0]']          
                                                                                                  
 model_34 (Functional)          (None, 32, 18)       227080      ['SentenceInput[0][0]']   

In [60]:
# Write the combined model held in `full` to disk in the .keras format.
# NOTE(review): this cell's execution count (60) is lower than that of the cell
# that builds `full` above (61), so the file on disk may hold an earlier
# `full` — re-run the cells in order to confirm.
full.save('FullModel.keras')