In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, LSTMCell, Conv2D, MaxPool2D, BatchNormalization, Flatten, Dense, Dropout
import pandas as pd
import cv2
import numpy as np

2023-01-12 13:03:32.834629: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-12 13:03:33.015347: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-01-12 13:03:33.055328: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-12 13:03:33.763612: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [2]:
def pre_his(img):
    """Equalise the luminance histogram of a colour image.

    The image is converted from BGR to YUV, histogram equalisation is applied
    to the Y (luma) channel only, and the result is converted back to BGR.
    The U and V (chroma) channels are passed through unchanged.

    Args:
        img: Three-channel image in BGR channel order (the function converts
            with ``cv2.COLOR_BGR2YUV``).
            NOTE(review): ``cv2.equalizeHist`` works on 8-bit single-channel
            data, so ``img`` is presumably ``uint8`` -- confirm at call sites.

    Returns:
        The contrast-equalised image, in BGR channel order.
    """
    channels = list(cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YUV)))
    # Index 0 is the luma plane; equalising only this plane leaves chroma alone.
    channels[0] = cv2.equalizeHist(channels[0])
    return cv2.cvtColor(cv2.merge(channels), cv2.COLOR_YUV2BGR)

In [3]:
# Load the first N_SAMPLES images listed in label.csv and turn each one into a
# 448x448 float32 RGB array scaled to [0, 1], collected in `train_data`.
SEGMENT_DIR = './Segments'
N_SAMPLES = 10                      # number of rows of label.csv to load
CROP_TOP, CROP_BOTTOM = 57, 427     # pixel rows kept from each image
CROP_LEFT, CROP_RIGHT = 79, 576     # pixel columns kept from each image
MODEL_INPUT_SIZE = (448, 448)       # (width, height) handed to cv2.resize

info_csv = pd.read_csv('label.csv')
train_data = []

# .head() stops early instead of raising when the CSV has fewer rows than
# N_SAMPLES.
for filename in info_csv["filename"].head(N_SAMPLES):
    path = f'{SEGMENT_DIR}/{filename}.png'

    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with the offending path instead of a cryptic error later on.
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {path}')

    # Keep only the region of interest. The crop alone is enough: masking the
    # outside of this rectangle first has no effect on the pixels that remain.
    output_img = img[CROP_TOP:CROP_BOTTOM, CROP_LEFT:CROP_RIGHT]
    output_img = cv2.resize(output_img, MODEL_INPUT_SIZE)

    # pre_his converts with COLOR_BGR2YUV, so it must receive the image while
    # it is still in OpenCV's native BGR order; switch to RGB only afterwards.
    output_img = pre_his(output_img)
    output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2RGB)

    # Min-max scale each image to float32 values in [0, 1].
    output_img = cv2.normalize(output_img, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    train_data.append(output_img)


In [4]:
def Timestep(img_height, img_width, color_channels, num_classes, states):
    """Build a CNN -> LSTM -> dense classifier over a batch of images.

    The convolutional part is four stages of (Conv2D, Conv2D,
    BatchNormalization, MaxPool2D) with 4, 8, 8 and 16 filters. The flattened
    features are given a new leading axis, so the LSTM sees the incoming batch
    as ONE sequence whose time steps are the batch entries; the model output
    therefore has a leading dimension of 1.

    Args:
        img_height: Height of the input images in pixels.
        img_width: Width of the input images in pixels.
        color_channels: Number of channels of the input images.
        num_classes: Size of the final softmax layer.
        states: Not used by this function.
            NOTE(review): presumably meant to seed the LSTM's initial state --
            confirm intent.

    Returns:
        A ``(model, lstm_outputs)`` tuple. ``model`` maps the image input to
        the softmax output. ``lstm_outputs`` is the list returned by the LSTM
        layer called with ``return_state=True`` (its output followed by its
        state tensors); these are symbolic Keras tensors of the graph, not
        computed values.
    """
    frames = Input(shape=(img_height, img_width, color_channels))

    # NOTE(review): every pooling layer uses a 2x2 window with a stride of 4,
    # so each stage shrinks height and width by 4x and the window skips most
    # positions -- confirm that this is intended rather than strides=(2,2).
    features = frames
    for n_filters in (4, 8, 8, 16):
        features = Conv2D(n_filters, (3,3), padding='same', activation='relu')(features)
        features = Conv2D(n_filters, (3,3), padding='same', activation='relu')(features)
        features = BatchNormalization()(features)
        features = MaxPool2D(pool_size=(2,2), padding='same', strides=(4,4))(features)

    features = Flatten()(features)

    # expand_dims(axis=0) turns (batch, features) into (1, batch, features):
    # the batch axis becomes the LSTM's time axis.
    recurrent = LSTM(12, dropout=0.04, recurrent_dropout=0.04, return_state=True)
    lstm_outputs = recurrent(tf.expand_dims(features, axis=0))

    # Element 0 is the LSTM output; the remaining elements are its states.
    head = Dense(4,activation='relu')(lstm_outputs[0])
    head = Dropout(0.2)(head)
    head = Dense(num_classes, activation='softmax')(head)

    model = Model(frames, head)
    return model, lstm_outputs

In [6]:
# Build the model for 448x448 three-channel images and two output classes,
# then print its layer summary.
timestep, lstm_state = Timestep(
    img_height=448,
    img_width=448,
    color_channels=3,
    num_classes=2,
    states=0,
)
timestep.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 448, 448, 3)]     0         
                                                                 
 conv2d_8 (Conv2D)           (None, 448, 448, 4)       112       
                                                                 
 conv2d_9 (Conv2D)           (None, 448, 448, 4)       148       
                                                                 
 batch_normalization_4 (Batc  (None, 448, 448, 4)      16        
 hNormalization)                                                 
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 112, 112, 4)      0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 112, 112, 8)       296 

In [10]:
# Run the first preprocessed image through the model as a batch of one, then
# print the LSTM tensors that Timestep returned alongside the model.
print(timestep(train_data[0][np.newaxis, ...]))
print(lstm_state)

tf.Tensor([[0.5 0.5]], shape=(1, 2), dtype=float32)
[<KerasTensor: shape=(1, 12) dtype=float32 (created by layer 'lstm_1')>, <KerasTensor: shape=(1, 12) dtype=float32 (created by layer 'lstm_1')>, <KerasTensor: shape=(1, 12) dtype=float32 (created by layer 'lstm_1')>]
