In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2

2021-07-26 17:53:56.836344: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Random clip-shaped tensor used to smoke-test the models defined below.
# Swap '/gpu:0' for '/cpu:0' to create it on the CPU instead.
with tf.device('/gpu:0'):
    dummy_input = tf.random.normal(shape=(1, 16, 112, 112, 3))

2021-07-26 17:54:05.175677: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-07-26 17:54:06.200192: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:17:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.645GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2021-07-26 17:54:06.200688: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 1 with properties: 
pciBusID: 0000:65:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.645GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2021-07-26 17:54:06.201391: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 2 with properties: 
pciBusID: 0000:b3:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.645GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2

In [None]:
def Pool3d(kernel_size, stride):
  """Return a Keras 3-D max-pooling layer.

  Args:
    kernel_size: pooling window, forwarded as ``pool_size``.
    stride: pooling stride, forwarded as ``strides``.
  """
  return tf.keras.layers.MaxPool3D(pool_size=kernel_size, strides=stride)

def Conv3d(out_channel, kernel_size, activation):
  """Return a 'same'-padded Keras Conv3D layer.

  Args:
    out_channel: number of output filters.
    kernel_size: convolution kernel size.
    activation: activation passed straight to Keras (``None`` for linear).
  """
  return tf.keras.layers.Conv3D(
      filters=out_channel,
      kernel_size=kernel_size,
      padding='same',
      activation=activation,
  )

def Conv3D_nonact(out_channel, kernel_size):
  """Return a 'same'-padded Keras Conv3D layer with no activation.

  Args:
    out_channel: number of output filters.
    kernel_size: convolution kernel size.
  """
  return tf.keras.layers.Conv3D(
      filters=out_channel,
      kernel_size=kernel_size,
      activation=None,
      padding='same',
  )

def Softmax():
  """Return a Keras Softmax layer built with its default arguments."""
  return layers.Softmax()

def ReLU():
  """Return a Keras ReLU layer built with its default arguments."""
  return layers.ReLU()

def FC(out_dim):
  """Return a Keras Dense layer with ``out_dim`` units.

  No activation argument is passed, so the layer is created with Keras'
  default activation setting.
  """
  return tf.keras.layers.Dense(units=out_dim)

def BatchnNorm():
  """Return a Keras BatchNormalization layer built with its default arguments."""
  return layers.BatchNormalization()

def DropOut(rate):
  """Return a Keras Dropout layer that drops inputs with probability ``rate``."""
  return tf.keras.layers.Dropout(rate=rate)

class AR3D_sequential(tf.keras.Model):
  """AR3D network written as one flat model.

  The forward pass chains four stages:

  1. sfe  - a stack of 3x3x3 ReLU convolutions and 3-D max-pools.
  2. r3d  - a bottleneck of 1x1x1, 1x3x3, 3x1x1 and 1x1x1 convolutions, each
     followed by batch normalisation, added to its input and passed through
     ReLU.
  3. a3d  - dot-product attention over all positions of the feature volume,
     computed in a channel-reduced space and added back to its input.
  4. head - fc6 -> ReLU -> dropout -> fc7 -> ReLU -> dropout -> fc8.

  ``call`` returns the raw fc8 outputs (no softmax is applied to them); fc8
  has 101 units.
  """

  def __init__(self):
    super(AR3D_sequential, self).__init__()
    # sfe block
    self.conv1 = Conv3d(64,(3,3,3),'relu')
    self.pool1 = Pool3d((1,2,2),(1,2,2))
    self.conv2 = Conv3d(128,(3,3,3),'relu')
    self.pool2 = Pool3d((2,2,2),(2,2,2))
    self.conv3_a = Conv3d(256,(3,3,3),'relu')
    self.conv3_b = Conv3d(256,(3,3,3),'relu')
    self.pool3 = Pool3d((2,2,2),(2,2,2))
    self.conv4_a = Conv3d(512,(3,3,3),'relu')
    self.conv4_b = Conv3d(512,(3,3,3),'relu')
    self.pool4 = Pool3d((2,2,2),(2,2,2))

    # r3d
    self.r_conv1 = Conv3d(128,(1,1,1),'relu')
    self.r_conv1_bn = BatchnNorm()
    self.r_conv2 = Conv3d(128,(1,3,3),'relu')
    self.r_conv2_bn = BatchnNorm()
    self.r_conv3 = Conv3d(128,(3,1,1),'relu')
    self.r_conv3_bn = BatchnNorm()
    self.r_conv4 = Conv3d(512,(1,1,1),None)
    self.r_conv4_bn = BatchnNorm()

    # attention: k/q/v are projected to 512 / reduction_ratio channels and
    # restore_conv maps the attended features back to 512 channels.
    self.reduction_ratio = 4
    self.reduction_channel = int(512 / self.reduction_ratio)
    self.k_conv = Conv3d(self.reduction_channel,(1,1,1),None)
    self.q_conv = Conv3d(self.reduction_channel,(1,1,1),None)
    self.v_conv = Conv3d(self.reduction_channel,(1,1,1),None)
    self.restore_conv = Conv3d(512,(1,1,1),None)

    self.fc6 = FC(4096)
    self.fc7 = FC(4096)
    self.fc8 = FC(101)
    # 101 == numclasses

    self.softmax = Softmax()
    self.relu = ReLU()
    self.dropout = DropOut(0.5)

  def call(self, x):
    """Run the forward pass.

    Args:
      x: 5-D input tensor. All axes except the batch axis must have a
        statically known size, because they are multiplied together to
        build reshape targets.

    Returns:
      Tensor of shape (batch, 101) holding the fc8 outputs.
    """
    # sfe block
    x = self.conv1(x)
    x = self.pool1(x)
    x = self.conv2(x)
    x = self.pool2(x)
    x = self.conv3_a(x)
    x = self.conv3_b(x)
    x = self.pool3(x)
    x = self.conv4_a(x)
    x = self.conv4_b(x)
    x = self.pool4(x)

    # r3d
    sc_r3d = x
    x = self.r_conv1(x)
    x = self.r_conv1_bn(x)
    x = self.r_conv2(x)
    x = self.r_conv2_bn(x)
    x = self.r_conv3(x)
    x = self.r_conv3_bn(x)
    x = self.r_conv4(x)
    x = self.r_conv4_bn(x)
    x = self.relu(x + sc_r3d)

    # a3d
    sc_a3d = x
    depth, height, width = x.shape[1], x.shape[2], x.shape[3]
    # -1 lets TensorFlow infer the batch size, so the model also works when
    # the batch dimension is not statically known (x.shape[0] is None).
    flat_shape = [-1, depth * height * width, self.reduction_channel]

    k = tf.reshape(self.k_conv(x), flat_shape)
    q = tf.reshape(self.q_conv(x), flat_shape)
    v = tf.reshape(self.v_conv(x), flat_shape)

    # (batch, positions, positions) similarity, normalised by softmax.
    w_spatio_temporal = self.softmax(tf.einsum('bic,bjc->bij', k, q))

    # spatio_temporal: weight the values and restore the feature volume.
    out = tf.einsum('bif,bfj->bij', w_spatio_temporal, v)
    out = tf.reshape(out, [-1, depth, height, width, self.reduction_channel])
    out = self.restore_conv(out)

    x = out + sc_a3d

    # classification(fc_out)
    n_features = depth * height * width * x.shape[4]
    x = tf.reshape(x, [-1, n_features])
    # ReLU between the dense layers: without it fc6/fc7/fc8 are three stacked
    # linear maps, which is equivalent to a single linear layer at inference.
    x = self.relu(self.fc6(x))
    x = self.dropout(x)
    x = self.relu(self.fc7(x))
    x = self.dropout(x)
    x = self.fc8(x)

    return x
  
# Smoke test: build the flat model on the first GPU and push the dummy clip
# (`dummy_input`, created in an earlier cell) through it once.
with tf.device('/gpu:0'):
  model = AR3D_sequential()
  out_seq = model(dummy_input)
print("out = ", out_seq.shape)

Error: Session cannot generate requests

In [8]:
def Pool3d(kernel_size, stride):
  """Return a Keras 3-D max-pooling layer.

  Args:
    kernel_size: pooling window, forwarded as ``pool_size``.
    stride: pooling stride, forwarded as ``strides``.
  """
  return tf.keras.layers.MaxPool3D(pool_size=kernel_size, strides=stride)

def Conv3d(out_channel, kernel_size, activation):
  """Return a 'same'-padded Keras Conv3D layer.

  Args:
    out_channel: number of output filters.
    kernel_size: convolution kernel size.
    activation: activation passed straight to Keras (``None`` for linear).
  """
  return tf.keras.layers.Conv3D(
      filters=out_channel,
      kernel_size=kernel_size,
      padding='same',
      activation=activation,
  )

def Softmax():
  """Return a Keras Softmax layer built with its default arguments."""
  return layers.Softmax()

def ReLU():
  """Return a Keras ReLU layer built with its default arguments."""
  return layers.ReLU()

def FC(out_dim):
  """Return a Keras Dense layer with ``out_dim`` units.

  No activation argument is passed, so the layer is created with Keras'
  default activation setting.
  """
  return tf.keras.layers.Dense(units=out_dim)

def BatchnNorm():
  """Return a Keras BatchNormalization layer built with its default arguments."""
  return layers.BatchNormalization()

def DropOut(rate):
  """Return a Keras Dropout layer that drops inputs with probability ``rate``."""
  return tf.keras.layers.Dropout(rate=rate)

class sfe_block(tf.keras.layers.Layer):
    """Feature extractor made of 3x3x3 ReLU convolutions and 3-D max-pools.

    Layer order: conv1, pool1, conv2, pool2, conv3_a, conv3_b, pool3,
    conv4_a, conv4_b, pool4. Filter counts grow 64 -> 128 -> 256 -> 512.
    pool1 uses size/stride 1 on its first pooled axis; the other pools use 2
    on every pooled axis.
    """

    def __init__(self):
        super().__init__()
        cube = (3, 3, 3)
        halve_all = (2, 2, 2)
        keep_first = (1, 2, 2)

        # Layers are created in the same order in which they are applied.
        self.conv1 = Conv3d(64, cube, 'relu')
        self.pool1 = Pool3d(keep_first, keep_first)
        self.conv2 = Conv3d(128, cube, 'relu')
        self.pool2 = Pool3d(halve_all, halve_all)
        self.conv3_a = Conv3d(256, cube, 'relu')
        self.conv3_b = Conv3d(256, cube, 'relu')
        self.pool3 = Pool3d(halve_all, halve_all)
        self.conv4_a = Conv3d(512, cube, 'relu')
        self.conv4_b = Conv3d(512, cube, 'relu')
        self.pool4 = Pool3d(halve_all, halve_all)

    def call(self, x):
        """Apply every layer of the block in order and return the result."""
        pipeline = (
            self.conv1, self.pool1,
            self.conv2, self.pool2,
            self.conv3_a, self.conv3_b, self.pool3,
            self.conv4_a, self.conv4_b, self.pool4,
        )
        features = x
        for stage in pipeline:
            features = stage(features)
        return features


class r3d_module(tf.keras.layers.Layer):
    """Bottleneck branch: four convolutions, each followed by batch norm.

    Kernels are 1x1x1 (128 filters, ReLU), 1x3x3 (128, ReLU),
    3x1x1 (128, ReLU) and 1x1x1 (512, no activation). The layer returns only
    the branch output; adding the shortcut is left to the caller.
    """

    def __init__(self):
        super().__init__()
        # Created pairwise (conv, then its batch norm) in application order.
        self.r_conv1 = Conv3d(128, (1, 1, 1), 'relu')
        self.r_conv1_bn = BatchnNorm()
        self.r_conv2 = Conv3d(128, (1, 3, 3), 'relu')
        self.r_conv2_bn = BatchnNorm()
        self.r_conv3 = Conv3d(128, (3, 1, 1), 'relu')
        self.r_conv3_bn = BatchnNorm()
        self.r_conv4 = Conv3d(512, (1, 1, 1), None)
        self.r_conv4_bn = BatchnNorm()

    def call(self, x):
        """Run the four conv + batch-norm pairs in sequence."""
        conv_bn_pairs = (
            (self.r_conv1, self.r_conv1_bn),
            (self.r_conv2, self.r_conv2_bn),
            (self.r_conv3, self.r_conv3_bn),
            (self.r_conv4, self.r_conv4_bn),
        )
        branch = x
        for conv, norm in conv_bn_pairs:
            branch = norm(conv(branch))
        return branch


class a3d_module(tf.keras.layers.Layer):
    """Dot-product attention over all positions of a 5-D feature volume.

    Keys, queries and values are 1x1x1 convolutions that reduce the channel
    count to ``512 / reduction_ratio``. The position-by-position similarity
    of keys and queries is normalised by a default Keras Softmax layer and
    used to mix the values; a final 1x1x1 convolution restores 512 channels.
    The layer returns only the attention branch; adding the shortcut is left
    to the caller.

    Args:
        reduction_ratio: divisor applied to 512 to get the k/q/v channel count.
        attention_method: requested attention flavour. Only
            ``'spatio_temporal'`` is implemented; any other value triggers a
            warning and falls back to spatio-temporal attention.
    """

    def __init__(self, reduction_ratio, attention_method):
        super(a3d_module, self).__init__()
        if attention_method != 'spatio_temporal':
            # Previously the argument was ignored without any notice. Warn
            # instead of raising so existing callers keep working.
            import warnings
            warnings.warn(
                "a3d_module: attention_method=%r is not implemented; "
                "using 'spatio_temporal' attention instead." % (attention_method,)
            )
        self.attention_method = attention_method
        self.reduction_ratio = reduction_ratio
        self.reduction_channel = int(512 / self.reduction_ratio)
        self.k_conv = Conv3d(self.reduction_channel,(1,1,1),None)
        self.q_conv = Conv3d(self.reduction_channel,(1,1,1),None)
        self.v_conv = Conv3d(self.reduction_channel,(1,1,1),None)
        self.restore_conv = Conv3d(512,(1,1,1),None)

        self.softmax = Softmax()

    def call(self, x):
        """Compute the attention branch for ``x``.

        Args:
            x: 5-D tensor whose axes 1-3 have statically known sizes (they
                are multiplied to build the reshape target).

        Returns:
            Tensor with the same leading four axes as ``x`` and 512 channels.
        """
        depth, height, width = x.shape[1], x.shape[2], x.shape[3]
        # -1 lets TensorFlow infer the batch size, so the layer also works
        # when the batch dimension is not statically known.
        flat_shape = [-1, depth * height * width, self.reduction_channel]  ## positions x channel

        k = tf.reshape(self.k_conv(x), flat_shape)
        q = tf.reshape(self.q_conv(x), flat_shape)

        # (batch, positions, positions) similarity, normalised by softmax.
        w_spatio_temporal = self.softmax(tf.einsum('bic,bjc->bij', k, q))

        v = tf.reshape(self.v_conv(x), flat_shape)

        # spatio_temporal: weight the values, then restore the volume layout.
        out = tf.einsum('bif,bfj->bij', w_spatio_temporal, v)
        out = tf.reshape(out, [-1, depth, height, width, self.reduction_channel])
        out = self.restore_conv(out)

        return out


class AR3D(tf.keras.Model):
    """AR3D classifier assembled from sfe_block, r3d_module and a3d_module.

    Args:
        num_classes: number of units of the final dense layer (fc8).
        AR3D_V: ``'v1'`` adds the residual branch, the attention branch and
            the shortcut in parallel and applies ReLU; any other value
            (default ``'v2'``) applies the residual block first and the
            attention block second, each with its own shortcut.
        SFE_type: feature-extractor variant. Only ``'t1'`` is implemented.
        attention_method: forwarded to ``a3d_module``.
        reduction_ratio: forwarded to ``a3d_module``.
        hidden_unit: width of the two hidden dense layers (fc6 and fc7).

    Raises:
        ValueError: if ``SFE_type`` is not ``'t1'``.

    ``call`` returns the raw fc8 outputs; no softmax is applied to them.
    """

    def __init__(self, num_classes, AR3D_V='v2', SFE_type='t1', attention_method = 'spatio_temporal', reduction_ratio = 4, hidden_unit = 4096):
        super(AR3D, self).__init__()
        ## sfe
        if SFE_type != 't1':
            # Fail fast: previously this branch was a bare `pass`, leaving
            # self.sfe undefined and deferring the failure to the first call.
            # A 't2' variant (sfe from C3D's convolution layers,
            # fc_input_size = 8192) was planned but is not implemented.
            raise ValueError(
                "Unsupported SFE_type %r: only 't1' is implemented." % (SFE_type,)
            )
        self.sfe = sfe_block()
        ## fc_input_size = 50176

        ## dfe
        self.version = AR3D_V
        self.residual = r3d_module()
        self.attention = a3d_module(reduction_ratio, attention_method)

        ## prediction (fc)
        self.fc6 = FC(hidden_unit)
        # fc7 follows hidden_unit too (it was hard-coded to 4096, which is
        # the default value of hidden_unit).
        self.fc7 = FC(hidden_unit)
        self.fc8 = FC(num_classes)

        self.relu = ReLU()
        self.dropout = DropOut(0.5)

    def call(self, x):
        """Run the forward pass and return a (batch, num_classes) tensor.

        All axes of the extracted feature volume except the batch axis must
        have statically known sizes, because they are multiplied together to
        build the flatten target.
        """
        ## sfe
        x = self.sfe(x)

        ## dfe
        if self.version == 'v1':
            ## AR3D_V == 'v1': residual and attention branches in parallel
            sc = x
            residual = self.residual(x)
            attention = self.attention(x)
            x = self.relu(attention + residual + sc)
        else:
            ## AR3D_V == 'v2' (default): residual block, then attention block
            sc_r3d = x
            x = self.residual(x)
            x = self.relu(x + sc_r3d)

            sc_a3d = x
            x = self.attention(x)
            x = x + sc_a3d

        ## prediction
        # -1 lets TensorFlow infer the batch size, so the model also works
        # when the batch dimension is not statically known.
        n_features = x.shape[1] * x.shape[2] * x.shape[3] * x.shape[4]
        x = tf.reshape(x, [-1, n_features])
        # ReLU between the dense layers: without it fc6/fc7/fc8 are three
        # stacked linear maps, equivalent to one linear layer at inference.
        x = self.relu(self.fc6(x))
        x = self.dropout(x)
        x = self.relu(self.fc7(x))
        x = self.dropout(x)
        x = self.fc8(x)

        return x

# Smoke test: build the modular model on the first GPU and push the dummy
# clip (`dummy_input`, created in an earlier cell) through it once.
# Swap '/gpu:0' for '/cpu:0' to run on the CPU instead.
# NOTE(review): attention_method='channel' is requested here, but
# a3d_module.call only computes spatio-temporal attention.
with tf.device('/gpu:0'):
  model = AR3D(
      num_classes=101,
      AR3D_V='v2',
      SFE_type='t1',
      attention_method='channel',
      reduction_ratio=4,
      hidden_unit=4096,
  )
  out = model(dummy_input)
print("out = ", out.shape)

out =  (1, 101)


In [9]:
# First ten outputs of the flat model (AR3D_sequential) for the dummy clip.
out_seq[:, :10]

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-0.1380957 ,  0.13519843,  0.13575816, -0.08654429,  0.0605948 ,
         0.12852287,  0.22785139, -0.09654778,  0.05900716, -0.03293788]],
      dtype=float32)>

In [10]:
# First ten outputs of the modular model (AR3D) for the dummy clip.
out[:, :10]

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[ 0.20497988, -0.2401298 ,  0.06043866,  0.10944473,  0.18787952,
         0.0169337 ,  0.08604617, -0.00995584, -0.01761821,  0.11421899]],
      dtype=float32)>