In [1]:
import tensorflow as tf
import numpy as np
from tqdm import tqdm
import time
import preprocessing as pre
import tensorflow.contrib.layers as layers

In [2]:
data, label = pre.getDataFrame('../data/raw/')

In [3]:
data.shape

(30, 30, 1080, 1920)

In [None]:
data = tf.expand_dims(data, -1)

In [5]:
data.shape

(30, 30, 1080, 1920, 1)

In [7]:
label.shape

(30, 5)

# Data Split

In [8]:
X_train = data[:21]
y_train = label[:21]
X_test = data[21:]
y_test = label[21:]

# simple CNN

In [6]:
class Model(object):
    def __init__(self,
            num_class = 30,
            keep_prob = 0.6,
            batch_size = 1,
            epoch=40,
            lr = 1e-4):
        self.IMG_WIDTH = 1920
        self.IMG_HEIGHT = 1080
        
        self.graph = tf.Graph()
        self.num_class = num_class
        self.epoch = epoch
        self.CLIP_LENGTH = 30
        self.keep_prob = keep_prob
        self.batch_size = batch_size
        
        self.n_step_epoch=int(30/batch_size)
        with self.graph.as_default():
            self.inputs = tf.placeholder(tf.float32, [None, self.CLIP_LENGTH, self.IMG_HEIGHT, self.IMG_WIDTH])
            self.labels = tf.placeholder(tf.int64, [batch_size,])

            self.initializer = layers.xavier_initializer()
            self.global_step = tf.Variable(0, trainable = False, name = "global_step")
            self.lr = lr
            tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, self.global_step)
         
    def conv3d(self, inputs, shape, name, w_name, b_name):
        with self.graph.as_default():
            with tf.variable_scope('var_name') as var_scope:
                W = tf.get_variable(name = w_name, shape = shape, initializer = self.initializer, dtype = tf.float32)
                b = tf.get_variable(name = b_name, shape = shape[-1], initializer = tf.zeros_initializer(), dtype = tf.float32)
                tf.add_to_collection(tf.GraphKeys.WEIGHTS, W)
                tf.add_to_collection(tf.GraphKeys.BIASES, b)
            return tf.nn.relu(tf.nn.bias_add(tf.nn.conv3d(inputs, W, strides = [1, 1, 1, 1, 1], padding = "SAME"), b))
        
    def fc(self, inputs, shape, name,w_name,b_name,activation = True):
        with self.graph.as_default():
            with tf.variable_scope('var_name') as var_scope:
                W = tf.get_variable(name = w_name, shape = shape, initializer = self.initializer, dtype = tf.float32)
                b = tf.get_variable(name = b_name, shape = shape[-1], initializer = tf.zeros_initializer(), dtype = tf.float32)
                tf.add_to_collection(tf.GraphKeys.WEIGHTS, W)
                tf.add_to_collection(tf.GraphKeys.BIASES, b)

            if activation:
                return tf.nn.relu(tf.nn.bias_add(tf.matmul(inputs, W), b))
            else:
                return tf.nn.bias_add(tf.matmul(inputs, W), b)
            
    def parseNet(self, net, netstruct, istraining = True):
        for key in netstruct:
            if key[0] == "conv":
                net = self.conv3d(net, key[2], key[1],key[3], key[4])
            elif key[0] == "fc":
                net = self.fc(net, key[2], key[1], key[3], key[4],activation = key[-1])
            elif key[0] == "maxpool":
                net = tf.nn.max_pool3d(net, ksize = key[2], strides = key[2], padding = "SAME", name = key[1])
            elif key[0] == "dropout" and istraining:
                net = tf.nn.dropout(net, key[2], name = key[1])
            elif key[0] == "reshape":
                net = tf.reshape(net, key[-1])
            elif key[0] == "softmax":
                net = tf.nn.softmax(net)
            elif key[0] == "transpose":
                net = tf.transpose(net, perm=key[-1])
        return net

    def test(self, modelpath, data, label):
        with self.graph.as_default():
            # [batch, in_depth, in_height, in_width, in_channels]
            c3d_net = [
                ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
                ["maxpool", "pool1", [1, 1, 2, 2, 1]],
                ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
                ["maxpool", "pool2", [1, 2, 2, 2, 1]],
                ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
                ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
                ["maxpool", "pool3", [1, 2, 2, 2, 1]],
                ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
                ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
                ["maxpool", "pool4", [1, 2, 2, 2, 1]],
                ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
                ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
                ["maxpool", "pool5", [1, 2, 2, 2, 1]],
                ["transpose", [0, 1, 4, 2, 3]],  #only use it if you restore the sports1m_finetuning_ucf101.model, otherwise uncomment it,(e.g use conv3d_deepnetA_sport1m_iter_1900000_TF.model)
                ["reshape", [-1, 8192]],
                ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
                ["dropout", "dropout1", self.keep_prob],
                ["fc", "fc2", [4096, 4096],'wd2','bd2', True],
                ["dropout", "dropout2", self.keep_prob],
                ["fc", "fc3", [4096, self.num_class],'wout','bout',False],
            ]

            # print(tf.trainable_variables())
            # print(var_list)
            # print(tf.get_collection(tf.GraphKeys.WEIGHTS))

            # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5)

            with tf.Session() as sess:
                logits = self.parseNet(self.inputs, c3d_net)
                softmax_logits = tf.nn.softmax(logits)
                # int_label = tf.one_hot(self.labels, self.num_class)
                int_label = self.labels  # [bs,101]-->[bs*4 or 8 or 16,101]
                # int_label=tf.concat(
                #     [int_label,int_label,int_label,int_label,],axis=0)

                # int_label=tf.cast(int_label,dtype=tf.int64)
                task_loss = tf.reduce_sum(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=int_label))
                # task_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = int_label))
                # task_loss = -tf.reduce_sum(int_label*tf.log(logits))
                acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=-1), int_label), tf.float32))
                right_count = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=1), int_label), tf.int32))
    
                reg_loss = layers.apply_regularization(layers.l2_regularizer(5e-4),
                                                       tf.get_collection(tf.GraphKeys.WEIGHTS))
                total_loss = task_loss + reg_loss
                # train_var_list = [v for v in tf.trainable_variables() if v.name.find("conv") == -1]
                train_op = tf.train.GradientDescentOptimizer(self.lr).minimize(
                    total_loss, global_step=self.global_step)
                # train_op = tf.train.MomentumOptimizer(self.lr,0.9).minimize(
                #     total_loss, global_step = self.global_step,var_list=train_var_list)
    
    
                total_para = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
                print('total_para:', total_para)  # all CDC9 :28613120  #pool5 27655936

                # train clip:762960
                # test  clip:302640
                init = tf.global_variables_initializer()
                # var_list = [v for v in tf.trainable_variables() if v.name.find("conv") != -1]  # 初始化只加载卷积层参数
                # print(var_list)
                # saver = tf.train.Saver(tf.global_variables())
                sess.run(init)
                saver = tf.train.Saver(tf.trainable_variables())
                # saver.restore(sess, tf.train.latest_checkpoint(modelpath))
                # saver.restore(sess, modelpath + "sports1m_finetuning_ucf101.model")
                # print("Model Loading Done!")
                step = 0
                print_freq = 2
                next_start_pos = 0
                for one_epoch in range(1):
                    epostarttime = time.time()
                    starttime = time.time()
                    total_v = 0.0
                    test_correct_num = 0
                    for i in tqdm(range(int(3783 / self.batch_size))):
                        step += 1
                        total_v += self.batch_size
                        train_batch = self.data[next_start_pos:next_start_pos+self.batch_size] 
                        label_batch = self.data[next_start_pos:next_start_pos+self.batch_size] 
                        next_start_pos += self.batch_size 
                        assert len(train_batch)==self.batch_size
                        
                        val_feed = {self.inputs: train_batch, self.labels: label_batch}
                        test_correct_num += sess.run(right_count, val_feed)
                        print('test acc:', test_correct_num / total_v, 'test_correct_num:', test_correct_num,
                              'total_v:', total_v)
            

In [9]:
if __name__ == "__main__":
    c3dnet = Model()
    c3dnet.test(_, X_train, y_train)

ValueError: Shape must be rank 5 but is rank 4 for 'Conv3D' (op: 'Conv3D') with input shapes: [?,30,1080,1920], [3,3,3,3,64].

In [None]:

learning_rate = 0.001
total_epoch = 10
batch_size = 1

n_input = 1080*1920
n_step = 30
n_hidden = 128
n_class = 5

X = tf.placeholder(tf.float32, [None, n_step, n_input])
Y = tf.placeholder(tf.float32, [None, n_class])

W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)

# RNN 신경망을 생성합니다
# 원래는 다음과 같은 과정을 거쳐야 하지만
# states = tf.zeros(batch_size)
# for i in range(n_step):
#     outputs, states = cell(X[[:, i]], states)
# ...
# 다음처럼 tf.nn.dynamic_rnn 함수를 사용하면
# CNN 의 tf.nn.conv2d 함수처럼 간단하게 RNN 신경망을 만들어줍니다.
# 겁나 매직!!
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# 결과를 Y의 다음 형식과 바꿔야 하기 때문에
# Y : [batch_size, n_class]
# outputs 의 형태를 이에 맞춰 변경해야합니다.
# outputs : [batch_size, n_step, n_hidden]
#        -> [n_step, batch_size, n_hidden]
#        -> [batch_size, n_hidden]
outputs = tf.transpose(outputs, [1, 0, 2])
outputs = outputs[-1]
model = tf.matmul(outputs, W) + b

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

#########
# 신경망 모델 학습
######
sess = tf.Session()
sess.run(tf.global_variables_initializer())

total_batch = int(len(data)/batch_size)

for epoch in range(total_epoch):
    total_cost = 0

    for i in range(total_batch):
        batch_xs, batch_ys = df
        # X 데이터를 RNN 입력 데이터에 맞게 [batch_size, n_step, n_input] 형태로 변환합니다.
        batch_xs = batch_xs.reshape((batch_size, n_step, n_input))

        _, cost_val = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs, Y: batch_ys})
        total_cost += cost_val

    print('Epoch:', '%04d' % (epoch + 1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

print('최적화 완료!')

#########
# 결과 확인
######
is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

test_batch_size = len(mnist.test.images)
test_xs = mnist.test.images.reshape(test_batch_size, n_step, n_input)
test_ys = mnist.test.labels

print('정확도:', sess.run(accuracy,
                       feed_dict={X: test_xs, Y: test_ys}))