In [1]:
# install memory util
# import urllib.request
# response = urllib.request.urlopen("https://raw.githubusercontent.com/yaroslavvb/memory_util/master/memory_util.py")
# open("memory_util.py", "wb").write(response.read())

import memory_util
memory_util.vlog(1)

import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random

In [25]:
def conv3d(x, W_shape, b_shape, name, strides=1):
    """3-D convolution followed by a bias add and a ReLU.

    Inside ``tf.variable_scope(name)`` this creates a kernel variable ``W`` and
    a bias variable ``b`` (both initialised from ``tf.random_normal``), applies
    ``tf.nn.conv3d`` with 'SAME' padding, adds the bias, and records histogram
    summaries for the kernel, the bias and the pre-ReLU output.

    Args:
        x: input tensor handed to ``tf.nn.conv3d``.
        W_shape: shape of the kernel variable.
        b_shape: shape of the bias variable.
        name: variable-scope name; the scope name is also used as the conv op name.
        strides: stride applied to the three middle dimensions (the first and
            last stride entries are always 1).

    Returns:
        ``tf.nn.relu`` of the biased convolution output.
    """
    stride_spec = [1] + [strides] * 3 + [1]
    with tf.variable_scope(name) as layer_scope:
        kernel = tf.Variable(tf.random_normal(W_shape), name='W')
        bias = tf.Variable(tf.random_normal(b_shape), name='b')
        pre_activation = tf.nn.bias_add(
            tf.nn.conv3d(x, kernel, strides=stride_spec, padding='SAME',
                         name=layer_scope.name),
            bias)
        # The "activations" histogram is taken before the ReLU is applied.
        for tag, tensor in (("weights", kernel),
                            ("biases", bias),
                            ("activations", pre_activation)):
            tf.summary.histogram(tag, tensor)
        return tf.nn.relu(pre_activation)

def maxpool3d(x, name, k=2):
    """3-D max pooling whose stride equals its window ('SAME' padding).

    The window is ``k`` along the first pooled axis. Along the other two pooled
    axes it is also ``k``, except when ``k == 1``: then those two axes are still
    pooled with a window of 2 while the first pooled axis is left untouched.

    Args:
        x: input tensor handed to ``tf.nn.max_pool3d``.
        name: variable-scope name; the scope name is also used as the op name.
        k: pooling factor (see above for the ``k == 1`` special case).

    Returns:
        The pooled tensor.
    """
    inner = 2 if k == 1 else k
    window = [1, k, inner, inner, 1]
    with tf.variable_scope(name) as pool_scope:
        return tf.nn.max_pool3d(x, ksize=window, strides=window,
                                padding='SAME', name=pool_scope.name)
def fc(x, size_in, size_out, name, relu=True):
    """Fully connected layer: ``x @ W + b`` with an optional ReLU.

    Creates ``W`` (truncated normal, stddev 0.1) and ``b`` (constant 0.1)
    inside ``tf.variable_scope(name)`` and records histogram summaries for the
    weights, the biases and the pre-ReLU output.

    Args:
        x: 2-D input tensor whose second dimension is ``size_in``.
        size_in: number of input features.
        size_out: number of output units.
        name: variable-scope name for the layer.
        relu: when truthy, apply ``tf.nn.relu`` to the result; pass ``False``
            to get the raw affine output (e.g. logits).

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="b")
        # Matrix multiply weights and inputs and add bias
        act = tf.add(tf.matmul(x, W), b)
        tf.summary.histogram("weights", W)
        tf.summary.histogram("biases", b)
        # Recorded before the non-linearity so the raw distribution is visible.
        tf.summary.histogram("activations", act)
        # Truth test rather than `== True`, so any truthy flag enables the ReLU.
        if relu:
            act = tf.nn.relu(act)
        return act

def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert integer class labels into one-hot row vectors.

    Args:
        labels_dense: array-like of integer class ids. It is flattened before
            use, so shapes such as ``(n,)`` and ``(n, 1)`` are both accepted.
        num_classes: number of classes, i.e. the number of output columns.

    Returns:
        A float64 array of shape ``(num_labels, num_classes)`` holding exactly
        one ``1`` per row at the column given by the label.

    Raises:
        ValueError: if any label lies outside ``[0, num_classes)``. Flat-offset
            indexing would otherwise silently set a cell in a different row.
    """
    labels = np.asarray(labels_dense).ravel()
    num_labels = labels.shape[0]
    labels_one_hot = np.zeros((num_labels, num_classes))
    if num_labels == 0:
        return labels_one_hot

    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            "labels must lie in [0, %d); got values in [%s, %s]"
            % (num_classes, labels.min(), labels.max()))

    # Row/column indexing keeps every write inside its own row.
    labels_one_hot[np.arange(num_labels), labels] = 1
    return labels_one_hot

def get_xy(start, end, vid, labels, data, clip_len=16, num_classes=None):
    """Build a batch of fixed-length clips and their one-hot labels.

    For every index ``i`` in ``[start, end]`` (both bounds inclusive) the video
    id ``vid[i]`` is looked up in ``data``, whose entry is used as a mapping
    from frame id to frame. Frames are taken in sorted frame-id order:

    * videos longer than ``clip_len`` contribute a random contiguous window of
      ``clip_len`` frames; every window, including the one starting at the
      first frame, can be drawn;
    * shorter videos contribute all their frames, padded by repeating the last
      frame until ``clip_len`` frames are present.

    Each frame is wrapped in a one-element list, which adds a length-1 axis
    right after the frame axis of the returned array.

    Args:
        start: first index into ``vid`` (inclusive).
        end: last index into ``vid`` (inclusive).
        vid: indexable sequence of video ids.
        labels: mapping from video id to integer class label.
        data: mapping from video id to a mapping of frame id -> frame.
        clip_len: number of frames per clip.
        num_classes: width of the one-hot labels; when ``None`` the
            notebook-level ``n_classes`` is used.

    Returns:
        ``(clips, one_hot)``: ``clips`` is a float32 array with one clip per
        requested video and ``one_hot`` is the result of ``dense_to_one_hot``
        on the corresponding labels.

    Raises:
        ValueError: if a requested video has no frames.
    """
    if num_classes is None:
        # Fall back to the notebook-wide setting, resolved at call time.
        num_classes = n_classes

    inp, output = [], []
    for i in range(start, end + 1):
        video_id = vid[i]
        frames = data[video_id]
        frame_ids = sorted(frames)
        if not frame_ids:
            raise ValueError("video %r has no frames" % (video_id,))

        if len(frame_ids) > clip_len:
            # Valid window starts are 0 .. len - clip_len (inclusive).
            first = random.randrange(0, len(frame_ids) - clip_len + 1)
            chosen = frame_ids[first:first + clip_len]
        else:
            # Too short: repeat the final frame to reach clip_len frames.
            chosen = frame_ids + [frame_ids[-1]] * (clip_len - len(frame_ids))

        inp.append([[frames[frame_id]] for frame_id in chosen])
        output.append(labels[video_id])

    output = dense_to_one_hot(np.array(output), num_classes)
    return np.array(inp, dtype=np.float32), output


In [3]:
def C3D_model(x, weights, biases, n_classes):
    """Assemble the full C3D-style network and return its class logits.

    The network has five conv/pool stages (one conv layer in stages 1-2, two in
    stages 3-5) followed by two 4096-unit fully connected layers and a linear
    output layer. The first pooling layer uses ``k=1``; the others use ``k=2``.

    Args:
        x: input tensor fed to the first convolution.
        weights: dict mapping ``'wc1'``, ``'wc2'``, ``'wc3a'`` ... ``'wc5b'`` to
            the kernel shape handed to ``conv3d``.
        biases: dict mapping ``'bc1'``, ``'bc2'``, ``'bc3a'`` ... ``'bc5b'`` to
            the bias shape handed to ``conv3d``.
        n_classes: number of output units.

    Returns:
        The un-activated output of the final fully connected layer.
    """
    # (conv-layer suffixes, pooling factor) for each stage, in build order.
    stage_plan = (
        (('1',), 1),
        (('2',), 2),
        (('3a', '3b'), 2),
        (('4a', '4b'), 2),
        (('5a', '5b'), 2),
    )

    net = x
    for stage_idx, (suffixes, pool_k) in enumerate(stage_plan, start=1):
        for suffix in suffixes:
            net = conv3d(net, weights['wc' + suffix], biases['bc' + suffix],
                         name='conv' + suffix)
        net = maxpool3d(net, k=pool_k, name='pool%d' % stage_idx)

    # Flatten everything except the batch axis for the dense layers.
    dim = np.prod(net.get_shape().as_list()[1:])
    flat = tf.reshape(net, [-1, dim])

    hidden = fc(flat, dim, 4096, name='fc6')
    hidden = fc(hidden, 4096, 4096, name='fc7')

    # Output, class prediction
    return fc(hidden, 4096, n_classes, name='out', relu=False)

In [19]:
def C3D_model2(x, weights, biases, n_classes):
    """Small debugging network: one conv/pool stage plus three dense layers.

    Args:
        x: input tensor fed to the convolution; the kernel shape
            ``[3,3,3,1,32]`` expects a single input channel.
        weights: unused; kept so the signature matches ``C3D_model``.
        biases: unused; kept so the signature matches ``C3D_model``.
        n_classes: number of output units.

    Returns:
        The un-activated output of the final fully connected layer.
    """
    conv1 = conv3d(x, [3,3,3,1,32], [32], name='conv1')
    pool1 = maxpool3d(conv1, k=1, name='pool1')

    # Flatten everything except the batch axis. The size has to come from the
    # pooled tensor itself: reshaping to an unrelated width would fold feature
    # values into the batch axis and break the match with the labels.
    dim = int(np.prod(pool1.get_shape().as_list()[1:]))

    flattened = tf.reshape(pool1, [-1, dim])
    fc6 = fc(flattened, dim, 64, name='fc6')

    fc7 = fc(fc6, 64, 64, name='fc7')

    # Output, class prediction
    out = fc(fc7, 64, n_classes, name='out', relu=False)

    return out

In [5]:
# Notebook-wide configuration: paths, input geometry and training settings.
# os.mkdir('/output/logs')
root = 'backup/'   # directory the .npy inputs are read from
LOGDIR = 'logs/'   # base directory for the summary writers
w_input = 128
h_input = 128
l_input = 16       # frames per clip
n_classes = 7
learning_rate = 1e-3

n_epoch = 100
batch_size = 128
display_step = 10

# Kernel SHAPES (not tensors) for each conv layer, handed to conv3d as W_shape.
# Layout of each entry: [k, k, k, in_channels, out_channels].
weights = {
    # 3x3x3 conv, 1 input, 64 outputs
    'wc1': [3,3,3,1,64],
    # 3x3x3 conv, 64 inputs, 128 outputs
    'wc2': [3,3,3,64,128],
    # 3x3x3 conv, 128 inputs, 256 outputs
    'wc3a': [3,3,3,128,256],
    # 3x3x3 conv, 256 inputs, 512 outputs
    'wc3b': [3,3,3,256,512],
    # 3x3x3 conv, 512 inputs, 512 outputs
    'wc4a': [3,3,3,512,512],
    # 3x3x3 conv, 512 inputs, 512 outputs
    'wc4b': [3,3,3,512,512],
    # 3x3x3 conv, 512 inputs, 512 outputs
    'wc5a': [3,3,3,512,512],
    # 3x3x3 conv, 512 inputs, 512 outputs
    'wc5b': [3,3,3,512,512]
}

# Bias SHAPES for each conv layer (one bias per output channel), handed to
# conv3d as b_shape.
biases = {
    'bc1': [64],
    'bc2': [128],
    'bc3a': [256],
    'bc3b': [512],
    'bc4a': [512],
    'bc4b': [512],
    'bc5a': [512],
    'bc5b': [512]
}

In [6]:
# Load the validation split (the training-split loads are kept commented out).
#
# The frame and label files are unwrapped with .item(0), i.e. they presumably
# hold a pickled Python object inside a NumPy array -- confirm against the code
# that wrote them. NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True
# is passed explicitly.
# SECURITY: unpickling can execute arbitrary code; only load files you trust.

# train_data = np.load(root+'image_data_train_757.npy', encoding='latin1', allow_pickle=True).item(0)
val_data = np.load(root+'image_data_val_372.npy', encoding='latin1', allow_pickle=True).item(0)

# vid_train = np.load(root+'vid_train.npy')
vid_val = np.load(root+'vid_val.npy')

# labels_train = np.load(root+'labels_train.npy', allow_pickle=True).item(0)
labels_val = np.load(root+'labels_val.npy', allow_pickle=True).item(0)

In [26]:
# Smoke test: build a one-video batch from the validation entry at index 1
# (get_xy treats both bounds as inclusive). t1 holds the clips, t2 the labels.
t1,t2 = get_xy(1,1,vid_val, labels_val, val_data)

In [28]:
# Move axis 2 of the batch to the last position and display the resulting shape.
np.transpose(t1, (0,1,3,4,2)).shape

(1, 16, 128, 128, 1)

In [None]:
# Build the graph for the small debugging model and train it.
# NOTE: this cell currently trains AND evaluates on the validation split; the
# commented-out lines show the training-split equivalents.
tf.reset_default_graph()

with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, [None, l_input, w_input, h_input,1])
    y = tf.placeholder(tf.float32, [None, n_classes])

pred = C3D_model2(x, weights, biases, n_classes)
with tf.name_scope("cross_entropy"):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y),
        name="cross_entropy")
    tf.summary.scalar("cross_entropy", cross_entropy)

with tf.name_scope("train"):
    minimize = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

init = tf.global_variables_initializer()

# Created after the optimizer so its slot variables are checkpointed as well.
saver = tf.train.Saver()

# merge all summaries into a single "operation" which we can execute in a session
summary_op = tf.summary.merge_all()


def _clips_and_labels(first, last):
    """Fetch videos first..last (inclusive) and move the channel axis last.

    get_xy puts the length-1 axis at position 2; the `x` placeholder expects
    it at the end, so every batch must be transposed before being fed.
    """
    # clips, targets = get_xy(first, last, vid_train, labels_train, train_data)
    clips, targets = get_xy(first, last, vid_val, labels_val, val_data)
    return np.transpose(clips, (0,1,3,4,2)), targets


with tf.Session() as sess:
    sess.run(init)
#     no_of_batches = len(train_data) // batch_size
    no_of_batches = len(val_data) // batch_size
    train_writer = tf.summary.FileWriter('%strain/'%(LOGDIR), sess.graph)
    val_writer = tf.summary.FileWriter('%sval/'%(LOGDIR), sess.graph)

    try:
        for i in range(n_epoch):
#             np.random.shuffle(vid_train)
            np.random.shuffle(vid_val)
            for j in range(no_of_batches):
                # Advance through the shuffled ids one batch at a time. The end
                # index is inclusive, hence the -1 to get exactly batch_size clips.
                ptr = j * batch_size
                inp, out = _clips_and_labels(ptr, ptr + batch_size - 1)
                sess.run([minimize],{x: inp, y: out})

            # Full-split evaluation (a fresh random clip is drawn per video).
            train_input, train_output = _clips_and_labels(0, len(val_data)-1)
            val_input, val_output = _clips_and_labels(0, len(val_data)-1)

            loss_train, acc_train, summary_train, props_train = sess.run(
                [cross_entropy, accuracy, summary_op, pred],
                {x: train_input, y: train_output})
            train_writer.add_summary(summary_train,i)

            _, acc_val, summary_val, props_val = sess.run(
                [cross_entropy, accuracy, summary_op, pred],
                {x: val_input, y: val_output})
            val_writer.add_summary(summary_val,i)

            # Save the variables to disk.
            if i %  25 == 0:
                save_path = saver.save(sess, "/output/model_%d.ckpt"%i)
                print("Model saved in file: %s" % save_path)
            print("Epoch ",str(i), ', acc_train: %.7f' % (acc_train*100), 'acc_val: %.7f' % (acc_val*100), ', loss_trian: %.7f' % loss_train)
    finally:
        # Flush and release the event files even if training is interrupted.
        train_writer.close()
        val_writer.close()
    

In [35]:
# Discard the current default graph and start from a fresh, empty one.
tf.reset_default_graph()

# List the global variables registered in the (now empty) default graph.
tf.global_variables()

[]