Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

baseline table1 #2

Closed
nitba opened this issue Sep 17, 2019 · 2 comments
Closed

baseline table1 #2

nitba opened this issue Sep 17, 2019 · 2 comments

Comments

@nitba
Copy link

nitba commented Sep 17, 2019

Hi @dkulon ,

Would you please provide us with the baseline that you mentioned was replaced by the spectral decoder?

Thanks,

@dkulon
Copy link
Owner

dkulon commented Sep 18, 2019

Hi @Ivamcoder ,

Unfortunately, I cannot do this because part of the data used to create the hand model is subject to copyright.

Best,
Dominik

@dkulon
Copy link
Owner

dkulon commented Sep 18, 2019

Below is the network implementation, in case you want to know the architecture. Here, mano is a TensorFlow implementation of the hand model, and its parameters are not learned.

import tensorflow as tf

from mano import *

from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.layers import GlobalAveragePooling2D

def build_network(next_X, mano, mesh_embedding_size, cam_embedding_size, batch_size, used_betas=10):
    """Assemble the image encoder, the camera regressor and the hand-model layer.

    Args:
        next_X: Input passed unchanged to ``import_image_encoder``
            (presumably a batch of images -- confirm against the caller).
        mano: Callable hand model; it is invoked as ``mano(betas, thetas)``
            and must expose a ``num_betas`` attribute.
        mesh_embedding_size: Width of the mesh part of the encoder output.
        cam_embedding_size: Width of the camera part of the encoder output.
        batch_size: Number of rows used for the zero-padding tensors.
        used_betas: How many leading columns of the mesh embedding are
            treated as betas; the remaining columns are treated as thetas.

    Returns:
        Tuple ``(output_mesh, output_keypoints, mesh_embedding, betas,
        thetas, scale, trans, rot)``, where ``betas`` and ``thetas`` are the
        zero-padded tensors that were fed to ``mano``.
    """
    with tf.variable_scope('image_encoder'):
        # Encode the input into a mesh embedding and a camera embedding.
        mesh_embedding, camera_embedding = import_image_encoder(
            next_X, mesh_embedding_size, cam_embedding_size)

        # Split the mesh embedding column-wise at `used_betas`.
        predicted_betas = mesh_embedding[:, :used_betas]
        predicted_thetas = mesh_embedding[:, used_betas:]

        # Betas the network does not predict are filled with zeros so the
        # tensor reaches the width the hand model reports via `num_betas`.
        beta_padding = tf.zeros((batch_size, mano.num_betas - used_betas))
        betas = tf.concat((predicted_betas, beta_padding), axis=1)

        # Three zero columns are prepended to the thetas.
        # NOTE(review): presumably these are the global rotation of the hand
        # model, fixed to zero -- confirm against the mano implementation.
        theta_prefix = tf.zeros((batch_size, 3))
        thetas = tf.concat((theta_prefix, predicted_thetas), axis=1)

        scale, trans, rot = camera_regressor(mesh_embedding, camera_embedding)

    with tf.variable_scope('mano'):
        output_mesh, output_keypoints = mano(betas, thetas)

    return output_mesh, output_keypoints, mesh_embedding, betas, thetas, scale, trans, rot

def import_image_encoder(next_X, mesh_embedding_size, cam_embedding_size, name=None):
    """Encode the input with DenseNet121 and project it to two embeddings.

    The input goes through an ImageNet-initialised DenseNet121 without its
    classification head, is globally average-pooled and flattened, and is
    then mapped by a single dense layer to
    ``mesh_embedding_size + cam_embedding_size`` outputs.

    Args:
        next_X: Input tensor fed to the DenseNet121 backbone.
        mesh_embedding_size: Number of leading output columns returned as the
            mesh embedding.
        cam_embedding_size: Number of trailing output columns returned as the
            camera embedding.
        name: Optional name forwarded to the final dense layer.

    Returns:
        Tuple ``(mesh_embedding, camera_embedding)`` obtained by splitting the
        dense layer output column-wise at ``mesh_embedding_size``.
    """
    backbone = DenseNet121(weights='imagenet', include_top=False)
    feature_maps = backbone(next_X)
    pooled = GlobalAveragePooling2D()(feature_maps)
    flat = tf.layers.flatten(pooled)

    # One dense layer produces both embeddings; they are separated below.
    total_size = mesh_embedding_size + cam_embedding_size
    embedding = tf.layers.dense(flat, total_size, name=name)

    mesh_embedding = embedding[:, :mesh_embedding_size]
    camera_embedding = embedding[:, mesh_embedding_size:]
    return mesh_embedding, camera_embedding

def import_mesh_decoder(mesh_embedding, L, A, U, is_train, filters=None, poly_order=None, output_dim=3, batch_norm=False):
    """Build the spectral mesh decoder and a saver for restoring its weights.

    Args:
        mesh_embedding: Embedding passed to ``spectral_ae.MeshDecoder``.
        L, A, U: Forwarded unchanged to ``spectral_ae.MeshDecoder``
            (presumably graph Laplacians, adjacency matrices and upsampling
            matrices -- confirm against ``spectral_ae``).
        is_train: Training flag forwarded to the decoder.
        filters: Per-layer filter counts forwarded to the decoder. Defaults
            to ``[16, 32, 32, 48]`` when ``None``.
        poly_order: Per-layer polynomial orders forwarded to the decoder.
            Defaults to ``[3, 3, 3, 3]`` when ``None``.
        output_dim: Output dimensionality forwarded to the decoder.
        batch_norm: Forwarded to the decoder as ``batch_norm``.

    Returns:
        Tuple ``(output_mesh, restore_saver)``: the decoder output and a
        ``tf.train.Saver`` over the global variables collected under the
        ``'mesh_decoder'`` scope.
    """
    # Fresh lists per call: list literals used as default arguments are
    # created once and shared between calls, so a mutation inside the decoder
    # would leak into every later call.
    if filters is None:
        filters = [16, 32, 32, 48]
    if poly_order is None:
        poly_order = [3, 3, 3, 3]

    # NOTE(review): `spectral_ae` and `reuse_vars_map` are not defined in the
    # visible snippet; they must be provided at module level for this function
    # to run.
    output_mesh = spectral_ae.MeshDecoder(
        mesh_embedding, output_dim, L, A, U, poly_order, filters, is_train, batch_norm=batch_norm)

    # Map each decoder variable to the name looked up in `reuse_vars_map`, so
    # the saver restores it under that name.
    reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='mesh_decoder')
    reuse_vars_dict = {reuse_vars_map[var.op.name]: var for var in reuse_vars}
    restore_saver = tf.train.Saver(reuse_vars_dict)
    return output_mesh, restore_saver

def camera_regressor(mesh_embedding, camera_embedding):
    """Regress scale, translation and rotation from the camera embedding.

    Args:
        mesh_embedding: Accepted for interface compatibility; not used here.
        camera_embedding: Input to the regression layers.

    Returns:
        Tuple ``(scale, trans, rot)`` with last dimensions 1, 3 and 3.
        ``scale`` is passed through a ReLU, so it is never negative.
    """
    with tf.variable_scope("camera_params"):
        # Two 32-unit ReLU layers followed by a linear 7-unit layer.
        hidden = tf.layers.dense(camera_embedding, 32)
        hidden = tf.nn.relu(hidden)
        hidden = tf.layers.dense(hidden, 32)
        hidden = tf.nn.relu(hidden)
        cam_features = tf.layers.dense(hidden, 7)

        # Scale head: bias starts at 90, output clamped to be non-negative.
        scale_bias_init = tf.constant_initializer(90)
        raw_scale = tf.layers.dense(cam_features, 1, bias_initializer=scale_bias_init)
        scale = tf.nn.relu(raw_scale)

        # Translation head: bias starts at 100.
        trans_bias_init = tf.constant_initializer(100)
        trans = tf.layers.dense(cam_features, 3, bias_initializer=trans_bias_init)

        # Rotation head: default initialisation, no activation.
        rot = tf.layers.dense(cam_features, 3)
    return scale, trans, rot

@dkulon dkulon closed this as completed Jan 31, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants