In [1]:
import numpy as np
import json
import os
import copy
import pickle

import mesh_sampling
import trimesh
from shape_data import ShapeData

from autoencoder_dataset import cached_autoencoder_dataset
from torch.utils.data import DataLoader

from spiral_utils import get_adj_trigs, generate_spirals
from models import SpiralAutoencoder
from train_funcs import train_autoencoder_dataloader
from test_funcs import test_autoencoder_dataloader


import torch
from tensorboardX import SummaryWriter

from sklearn.metrics.pairwise import euclidean_distances
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Mesh backend; the later cells branch on this value ('mpi-mesh' or 'trimesh').
meshpackage = 'mpi-mesh' # 'mpi-mesh', 'trimesh'
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
root_dir = '/home/jingwang/Data/data/'

dataset = 'FaceWarehouse'   # sub-folder of root_dir holding template/preprocessed/results
name = 'try_variational'    # experiment name, used as sub-folder for every output

torch.backends.cudnn.benchmark = True


GPU = True
device_idx = 0 # 0, 1, 2, 3
device_ids = [0, 1, 2, 3]

# Only query devices that actually exist: get_device_name() raises for an
# index that is not present, which used to abort this cell on machines with
# fewer than four GPUs (or without CUDA at all).
n_visible_gpus = torch.cuda.device_count()
for idx in device_ids:
    if idx < n_visible_gpus:
        print(torch.cuda.get_device_name(idx))
    else:
        print('cuda:%d is not available on this machine' % idx)

GeForce GTX TITAN X
GeForce GTX TITAN X
GeForce GTX TITAN X
GeForce GTX TITAN X


In [2]:
generative_model = 'autoencoder'
downsample_method = 'COMA_downsample' # choose 'COMA_downsample' or 'meshlab_downsample'


# Template mesh and downsampling cache for the dataset selected by `dataset`.
reference_mesh_file = os.path.join(root_dir, dataset, 'template', 'template.obj')
downsample_directory = os.path.join(root_dir, dataset,'template', downsample_method)
ds_factors = [4, 4, 4, 4]       # handed to mesh_sampling.generate_transform_matrices
step_sizes = [2, 2, 1, 1, 1]    # handed to generate_spirals
# Filter sizes handed to SpiralAutoencoder (encoder / decoder).
filter_sizes_enc = [[3, 16, 32, 64, 128],[[],[],[],[],[]]]
filter_sizes_dec = [[128, 64, 32, 32, 16],[[],[],[],[],3]]
dilation_flag = True
if dilation_flag:
    dilation=[2, 2, 1, 1, 1]    # handed to generate_spirals
else:
    dilation = None
# Reference vertex index on the full-resolution template; the matching
# vertices on the coarser levels are appended in the data-loading cell.
reference_points = [[5930]]# [[3567,4051,4597]] # [[414]]  # used for COMA with 3 disconnected components

args = {'generative_model': generative_model,
        'name': name, 'data': os.path.join(root_dir, dataset, 'preprocessed',name),
        'results_folder':  os.path.join(root_dir, dataset,'results/spirals_'+ generative_model),
        'reference_mesh_file':reference_mesh_file, 'downsample_directory': downsample_directory,
        'checkpoint_file': 'checkpoint',
        'seed':2, 'loss':'l1',
        'batch_size': 16, 'num_epochs':300, 'eval_frequency':200, 'num_workers': 40,
        'filter_sizes_enc': filter_sizes_enc, 'filter_sizes_dec': filter_sizes_dec,
        # Latent size given to SpiralAutoencoder.
        # NOTE(review): an earlier note here read "100 identity + 46 expression"
        # (= 146), which does not match the value below -- confirm intent.
        'nz': 128,
        'ds_factors': ds_factors, 'step_sizes' : step_sizes, 'dilation': dilation,

        'lr':1e-3,
        'regularization': 5e-5,
        'scheduler': True, 'decay_rate': 0.99,'decay_steps':1,
        'resume': True,

        'mode':'train', 'shuffle': True, 'nVal': 100, 'normalization': True,
        'write_mesh': True,
        'lambda_var': 0.5,
        'worst_face_num': 20,
        'use_cache': True}

# Every output of a run lives below <results_folder>/latent_<nz>/<kind>/<name>.
args['results_folder'] = os.path.join(args['results_folder'],'latent_'+str(args['nz']))

summary_path = os.path.join(args['results_folder'],'summaries',args['name'])
checkpoint_path = os.path.join(args['results_folder'],'checkpoints', args['name'])
samples_path = os.path.join(args['results_folder'],'samples', args['name'])
prediction_path = os.path.join(args['results_folder'],'predictions', args['name'])
downsample_mesh_path = os.path.join(args['results_folder'],'downsample_mesh', args['name'])
worst_mesh_path = os.path.join(args['results_folder'],'worst_test', args['name'])

# Create all output folders in one place. The previous version guarded the
# worst_test folder with a check on downsample_mesh_path, so worst_mesh_path
# itself was never created.
for _out_dir in (args['results_folder'], summary_path, checkpoint_path,
                 samples_path, prediction_path, downsample_directory,
                 downsample_mesh_path, worst_mesh_path):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

In [3]:
np.random.seed(args['seed'])
print("Loading data .. ")

# --- Shape data -------------------------------------------------------------
# mean.npy / std.npy act as a cache: when both exist ShapeData is built with
# load_flag=False and the statistics are read from disk; otherwise ShapeData
# is built with load_flag=True and its statistics are written out.
mean_file = args['data']+'/mean.npy'
std_file = args['data']+'/std.npy'
stats_cached = os.path.exists(mean_file) and os.path.exists(std_file)

shapedata = ShapeData(nVal=args['nVal'],
                      train_file=args['data']+'/train.npy',
                      test_file=args['data']+'/test.npy',
                      reference_mesh_file=args['reference_mesh_file'],
                      normalization = args['normalization'],
                      meshpackage = meshpackage, load_flag = not stats_cached)
if stats_cached:
    shapedata.mean = np.load(mean_file)
    shapedata.std = np.load(std_file)
    shapedata.n_vertex = shapedata.mean.shape[0]
    shapedata.n_features = shapedata.mean.shape[1]
else:
    np.save(mean_file, shapedata.mean)
    np.save(std_file, shapedata.std)

# --- Down/up-sampling matrices ----------------------------------------------
matrices_file = os.path.join(args['downsample_directory'],'downsampling_matrices.pkl')
if not os.path.exists(matrices_file):
    if shapedata.meshpackage == 'trimesh':
        raise NotImplementedError('Rerun with mpi-mesh as meshpackage')
    if downsample_method != 'COMA_downsample':
        # Only the COMA path can be generated here. Previously execution fell
        # through to the pickle dump and died with a NameError on `M`.
        raise NotImplementedError(
            "downsample_method '%s' needs precomputed matrices at %s; "
            "only 'COMA_downsample' can be generated in this cell"
            % (downsample_method, matrices_file))
    print("Generating Transform Matrices ..")
    M,A,D,U,F = mesh_sampling.generate_transform_matrices(shapedata.reference_mesh, args['ds_factors'])
    if args['write_mesh']:
        # Optional dependency, only needed to dump the mesh hierarchy as .obj files.
        import openmesh
        for i in range(len(M)):
            mesh = openmesh.TriMesh(points=M[i].v,face_vertex_indices=M[i].f)
            openmesh.write_mesh(os.path.join(args['results_folder'],'downsample_mesh',args['name'], '%d.obj'%i),mesh)
    with open(matrices_file, 'wb') as fp:
        # Store plain (vertices, faces) pairs instead of mesh objects.
        M_verts_faces = [(M[i].v, M[i].f) for i in range(len(M))]
        pickle.dump({'M_verts_faces':M_verts_faces,'A':A,'D':D,'U':U,'F':F}, fp)
else:
    print("Loading Transform Matrices ..")
    # NOTE: pickle executes arbitrary code on load -- only use a cache file
    # that was produced by this notebook.
    with open(matrices_file, 'rb') as fp:
        #downsampling_matrices = pickle.load(fp,encoding = 'latin1')
        downsampling_matrices = pickle.load(fp)

    M_verts_faces = downsampling_matrices['M_verts_faces']
    if shapedata.meshpackage == 'mpi-mesh':
        from psbody.mesh import Mesh
        M = [Mesh(v=M_verts_faces[i][0], f=M_verts_faces[i][1]) for i in range(len(M_verts_faces))]
    elif shapedata.meshpackage == 'trimesh':
        M = [trimesh.base.Trimesh(vertices=M_verts_faces[i][0], faces=M_verts_faces[i][1], process = False) for i in range(len(M_verts_faces))]
    A = downsampling_matrices['A']
    D = downsampling_matrices['D']
    U = downsampling_matrices['U']
    F = downsampling_matrices['F']

# --- Reference points per hierarchy level -----------------------------------
# Needs also an extra check to enforce points to belong to different disconnected component at each hierarchy level
print("Calculating reference points for downsampled versions..")
# Keep only the full-resolution entry so that re-running this cell does not
# keep appending duplicate per-level entries to the list.
reference_points = reference_points[:1]
for i in range(len(args['ds_factors'])):
    # For each coarser mesh pick the vertex closest to the level-0 reference point(s).
    if shapedata.meshpackage == 'mpi-mesh':
        dist = euclidean_distances(M[i+1].v, M[0].v[reference_points[0]])
    elif shapedata.meshpackage == 'trimesh':
        dist = euclidean_distances(M[i+1].vertices, M[0].vertices[reference_points[0]])
    reference_points.append(np.argmin(dist,axis=0).tolist())



Loading data .. 
Loading Transform Matrices ..
Calculating reference points for downsampled versions..


In [4]:
# Number of vertices on every level of the mesh hierarchy.
if shapedata.meshpackage == 'mpi-mesh':
    sizes = [level_mesh.v.shape[0] for level_mesh in M]
elif shapedata.meshpackage == 'trimesh':
    sizes = [level_mesh.vertices.shape[0] for level_mesh in M]

Adj, Trigs = get_adj_trigs(A, F, shapedata.reference_mesh,
                           meshpackage = shapedata.meshpackage)

spirals_np, spiral_sizes, spirals = generate_spirals(
    args['step_sizes'], M, Adj, Trigs,
    reference_points = reference_points,
    dilation = args['dilation'],
    random = False,
    meshpackage = shapedata.meshpackage,
    counter_clockwise = True)


def _pad_with_unit_corner(sparse_mat):
    """Return `sparse_mat` as a dense (1, rows+1, cols+1) array.

    The extra last row/column is zero except for a 1 in the bottom-right
    corner; the leading axis has length one.
    """
    n_rows, n_cols = sparse_mat.shape[0], sparse_mat.shape[1]
    padded = np.zeros((1, n_rows + 1, n_cols + 1))
    padded[0, :-1, :-1] = sparse_mat.todense()
    padded[0, -1, -1] = 1
    return padded


# One padded matrix per entry of D; U is indexed with the same level indices.
bD = [_pad_with_unit_corner(D[level]) for level in range(len(D))]
bU = [_pad_with_unit_corner(U[level]) for level in range(len(D))]


spiral generation for hierarchy 0 (11510 vertices) finished
spiral generation for hierarchy 1 (2878 vertices) finished
spiral generation for hierarchy 2 (720 vertices) finished
spiral generation for hierarchy 3 (180 vertices) finished
spiral generation for hierarchy 4 (45 vertices) finished
spiral sizes for hierarchy 0:  14
spiral sizes for hierarchy 1:  13
spiral sizes for hierarchy 2:  9
spiral sizes for hierarchy 3:  9
spiral sizes for hierarchy 4:  9


In [5]:
torch.manual_seed(args['seed'])

# Run on the GPU selected by `device_idx` only when GPU use is requested and
# CUDA is actually available; fall back to the CPU otherwise.
if GPU and torch.cuda.is_available():
    device = torch.device("cuda:"+str(device_idx))
else:
    device = torch.device("cpu")
print(device)

# Move the spiral indices (integer) and the padded down/up-sampling matrices
# (float) onto the chosen device.
tspirals = [torch.from_numpy(arr).long().to(device) for arr in spirals_np]
tD, tU = [
    [torch.from_numpy(arr).float().to(device) for arr in padded_list]
    for padded_list in (bD, bU)
]

cuda:0


In [6]:
# Building data loaders, model, optimizer, and loss function

def _build_dataloader(points_dataset, shuffle):
    """Create the dataset and DataLoader for one split.

    points_dataset -- split name forwarded to the dataset class ('train', 'val').
    shuffle        -- whether the DataLoader shuffles the samples.

    Returns a (dataset, dataloader) pair. With args['use_cache'] the cached
    dataset is built with `device` and loaded with num_workers=0; otherwise
    the plain dataset is loaded with args['num_workers'] workers and pinned
    memory.
    """
    if args['use_cache']:
        split_dataset = cached_autoencoder_dataset(root_dir = args['data'],
                                                   points_dataset = points_dataset,
                                                   shapedata = shapedata,
                                                   normalization = args['normalization'],
                                                   device=device)
        split_loader = DataLoader(split_dataset, batch_size=args['batch_size'],
                                  shuffle = shuffle, num_workers=0)
    else:
        # The top-of-file import only brings in cached_autoencoder_dataset, so
        # this branch used to fail with a NameError.
        # NOTE(review): assumes the autoencoder_dataset module exposes a class
        # of the same name -- confirm.
        from autoencoder_dataset import autoencoder_dataset
        split_dataset = autoencoder_dataset(root_dir = args['data'],
                                            points_dataset = points_dataset,
                                            shapedata = shapedata,
                                            normalization = args['normalization'])
        split_loader = DataLoader(split_dataset, batch_size=args['batch_size'],
                                  shuffle = shuffle, num_workers = args['num_workers'],
                                  pin_memory=True)
    return split_dataset, split_loader


dataset_train, dataloader_train = _build_dataloader('train', args['shuffle'])
dataset_val, dataloader_val = _build_dataloader('val', False)


if 'autoencoder' in args['generative_model']:
    model = SpiralAutoencoder(filters_enc = args['filter_sizes_enc'],
                              filters_dec = args['filter_sizes_dec'],
                              latent_size=args['nz'],
                              sizes=sizes,
                              spiral_sizes=spiral_sizes,
                              spirals=tspirals,
                              D=tD, U=tU).to(device)
    if torch.cuda.device_count() > 1:
        # DataParallel wrapping is currently disabled, so the model stays on
        # the single `device` even though several GPUs are reported below.
#         model = torch.nn.parallel.DataParallel(model, device_ids=device_ids)
        print('Let\'s use %d GPUs!'%torch.cuda.device_count())
else:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise NotImplementedError("unsupported generative_model: %r" % (args['generative_model'],))


optim = torch.optim.Adam(model.parameters(),lr=args['lr'],weight_decay=args['regularization'])
if args['scheduler']:
    scheduler=torch.optim.lr_scheduler.StepLR(optim, args['decay_steps'],gamma=args['decay_rate'])
else:
    scheduler = None

if args['loss']=='l1':
    def loss_l1(outputs, targets):
        """Mean absolute error between `outputs` and `targets`."""
        L = torch.abs(outputs - targets).mean()
        return L
    loss_fn = loss_l1
elif args['loss']=='l1_var':  # was `arg[...]`, a NameError
    lambda_var = args['lambda_var']
    def variational_loss(tx,tx_hat):
        """L1 reconstruction term plus lambda_var times a KL-style term.

        tx     -- triple (x, mu, logvar).
        tx_hat -- tensor that x is compared against.
        NOTE(review): assumes the training loop calls this like loss_l1, i.e.
        (model output, target) -- confirm against train_funcs.
        """
        x,mu,logvar = tx
        # Previously computed `x - tx`, i.e. tensor minus the whole tuple.
        l1_loss = torch.mean(torch.abs(x-tx_hat))
        var_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return l1_loss + lambda_var * var_loss
    loss_fn = variational_loss
else:
    raise ValueError("unsupported loss %r (expected 'l1' or 'l1_var')" % (args['loss'],))



Let's use 4 GPUs!


In [7]:
# Count only the parameters that take part in training (requires_grad set).
trainable_tensors = [tensor for tensor in model.parameters() if tensor.requires_grad]
params = sum(tensor.numel() for tensor in trainable_tensors)
print("Total number of parameters is: {}".format(params))
print(model)

Total number of parameters is: 1726531
SpiralAutoencoder(
  (conv): ModuleList(
    (0): SpiralConv(
      (conv): Linear(in_features=42, out_features=16, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (1): SpiralConv(
      (conv): Linear(in_features=208, out_features=32, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (2): SpiralConv(
      (conv): Linear(in_features=288, out_features=64, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (3): SpiralConv(
      (conv): Linear(in_features=576, out_features=128, bias=True)
      (activation): ELU(alpha=1.0)
    )
  )
  (fc_latent_enc): Linear(in_features=5888, out_features=128, bias=True)
  (fc_latent_dec): Linear(in_features=128, out_features=5888, bias=True)
  (dconv): ModuleList(
    (0): SpiralConv(
      (conv): Linear(in_features=1152, out_features=64, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (1): SpiralConv(
      (conv): Linear(in_features=576, out_features=32, bias=True)
      (activation):

In [None]:
if args['mode'] == 'train':
    writer = SummaryWriter(summary_path)
    # Keep a JSON copy of the run parameters next to the checkpoints.
    with open(os.path.join(args['results_folder'],'checkpoints', args['name'] +'_params.json'),'w') as fp:
        saveparams = copy.deepcopy(args)
        json.dump(saveparams, fp)

    if args['resume']:
        checkpoint_file = os.path.join(checkpoint_path,args['checkpoint_file']+'.pth.tar')
        # Log the file that is really opened (the message used to omit '.pth.tar').
        print('loading checkpoint from file %s'%(checkpoint_file))
        checkpoint_dict = torch.load(checkpoint_file,map_location=device)
        start_epoch = checkpoint_dict['epoch'] + 1
        model.load_state_dict(checkpoint_dict['autoencoder_state_dict'])
        optim.load_state_dict(checkpoint_dict['optimizer_state_dict'])
        # `scheduler` is None when args['scheduler'] is False; restoring its
        # state unconditionally raised AttributeError in that configuration.
        if scheduler is not None:
            scheduler.load_state_dict(checkpoint_dict['scheduler_state_dict'])
        print('Resuming from epoch %s'%(str(start_epoch)))
    else:
        start_epoch = 0

    if args['generative_model'] == 'autoencoder':
        train_autoencoder_dataloader(dataloader_train, dataloader_val,
                          device, model, optim, loss_fn,
                          bsize = args['batch_size'],
                          start_epoch = start_epoch,
                          n_epochs = args['num_epochs'],
                          eval_freq = args['eval_frequency'],
                          scheduler = scheduler,
                          writer = writer,
                          save_recons=True,
                          shapedata=shapedata,
                          metadata_dir=checkpoint_path, samples_dir=samples_path,
                          checkpoint_path = args['checkpoint_file'])

loading checkpoint from file /home/jingwang/Data/data/FaceWarehouse/results/spirals_autoencoder/latent_128/checkpoints/try_variational/checkpoint


  0%|          | 0/580 [00:00<?, ?it/s]

Resuming from epoch 16


100%|██████████| 580/580 [06:02<00:00,  1.60it/s]
100%|██████████| 7/7 [00:00<00:00,  7.26it/s]


epoch 16 | tr 0.0736729741931 | val 0.0989421790838


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 17 | tr 0.0740792580463 | val 0.105587059259


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 18 | tr 0.0704835960963 | val 0.0953730034828


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 19 | tr 0.0694200358386 | val 0.0922196090221


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 20 | tr 0.0677045895879 | val 0.0944224008918


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 21 | tr 0.0668770822343 | val 0.0945045202971


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 22 | tr 0.0674639995306 | val 0.0939065256715


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.09it/s]


epoch 23 | tr 0.0658795791646 | val 0.0882619777322


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 24 | tr 0.0652708638138 | val 0.0951713106036


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 25 | tr 0.065407196214 | val 0.084745477736


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 26 | tr 0.0627900400938 | val 0.0873714980483


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 27 | tr 0.061959224631 | val 0.093751963377


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.11it/s]


epoch 28 | tr 0.0626770003538 | val 0.0848408234119


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 29 | tr 0.0614681518026 | val 0.0866807445884


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 30 | tr 0.0606260926944 | val 0.0937373268604


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 31 | tr 0.0605161878698 | val 0.0919766399264


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 32 | tr 0.0601327626227 | val 0.0935595461726


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 33 | tr 0.0587995279388 | val 0.0830590614676


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 34 | tr 0.0595444848311 | val 0.0820716997981


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 35 | tr 0.0584011783695 | val 0.0879223880172


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 36 | tr 0.057937136787 | val 0.0841301360726


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 37 | tr 0.0571713261884 | val 0.0829522076249


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.09it/s]


epoch 38 | tr 0.0563189331314 | val 0.0817415553331


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 39 | tr 0.0568614152806 | val 0.088312112391


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 40 | tr 0.0557321260908 | val 0.0832271012664


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 41 | tr 0.0556383272055 | val 0.0858266353607


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 42 | tr 0.0552187342839 | val 0.0851809173822


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 43 | tr 0.0549155903402 | val 0.0841249370575


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 44 | tr 0.0541788438849 | val 0.0803450864553


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 45 | tr 0.0537255445816 | val 0.0832906001806


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 46 | tr 0.0531289971466 | val 0.0840942344069


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 47 | tr 0.0535769395081 | val 0.0787072440982


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 48 | tr 0.0532942036488 | val 0.0824783706665


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 49 | tr 0.052690090496 | val 0.0776695096493


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 50 | tr 0.0522212033834 | val 0.0799286228418


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 51 | tr 0.0519565350537 | val 0.0794578287005


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 52 | tr 0.0517453294376 | val 0.0819175854325


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 53 | tr 0.0512162586674 | val 0.0809000092745


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 54 | tr 0.0515216049899 | val 0.0818503040075


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 55 | tr 0.0507059218812 | val 0.076242839694


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 56 | tr 0.0502552493315 | val 0.0773185184598


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.10it/s]


epoch 57 | tr 0.0497233957695 | val 0.0803504699469


100%|██████████| 580/580 [05:53<00:00,  1.64it/s]
100%|██████████| 7/7 [00:00<00:00,  8.11it/s]


epoch 58 | tr 0.0500977822033 | val 0.0835678943992


 97%|█████████▋| 565/580 [05:44<00:09,  1.64it/s]

In [None]:
# Show which device the latent decoder weights live on. `.module` exists only
# when the model is wrapped (e.g. in DataParallel); fall back to the bare
# model otherwise so the cell works in both setups.
getattr(model, 'module', model).fc_latent_dec.weight.device

In [18]:
if args['mode'] == 'test':
    # No earlier cell creates `dataloader_test`, so build it here with the
    # same settings as the validation loader.
    # NOTE(review): assumes the dataset classes accept points_dataset='test'
    # in the same way as 'train'/'val' -- confirm.
    if args['use_cache']:
        dataset_test = cached_autoencoder_dataset(root_dir = args['data'], points_dataset = 'test',
                                                  shapedata = shapedata,
                                                  normalization = args['normalization'], device=device)
        dataloader_test = DataLoader(dataset_test, batch_size=args['batch_size'],
                                     shuffle = False, num_workers=0)
    else:
        # NOTE(review): assumes the autoencoder_dataset module exposes a class
        # of the same name -- confirm.
        from autoencoder_dataset import autoencoder_dataset
        dataset_test = autoencoder_dataset(root_dir = args['data'], points_dataset = 'test',
                                           shapedata = shapedata,
                                           normalization = args['normalization'])
        dataloader_test = DataLoader(dataset_test, batch_size=args['batch_size'],
                                     shuffle = False, num_workers = args['num_workers'],pin_memory=True)

    # Make sure the folder handed to the test routine as `worst_path` exists.
    if not os.path.exists(worst_mesh_path):
        os.makedirs(worst_mesh_path)

    print('loading checkpoint from file %s'%(os.path.join(checkpoint_path,args['checkpoint_file']+'.pth.tar')))
    checkpoint_dict = torch.load(os.path.join(checkpoint_path,args['checkpoint_file']+'.pth.tar'),map_location=device)
    model.load_state_dict(checkpoint_dict['autoencoder_state_dict'])

    predictions, norm_l1_loss, l2_loss = test_autoencoder_dataloader(device, model, dataloader_test,
                                                                     shapedata, worst_path=worst_mesh_path,
                                                                     worst_face_num=args['worst_face_num'],
                                                                     mm_constant = 100)
    np.save(os.path.join(prediction_path,'predictions'), predictions)

    print('autoencoder: normalized loss', norm_l1_loss)

    print('autoencoder: euclidean distance in mm=', l2_loss)

loading checkpoint from file /home/jingwang/Data/data/FaceWarehouse/results/spirals_autoencoder/latent_128/checkpoints/align_pose/checkpoint.pth.tar


  0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cpu

In [37]:
from pprint import pprint
pprint(list(model.modules()))

[DataParallel(
  (module): SpiralAutoencoder(
    (conv_0): SpiralConv(
      (conv): Linear(in_features=42, out_features=16, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (conv_1): SpiralConv(
      (conv): Linear(in_features=208, out_features=32, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (conv_2): SpiralConv(
      (conv): Linear(in_features=288, out_features=64, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (conv_3): SpiralConv(
      (conv): Linear(in_features=576, out_features=128, bias=True)
      (activation): ELU(alpha=1.0)
    )
    (fc_latent_enc): Linear(in_features=5888, out_features=146, bias=True)
    (fc_latent_dec): Linear(in_features=146, out_features=5888, bias=True)
    (dconv): ModuleList(
      (0): SpiralConv(
        (conv): Linear(in_features=1152, out_features=64, bias=True)
        (activation): ELU(alpha=1.0)
      )
      (1): SpiralConv(
        (conv): Linear(in_features=576, out_features=32, bias=True)
        (activation