In [1]:
#!/usr/bin/env python3
# Copyright 2004-present Facebook. All Rights Reserved.
#  python optim.py -s example1/synth_test.json -e example1
import argparse
import json
import logging
import os
import random
import time
import torch
import numpy as np

import deep_sdf
import deep_sdf.workspace as ws

import pdb

from library.optimiser import *
from library.objective_function import *
from library.post_analysis import *
from library.experiments import *
%matplotlib notebook

In [5]:
def adjust_learning_rate(initial_lr, optimizer, num_iterations, decreased_by, adjust_lr_every):
    """Apply a step-decay learning-rate schedule to every parameter group.

    The rate is divided by ``decreased_by`` once for each full block of
    ``adjust_lr_every`` iterations that has elapsed.

    @param initial_lr: learning rate at iteration 0
    @param optimizer: object exposing ``param_groups`` (a list of dicts with an "lr" entry)
    @param num_iterations: index of the current iteration
    @param decreased_by: divisor applied at every decay step
    @param adjust_lr_every: number of iterations between two decay steps
    """
    completed_steps = num_iterations // adjust_lr_every
    decay = (1 / decreased_by) ** completed_steps
    new_lr = initial_lr * decay
    for group in optimizer.param_groups:
        group["lr"] = new_lr

def chamfer_distance(p1, p2):
    '''
    Symmetric Chamfer distance between two point sets.

    @param p1: 2-D tensor, one point per row
    @param p2: 2-D tensor, one point per row (same number of columns as p1)
    @return: scalar tensor: mean distance from every p1 point to its nearest
             p2 point, plus the mean distance from every p2 point to its
             nearest p1 point
    '''
    # Pairwise differences through broadcasting: (N, 1, D) - (1, M, D) -> (N, M, D)
    offsets = p1.unsqueeze(1) - p2.unsqueeze(0)

    # Euclidean length of every difference vector -> (N, M)
    pairwise = offsets.norm(p=2, dim=2)

    # Nearest neighbour in the other set, in both directions
    nearest_to_p1 = pairwise.min(dim=1)[0]
    nearest_to_p2 = pairwise.min(dim=0)[0]

    return nearest_to_p1.mean() + nearest_to_p2.mean()

class argms:
    """Plain attribute container standing in for parsed command-line arguments.

    NOTE(review): the attribute names look like the options of the
    ``optim.py`` script mentioned at the top of the notebook - confirm.
    """
    def __init__(self):
        defaults = {
            "experiment_directory": "example1",
            "checkpoint": "latest",
            "iterations": 100,
            "split_filename": "example1/synth_test.json",
            "logfile": None,
            "debug": False,
            "quiet": False,
        }
        for option, value in defaults.items():
            setattr(self, option, value)
# Module-level instance read by the helper functions and cells below.
args = argms()

def getLatentSourceAndTarget(args, source_id, target_id):
    """Load the stored latent codes and pick a source and a target code.

    @param args: object with an ``experiment_directory`` attribute
    @param source_id: row index of the starting latent code
    @param target_id: row index of the latent code used as objective
    @return latent: (1, code_length) copy of the source code with
                    ``requires_grad`` enabled (this is what gets optimised)
            latent_target: (1, code_length) copy of the target code, no gradient
    """
    # Load collection of all latent codes
    all_codes_path = os.path.join(
        args.experiment_directory,
        ws.latent_codes_subdir,
        'latest.pth')
    all_codes = torch.load(all_codes_path)['latent_codes']['weight']

    # Source shape (ie starting point). The caller's ``source_id`` is honoured;
    # it used to be silently replaced by a hard-coded 999.
    # ``clone()`` gives the result its own storage, so in-place optimiser steps
    # cannot write into the loaded table or into ``latent_target``.
    latent = all_codes[source_id].unsqueeze(0).detach().clone()#.cuda()   #Add .cuda() if you want to run on GPU
    latent.requires_grad = True

    # Target shape (ie objective)
    latent_target = all_codes[target_id].unsqueeze(0).detach().clone()#.cuda()   #Add .cuda() if you want to run on GPU
    return latent, latent_target

def constructDecoder(args):
    """Build the decoder network described by the experiment and load its weights.

    Reads ``specs.json`` from ``args.experiment_directory``, imports the
    architecture module named by its "NetworkArch" entry, wraps the decoder
    in ``torch.nn.DataParallel`` and restores the checkpoint
    ``args.checkpoint + ".pth"`` onto the CPU.

    @param args: object with ``experiment_directory`` and ``checkpoint`` attributes
    @return: the ``DataParallel``-wrapped decoder with loaded parameters
    """
    specs_filename = os.path.join(args.experiment_directory, "specs.json")
    # Context manager so the specs file is closed right after parsing.
    with open(specs_filename) as specs_file:
        specs = json.load(specs_file)
    arch = __import__("networks." + specs["NetworkArch"], fromlist=["Decoder"])
    latent_size = specs["CodeLength"]
    # Load decoder: this is our black box function
    decoder = arch.Decoder(latent_size, **specs["NetworkSpecs"])
    # The state dict is loaded into the DataParallel wrapper (not the bare module).
    decoder = torch.nn.DataParallel(decoder)
    saved_model_state = torch.load(
        os.path.join(
            args.experiment_directory, ws.model_params_subdir, args.checkpoint + ".pth"
        ),
        map_location=torch.device('cpu') # Remove this if you want to run on GPU
    )
    decoder.load_state_dict(saved_model_state["model_state_dict"])
    # Optionally: put decoder on GPU
    #decoder = decoder.module.cuda()
    return decoder

class decoder_obj(objective_func):
    """Objective: Chamfer distance between a decoded shape and a fixed target shape.

    ``func`` meshes the shape encoded by a latent code and compares a random
    subsample of its vertices with a random subsample of the target vertices.
    ``dfunc`` returns the gradient of that loss with respect to the latent
    code, pushed back through the decoder via the surface normals.
    """
    def __init__(self, latent_target, decoder):
        """
        @param latent_target: latent code of the target shape
        @param decoder: network mapping [latent, xyz] rows to SDF values
        """
        self.N_MARCHING_CUBE = 64   # grid resolution passed to create_mesh_optim
        self.l2reg= True            # add an L2 penalty on the latent code in dfunc
        self.regl2 = 1e-3           # weight of that penalty (halved every 20 dfunc calls)
        self.iter = 0               # number of completed dfunc calls
        self.quick = False          # let dfunc reuse the loss of the last func call
        
        self.latent_target = latent_target
        self.decoder = decoder
        self.optimum = 0
        self.optimal = latent_target

        # Cache written by func and consumed by dfunc when ``quick`` is set.
        self.last_loss = None
        self.last_latent = None
        self._loss_graph_unused = False
        
        # Get a mesh representation of the target shape
        self.verts_target, faces_target = deep_sdf.mesh.create_mesh_optim(
            decoder, latent_target, N=self.N_MARCHING_CUBE, max_batch=int(2 ** 18)
        )
    
        
    def func(self, latent):
        """Chamfer loss between the shape decoded from ``latent`` and the target.

        Also stores the sampled vertices in ``self.xyz_upstream`` (a leaf tensor
        with gradients enabled) and refreshes the loss cache used by ``dfunc``.

        @param latent: latent code to evaluate
        @return: scalar loss tensor
        """
        # from latent to xyz
        # NOTE(review): verts is presumably a NumPy array of surface points - confirm.
        verts, faces = deep_sdf.mesh.create_mesh_optim(self.decoder, latent, N=self.N_MARCHING_CUBE, max_batch=int(2 ** 18))
        # random subsample of at most 20000 vertices
        verts = verts[torch.randperm(verts.shape[0])]
        verts = verts[0:20000, :]
        self.xyz_upstream = torch.tensor(verts.astype(float), requires_grad = True, dtype=torch.float32)#, device=torch.device('cuda:0')) # For GPU,
       
        # from latent_target to xyz_target
        verts_target_sample = self.verts_target[torch.randperm(self.verts_target.shape[0])]
        verts_target_sample = verts_target_sample[0:20000, :]
        xyz_target = torch.tensor(verts_target_sample.astype(float), requires_grad = False, dtype=torch.float32) # For GPU, add: , device=torch.device('cuda:0'))

        # compare difference
        loss = chamfer_distance(self.xyz_upstream, xyz_target)
        self.last_loss = loss
        # Store a snapshot, not an alias: callers update ``latent`` in place, and
        # an alias would always compare equal to the current value.
        self.last_latent = latent.detach().clone()
        self._loss_graph_unused = True
        return loss
    
    def dfunc(self, latent):
        """Gradient of the Chamfer loss with respect to ``latent``.

        @param latent: latent code with ``requires_grad`` enabled; its ``.grad``
                       is reset and then overwritten by this call
        @return: ``latent.grad`` after the backward pass
        """
        if latent.grad is not None:
            latent.grad.detach_()
            latent.grad.zero_()
        
        # step 1: loss and its gradient with respect to the sampled vertices.
        # The cached loss is reused only when it was computed for this very
        # latent value and its graph has not been back-propagated yet.
        reuse_cached = (
            self.quick
            and self._loss_graph_unused
            and self.last_latent is not None
            and torch.equal(self.last_latent, latent.detach())
        )
        loss = self.last_loss if reuse_cached else self.func(latent)
        self._loss_graph_unused = False   # backward() below frees the graph
        self.decoder.eval()
        loss.backward()
        dL_dx_i = self.xyz_upstream.grad
        
        # step 2
        # use vertices to compute full backward pass
        xyz = self.xyz_upstream.clone().detach()
        xyz.requires_grad = True
        latent_inputs = latent.expand(xyz.shape[0], -1)
        inputs = torch.cat([latent_inputs, xyz], 1)#.cuda()      #Add .cuda() if you want to run on GPU
        #first compute normals (normalised gradient of the SDF w.r.t. position)
        pred_sdf = self.decoder(inputs)
        loss_normals = torch.sum(pred_sdf)
        loss_normals.backward(retain_graph = True)
        normals = xyz.grad/torch.norm(xyz.grad, 2, 1).unsqueeze(-1)
                
        # step 3
        # now assemble inflow derivative
        # (the normals pass above also wrote into latent.grad, so clear it first)
        latent.grad.detach_()
        latent.grad.zero_()
        dL_ds_i_fast = -torch.matmul(dL_dx_i.unsqueeze(1), normals.unsqueeze(-1)).squeeze(-1)
        loss_backward = torch.sum(dL_ds_i_fast * pred_sdf)
        # Use the instance settings, not notebook globals of the same name.
        if self.l2reg and self.iter % 20 == 0 and self.iter > 0:
            self.regl2 = self.regl2/2
        if self.l2reg:
            loss_backward += self.regl2 * torch.mean(latent.pow(2))
        # Backpropagate
        loss_backward.backward()

        # Count this gradient evaluation so the regulariser decay above can trigger.
        self.iter += 1
        
        return latent.grad
        

In [None]:
if __name__ == "__main__":
    # Baseline run: torch's Adam drives the latent code, with loss and gradient
    # supplied by decoder_obj.
    
    torch.manual_seed(0)
    # 0 Initialization
    # (N_MARCHING_CUBE, l2reg and regl2 are not used in this cell; they stay
    #  defined here because other cells read them as notebook globals.)
    N_MARCHING_CUBE = 64
    lr= 8e-3
    l2reg= True
    regl2 = 1e-3
    decreased_by = 1.5
    adjust_lr_every = 50
    
    # 1 prepare data
    ## sphere
    source_id = 999 # zywvjkvz2492e6xpq4hd1jzy2r9lht        # This will be the source shape (ie starting point)
    ## torus
    target_id = 2 # 0bucd9ryckhaqtqvbiagilujeqzek4  
    latent, latent_target = getLatentSourceAndTarget(args, source_id, target_id)
    
    # 2 prepare model
    decoder = constructDecoder(args)
    # 3 prepare optimiser
    optimizer = torch.optim.Adam([latent], lr=lr)

    losses = []     # loss value per iteration
    lambdas = []    # distance between current and target latent per iteration
    

    objectiveDe = decoder_obj(latent_target, decoder)

    # Use Adam optimizer, with source as starting point, and a loss defined on meshes
    # latent is the input of our function
    print("Starting optimization:")
    for e in range(int(args.iterations)):
        print("latent: ", latent.detach().numpy())
        
        # dfunc evaluates the loss itself (quick is off by default) and keeps it
        # in last_loss. Reading it from there avoids a second marching-cubes pass
        # per iteration, and the logged loss is the one that was differentiated.
        grad = objectiveDe.dfunc(latent)
        loss = objectiveDe.last_loss
        losses.append(loss.detach().cpu().numpy()) 
        lambdas.append(torch.norm(latent_target-latent).detach().cpu().numpy())
        print("loss: ", loss.detach().numpy())
        print("latent grad: ", grad.detach().numpy())

        adjust_learning_rate(lr, optimizer, e, decreased_by, adjust_lr_every)
        # optimizer.step() consumes latent.grad, which dfunc has just filled in
        optimizer.step()
        print(e, "th iteration\n")


In [None]:
if __name__ == "__main__":
    # Reference run: the same optimisation as decoder_obj.func/dfunc, written
    # out inline with torch's Adam.
    
    torch.manual_seed(0)
    # Initialization
    N_MARCHING_CUBE = 64
    lr= 8e-3
    l2reg= True
    regl2 = 1e-3
    decreased_by = 1.5
    adjust_lr_every = 50

    # pick initialization and samples (shared helper instead of a pasted copy)
    ## sphere
    source_id = 999 # zywvjkvz2492e6xpq4hd1jzy2r9lht        # This will be the source shape (ie starting point)
    ## torus
    target_id = 2 # 0bucd9ryckhaqtqvbiagilujeqzek4          # This is be the target shape (ie objective)
    latent, latent_target = getLatentSourceAndTarget(args, source_id, target_id)

    # Load decoder: this is our black box function (shared helper, closes specs.json)
    decoder = constructDecoder(args)

    # Mesh of the target shape. Built only now, once decoder, latent_target and
    # N_MARCHING_CUBE are defined in this cell; it used to sit at the top of the
    # cell and rely on values left over from another cell.
    verts_target, faces_target = deep_sdf.mesh.create_mesh_optim(
        decoder, latent_target, N=N_MARCHING_CUBE, max_batch=int(2 ** 18)
    )

    optimizer = torch.optim.Adam([latent], lr=lr)

    losses2 = []    # loss value per iteration
    lambdas = []    # distance between current and target latent per iteration

    # Use Adam optimizer, with source as starting point, and a loss defined on meshes
    # latent is the input of our function
    print("Starting optimization:")
    for e in range(int(args.iterations)):

        if latent.grad is not None:
            latent.grad.detach_()
            latent.grad.zero_()

        verts, faces = deep_sdf.mesh.create_mesh_optim(decoder, latent, N=N_MARCHING_CUBE, max_batch=int(2 ** 18))

        
        # subsample vertices for gradients computations
        verts = verts[torch.randperm(verts.shape[0])]
        verts = verts[0:20000, :]
        # forward pass within loss layer
        xyz_upstream = torch.tensor(verts.astype(float), requires_grad = True, dtype=torch.float32)#, device=torch.device('cuda:0')) # For GPU,
        # Get a point cloud sampling of the target shape
        verts_target_sample = verts_target[torch.randperm(verts_target.shape[0])]
        verts_target_sample = verts_target_sample[0:20000, :]
        xyz_target = torch.tensor(verts_target_sample.astype(float), requires_grad = False, dtype=torch.float32) # For GPU, add: , device=torch.device('cuda:0'))
        # At this point we have 2 outputs for decoder: the target xyz_target, and the current value xyz_upstream
        # The following lines compute a loss and backpropagate
        # compute loss function: Chamfer between current guess (xyz_upstream) and objective (xyz_target)
        loss = chamfer_distance(xyz_upstream, xyz_target)
        print("Loss at iter", e, ":", loss.item(), ", latent norm: ", torch.norm(latent))
        
        
        losses2.append(loss.detach().cpu().numpy())                                  ## Loss value
        lambdas.append(torch.norm(latent_target-latent).detach().cpu().numpy())     ## Distance in the domain
        decoder.eval()
        loss.backward()
        dL_dx_i = xyz_upstream.grad
        
        # use vertices to compute full backward pass
        xyz = torch.tensor(verts.astype(float), requires_grad = True, dtype=torch.float32)#, device=torch.device('cuda:0')) # For GPU,
        latent_inputs = latent.expand(xyz.shape[0], -1)
        inputs = torch.cat([latent_inputs, xyz], 1)#.cuda()      #Add .cuda() if you want to run on GPU
        #first compute normals
        pred_sdf = decoder(inputs)
        loss_normals = torch.sum(pred_sdf)
        loss_normals.backward(retain_graph = True)
        normals = xyz.grad/torch.norm(xyz.grad, 2, 1).unsqueeze(-1)
        
        # now assemble inflow derivative
        # (the normals pass above also wrote into latent.grad, so clear it first)
        latent.grad.detach_()
        latent.grad.zero_()
        dL_ds_i_fast = -torch.matmul(dL_dx_i.unsqueeze(1), normals.unsqueeze(-1)).squeeze(-1)
        loss_backward = torch.sum(dL_ds_i_fast * pred_sdf)
        # halve the regularisation weight every 20 iterations
        if l2reg and e % 20 == 0 and e > 0:
            regl2 = regl2/2
        if l2reg:
            loss_backward += regl2* torch.mean(latent.pow(2))
        # Backpropagate
        loss_backward.backward()

        # update latent
        # Explicit gradient is accessible via latent.grad
        adjust_learning_rate(lr, optimizer, e, decreased_by, adjust_lr_every)
        optimizer.step()


        # 1. objective function value
        print("loss backward:", loss_backward.item())
        # 2. its derivative function value on current arguments 
        print("\n")
        

In [7]:
class cma_es(adjust_optimizer):
    """CMA-ES (covariance matrix adaptation evolution strategy) optimiser.

    Each generation samples ``lambda_`` candidates from a Gaussian, optionally
    refines every candidate with ``adjust_func`` (a local optimiser), ranks
    them by objective value, and adapts the mean, step size and covariance
    from the best ``mu`` of them.
    """
    def __init__(self, dim=2):
        """
        @param dim: dimension of the search space
        """
        self.dim = dim
        paras = {'x0': torch.zeros((dim,)),
                 'std': torch.ones((dim,)) * 3, 
                 'tol': 1e-5, 
                 'adjust_func': do_nothing(), 
                 'record': False, 
                 'verbose': False}
        self.set_parameters(paras)

    def set_parameters(self, paras):
        """Configure the optimiser from a dict.

        Required keys: 'x0' (initial mean), 'std' (initial per-coordinate
        standard deviation), 'tol' (stopping tolerance), 'adjust_func' (object
        with an ``adjust(x, obj)`` method).
        Optional keys: 'max_iter' (400), 'cluster_size' and 'survival_size'
        (None selects the dimension-based defaults), 'record' (True),
        'verbose' (True).
        """
        self.paras = paras
        self.x0 = paras['x0'] 
        self.std = paras['std']
        self.tol = paras['tol']
        self.adjust_func = paras['adjust_func']
        self.max_iter = paras.get('max_iter', 400)
        # set none to use default value 
        self.cluster_size = paras.get('cluster_size')
        self.survival_size = paras.get('survival_size')
        self.record = paras.get('record', True)
        self.verbose = paras.get('verbose', True)

    @staticmethod
    def _symmetric_eigh(matrix):
        """Eigen-decomposition of a symmetric matrix: (eigenvalues, eigenvectors).

        ``torch.eig`` is gone from current PyTorch; prefer ``torch.linalg.eigh``
        and fall back to ``torch.symeig`` on releases that predate it.
        """
        linalg = getattr(torch, "linalg", None)
        if linalg is not None and hasattr(linalg, "eigh"):
            return linalg.eigh(matrix)
        return torch.symeig(matrix, eigenvectors=True)

    def optimise(self, obj):
        '''
        @param obj: objective function class instance
        return arg: found minimum arguments
               val: found minimum value
               stats: collection of recorded statistics for post-analysis
        '''                  
        def update_mean(x):
            return (weights @ x).reshape(dim, 1)
        def update_ps(ps, sigma, mean, mean_old):
            return (1 - cs) * ps + torch.sqrt(cs * (2 - cs) * mueff) * invsqrtC @ (mean - mean_old) / sigma 
        def stall_indicator(ps):
            # hsig: 1 while the step-size path is of normal length, else 0
            threshold = 1.4 + 2 / (dim + 1)
            normalised = torch.norm(ps) / torch.sqrt(1 - (1 - cs)**(2 * iter_/lambda_)) / chiN
            return (normalised < threshold).float()
        def update_pc(pc, sigma, hsig, mean, mean_old):
            return (1 - cc) * pc + hsig * torch.sqrt(cc * (2 - cc) * mueff) * (mean - mean_old) / sigma
        def update_C(C, pc, hsig, x, mean_old, sigma):
            artmp = (1 / sigma) * (x - mean_old.reshape(1, dim))
            return (1 - c1 - cmu) * C + c1 * (pc * pc.T + (1 - hsig) * cc * (2 - cc) * C) + cmu * artmp.T @ torch.diag(weights) @ artmp
        def update_sigma(sigma, ps):
            return sigma * torch.exp((cs / damps) * (torch.norm(ps)/ chiN - 1))
        def is_not_moving(arg, val, pre_arg, pre_val, tol):
            dis_arg = torch.norm(arg - pre_arg, dim=1).mean()
            dis_val = torch.abs(val - pre_val).mean()
            return (dis_arg < tol and dis_val < tol) 

        if self.verbose:
            print("\n\n*******starting optimisation from intitial mean: ", self.x0.squeeze().detach().numpy())
        # User defined input parameters 
        dim = self.dim
        sigma = 0.3
        D = self.std / sigma
        # CMA-ES is gradient free: detach so no autograd graph is built on x0.
        mean = self.x0.detach().reshape(dim, 1)
        # the size of solutions group
        lambda_ = 4 + int(3 * np.log(dim)) if self.cluster_size is None else self.cluster_size  
        # only best "mu" solutions are used to generate iterations
        mu = int(lambda_ / 2) if self.survival_size is None else self.survival_size
        # used to combine best "mu" solutions                                               
        weights = float(np.log(mu + 1/2)) - torch.log(torch.arange(mu, dtype=torch.float) + 1) 
        weights = (weights / torch.sum(weights)).float()    
        mueff = 1 / torch.sum(weights**2) 

        # Strategy parameter setting: Adaptation
        # time constant for cumulation for C
        cc = (4 + mueff / dim) / (dim + 4 + 2 * mueff / dim)  
        # t-const for cumulation for sigma control
        cs = (mueff + 2) / (dim + mueff + 5)  
        # learning rate for rank-one update of C
        c1 = 2 / ((dim + 1.3)**2 + mueff)    
        # and for rank-mu update
        cmu = min(1 - c1, 2 * (mueff - 2 + 1 / mueff) / ((dim + 2)**2 + mueff))  
        # damping for sigma, usually close to 1  
        damps = 1 + 2 * max(0.0, float(torch.sqrt((mueff - 1) / (dim + 1))) - 1) + cs

        # Initialize dynamic (internal) strategy parameters and constants
        # evolution paths for C and sigma
        pc = torch.zeros((dim, 1))     
        ps = torch.zeros((dim, 1)) 
        # B defines the coordinate system
        B = torch.eye(int(dim))       
        # covariance matrix C (matrix products, not element-wise ones)
        C = B @ torch.diag(D**2) @ B.T 
        # C^-1/2 
        invsqrtC = B @ torch.diag(D**-1) @ B.T   
        # expectation of ||N(0,I)|| == norm(randn(N,1)) 
        chiN = dim**0.5 * (1 - 1/(4 * dim) + 1 / (21 * dim**2))  

        # torch raises its own LinAlgError on recent releases; catch both flavours.
        linalg_errors = (np.linalg.LinAlgError,)
        torch_linalg_error = getattr(getattr(torch, "linalg", None), "LinAlgError", None)
        if torch_linalg_error is not None:
            linalg_errors = linalg_errors + (torch_linalg_error,)

        # --------------------  Initialization --------------------------------  
        x, x_old, f = torch.zeros((lambda_, dim)), torch.zeros((lambda_, dim)), torch.zeros((lambda_,))
        stats = {}
        stats['inner'] = []
        stats['val'], stats['arg'] = [], []
        stats['x_adjust'] = []
        iter_eval, stats['evals_per_iter'] = torch.zeros((lambda_, )), []
        # one statistics dict per candidate, replaced on every generation
        inner_stats = [{} for _ in range(lambda_)]
        stats['mean'], stats['std'] = [], []
        stats['status'] = None
        iter_, eval_ = 0, 0
        # initial data in record
        for i in range(lambda_):
            x[i,:] = (mean + 0.1 * torch.randn(dim, 1)).squeeze()
            # The initial population is not evaluated; 10 is a placeholder value.
            #f[i] = obj.func(x[i])
            f[i] = 10.0
        idx = torch.argsort(f.detach())
        x_ascending = x[idx]
        if self.record:
            # inner_stats is a plain list of dicts: store a shallow copy
            stats['inner'].append(list(inner_stats))
            stats['arg'].append(x_ascending.detach().numpy())
            stats['val'].append(f[idx].detach().numpy())
            stats['mean'].append(mean.detach().numpy())
            stats['std'].append((sigma * B @ torch.diag(D)).detach().numpy())
            stats['evals_per_iter'].append(torch.ones((lambda_,)).detach().numpy())
            stats['x_adjust'].append(np.vstack((x.T.clone().detach().numpy(), x.T.clone().detach().numpy())))
        arg = x_ascending
        val = f[idx]
        pre_arg = x_ascending
        pre_val = f[idx]
        best_val = 1e4
        best_arg = None
        
        # optimise by iterations
        try:
            while iter_ < self.max_iter:
                iter_ += 1
                # generate candidate solutions with some stochastic elements
                for i in range(lambda_):
                    candidate = (mean + sigma * B @ torch.diag(D) @ torch.randn(dim, 1)).squeeze()
                    x_old[i] = candidate
                    
                    adjusted, value, inner_stats[i] = self.adjust_func.adjust(candidate.clone().detach().requires_grad_(True), obj)
                    # Store detached results so the population tensors do not
                    # drag an ever growing autograd graph from one generation to the next.
                    x[i] = torch.as_tensor(adjusted).detach()
                    f[i] = torch.as_tensor(value).detach()
                    eval_ += inner_stats[i]['evals']
                    iter_eval[i] = inner_stats[i]['evals']
                # sort the value and positions of solutions 
                idx = torch.argsort(f.detach())
                x_ascending = x[idx]

                # update the parameter for next iteration
                mean_old = mean
                mean = update_mean(x_ascending[:mu])
                ps = update_ps(ps, sigma, mean, mean_old)
                hsig = stall_indicator(ps)
                pc = update_pc(pc, sigma, hsig, mean, mean_old)
                # C is adapted with the step size that generated this population,
                # so it has to be updated before sigma is.
                C = update_C(C, pc, hsig, x_ascending[:mu], mean_old, sigma)
                sigma = update_sigma(sigma, ps)
                # enforce symmetry before the eigen-decomposition
                C = torch.triu(C) + torch.triu(C, 1).T
                D, B = self._symmetric_eigh(C)
                D = torch.sqrt(D)
                invsqrtC = B @ torch.diag(D**-1) @ B.T
                arg = x_ascending
                val = f[idx]
                if self.verbose:
                    print("iter: ", iter_)
                    print("loss: ", val[0].detach().numpy())
                    print("latent: ", x_ascending[0].detach().numpy())
                    print("\n")
                # record data during process for post analysis
                if self.record:
                    stats['inner'].append(list(inner_stats))
                    stats['arg'].append(x_ascending.detach().numpy())
                    stats['val'].append(f[idx].detach().numpy())
                    stats['mean'].append(mean.detach().numpy())
                    stats['std'].append((sigma * B @ torch.diag(D)).detach().numpy())
                    stats['evals_per_iter'].append(iter_eval.clone().detach().numpy())
                    stats['x_adjust'].append(np.vstack((x_old.T.clone().detach().numpy(), x.T.clone().detach().numpy())))
                # keep the best candidate seen so far
                if best_val > val[0]:
                    best_val = val[0]
                    best_arg = arg[0]              
                # check the stop condition
                # (a non-finite D means C lost positive definiteness)
                if not bool(torch.isfinite(D).all()) or torch.max(D) > (torch.min(D) * 1e6):
                    stats['status'] = 'diverge'
                    print('diverge, concentrate in low dimension manifold')
                    break
                if is_not_moving(arg, val, pre_arg, pre_val, self.tol) :
                    break
                pre_arg = arg
                pre_val = val
        except linalg_errors as err:
            stats['status'] = 'diverge'
            print('diverge, raise LinAlgError!')
        finally:
            if self.verbose:
                print('eigenvalue of variance = {}'.format(D))
                print('total iterations = {}, total evaluatios = {}'.format(iter_, eval_))
                # best_arg stays None when no generation improved on the initial bound
                if best_arg is not None:
                    print('found minimum position = {}, found minimum = {}'.format(best_arg.detach().numpy(), best_val.detach().numpy()))

        # carry statistics info before quit
        if self.record:
            stats['arg'] = np.array(stats['arg'])
            stats['val'] = np.array(stats['val'])
            stats['mean'] = np.array(stats['mean'])
            stats['std'] = np.array(stats['std'])
            stats['evals_per_iter'] = np.array(stats['evals_per_iter'])
            stats['x_adjust'] = np.array(stats['x_adjust'])
        stats['evals'] = eval_
        return best_arg, best_val, stats

In [69]:
class adam(adjust_optimizer):
    """Hand-written Adam optimiser driven by an objective's ``func`` / ``dfunc``."""
    def __init__(self, alpha=0.01, verbose=False, dim=2):
        """
        @param alpha: step size
        @param verbose: print progress on every iteration
        @param dim: dimension of the default (all-zero) starting point
        """
        # honour the argument (it used to be ignored in favour of a constant 0.01)
        self.alpha = alpha
        self.beta_1 = 0.9
        self.beta_2 = 0.999
        self.epsilon = 1e-11
        self.max_iter = 10000
        self.tol = 1e-3
        self.verbose = verbose
        self.record = False
        self.x0 = torch.zeros((dim,))
        
    def set_parameters(self, paras):
        """Configure the optimiser from a dict.

        Required keys: 'x0', 'alpha', 'beta_1', 'beta_2', 'epsilon',
        'max_iter', 'tol'. Optional keys: 'verbose' (True), 'record' (False).
        """
        self.paras = paras
        self.x0 = paras['x0']
        self.alpha = paras['alpha']
        self.beta_1 = paras['beta_1']
        self.beta_2 = paras['beta_2']
        self.epsilon = paras['epsilon']
        self.max_iter = paras['max_iter']
        self.tol = paras['tol']
        self.verbose = True if 'verbose' not in paras.keys() else paras['verbose']
        self.record = False if 'record' not in paras.keys() else paras['record']
        
    def optimise(self, obj):
        '''
        @param obj: objective function class instance (provides func and dfunc)
        return x: final position
               val: objective value at x
               stats: collection of recorded statistics for post-analysis
        '''
        m_t = 0 
        v_t = 0 
        eval_cnt = 0
        x = self.x0
        # defined up front so the final report also works when the loop never runs
        g_t = None
        est_df = None
        stats = {}
        stats['status'] = None
        stats['gradient_before_after'] = []
        stats['arg'] = []
        stats['val'] = []
        if self.record:
            stats['arg'].append(x.clone().detach().numpy())
            stats['val'].append(obj.func(x).detach().numpy())
            # one gradient evaluation is enough: before and after coincide at the start
            initial_grad = obj.dfunc(x).detach().clone().numpy()
            stats['gradient_before_after'].append([initial_grad, initial_grad])
        if self.verbose:
            print("\n\n*******starting optimisation from intitial point: ", self.x0.squeeze().detach().numpy())
        while eval_cnt < self.max_iter:					#till it gets converged
            eval_cnt += 1
            # fresh leaf every iteration: self.x0 is never modified in place
            x = x.clone().detach().requires_grad_(True)
            loss = obj.func(x)
            g_t = obj.dfunc(x)		#computes the gradient of the stochastic function
            m_t = self.beta_1*m_t + (1-self.beta_1)*g_t	#updates the moving averages of the gradient
            v_t = self.beta_2*v_t + (1-self.beta_2)*(g_t*g_t)	#updates the moving averages of the squared gradient
            m_cap = m_t/(1-(self.beta_1**eval_cnt))		#calculates the bias-corrected estimates
            v_cap = v_t/(1-(self.beta_2**eval_cnt))		#calculates the bias-corrected estimates
            x_prev = x.clone()								
            est_df = (m_cap)/(torch.sqrt(v_cap)+self.epsilon)
            with torch.no_grad():
                x -= self.alpha * est_df 	#updates the parameters
            if self.verbose:
                print("iter: ", eval_cnt)
                print("loss: ", loss.detach().numpy())
                print("gradient: ", g_t.detach().numpy())
                print("latent: ", x.detach().numpy())
                print("\n")
            if self.record:
                stats['arg'].append(x.clone().detach().numpy())
                stats['val'].append(obj.func(x).detach().numpy())
                stats['gradient_before_after'].append([g_t.detach().numpy(), est_df.detach().numpy()])
            if(torch.norm(x-x_prev) < self.tol):		#checks if it is converged or not
                break
        # evaluate the final point once and use it for both the report and the return value
        final_val = obj.func(x)
        if self.verbose:
            print('total evaluatios = {}'.format(eval_cnt))
            print('gradient at stop position = {},\nmodified graident = {}'.format(g_t, est_df))
            print('found minimum position = {}, found minimum = {}'.format(x.detach().numpy(), final_val.detach().numpy()))
        stats['arg'] = np.array(stats['arg'])
        stats['val'] = np.array(stats['val'])
        stats['gradient_before_after'] = np.array(stats['gradient_before_after'])
        stats['evals'] = eval_cnt
        return x, final_val, stats
    
class line_search(adjust_optimizer):
    """Steepest descent with a backtracking (Armijo) line search."""
    def __init__(self, alpha=1, beta=0.1, dim=2):
        """
        @param alpha: initial step size
        @param beta: control the armijo condition
        @param dim: dimension of the default (all-zero) starting point
        """
        self.alpha = alpha
        self.beta = beta
        self.max_iter = 100
        self.tol = 1e-2
        self.verbose = False
        self.record = False
        # default start so optimise() also works without set_parameters()
        self.x0 = torch.zeros((dim,))
     
    def set_parameters(self, paras):
        """Configure the optimiser from a dict.

        Required keys: 'x0', 'alpha', 'beta', 'max_iter', 'tol'.
        Optional keys: 'verbose' (True), 'record' (True).
        """
        self.paras = paras
        self.x0 = paras['x0']
        self.alpha = paras['alpha']
        self.beta = paras['beta']
        self.max_iter = paras['max_iter']
        self.tol = paras['tol']
        self.verbose = True if 'verbose' not in paras.keys() else paras['verbose']
        self.record = True if 'record' not in paras.keys() else paras['record']
    def optimise(self, obj):
        '''
        @param obj: objective function class instance (provides func and dfunc)
        @return x: point position after moving to local minimum
                val: objective value at x
                stats: collection of recorded statistics for post-analysis
        '''
        # Work on a private copy: the in-place step below must not modify the
        # tensor the caller passed in as x0.
        x = self.x0.clone().detach().requires_grad_(True)
        # the step size only ever shrinks; it is not reset between outer iterations
        alpha_ = self.alpha
        tao = 0.5
        fx = obj.func(x)
        p = - obj.dfunc(x)
        fnx = obj.func(x + alpha_ * p)
        eval_cnt = 3
        stats = {}
        stats['status'] = None
        stats['gradient'] = []
        stats['arg'] = []
        stats['val'] = []
        if self.record:
            stats['arg'].append(x.clone().detach().numpy())
            stats['val'].append(fx.detach().numpy())
            stats['gradient'].append(-p.detach().numpy())
        if self.verbose:
            print("\n*******starting optimisation from intitial point: ", self.x0.squeeze().detach().numpy())
        for k in range(self.max_iter):
            # Armijo condition; sum(p * p) is the squared gradient norm for any
            # tensor shape, whereas `p @ p` only works for 1-D directions.
            while fnx > fx - alpha_ * self.beta * torch.sum(p * p):
                alpha_ *= tao
                fnx = obj.func(x + alpha_ * p)
                eval_cnt += 1
            with torch.no_grad():
                x += alpha_ * p
            fx = fnx
            x = x.clone().detach().requires_grad_(True)
            p = -obj.dfunc(x)
            fnx = obj.func(x + alpha_ * p)
            eval_cnt += 2
            if self.record:
                stats['arg'].append(x.clone().detach().numpy())
                stats['val'].append(fx.detach().numpy())
                stats['gradient'].append(-p.detach().numpy())
            if torch.norm(p) < self.tol:
                break
        stats['evals'] = eval_cnt
        if self.verbose:
            print('total evaluatios = {}'.format(eval_cnt))
            print('gradient at stop position = {}'.format(-p.detach().numpy()))
            print('found minimum position = {}, found minimum = {}'.format(x.detach().numpy(), fx.detach().numpy()))
        stats['arg'] = np.array(stats['arg'])
        stats['val'] = np.array(stats['val'])
        stats['gradient'] = np.array(stats['gradient'])
        # fx is the value at the returned x; fnx belongs to the next trial point
        return x, fx, stats



In [None]:
# Single experiment: hand-written Adam on the decoder objective.
exp = single_experiment()

# Objective: Chamfer loss between the decoded shape and the target shape.
objectiveDe = decoder_obj(latent_target, decoder)
exp.set_objective(objectiveDe)

# Optimiser and its settings (start from the source latent code).
opt = adam(dim=8)
optParas = dict(
    x0=latent,
    alpha=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-11,
    max_iter=100,
    tol=1e-6,
    verbose=True,
    record=False,
)
opt.set_parameters(optParas)
exp.set_optimizer(opt)

# Run it.
exp.do()

In [None]:
# Single experiment: hand-written Adam on the decoder objective.
# NOTE(review): this cell has the same content as the Adam experiment cell before it.
exp = single_experiment()

# Objective: Chamfer loss between the decoded shape and the target shape.
objectiveDe = decoder_obj(latent_target, decoder)
exp.set_objective(objectiveDe)

# Optimiser and its settings (start from the source latent code).
opt = adam(dim=8)
optParas = dict(
    x0=latent,
    alpha=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-11,
    max_iter=100,
    tol=1e-6,
    verbose=True,
    record=False,
)
opt.set_parameters(optParas)
exp.set_optimizer(opt)

# Run it.
exp.do()

In [3]:
# Fix the torch random state before anything else.
torch.manual_seed(0)

# 0 Initialization: hyper-parameters kept as notebook globals
N_MARCHING_CUBE, lr = 64, 8e-3
l2reg, regl2 = True, 1e-3
decreased_by, adjust_lr_every = 1.5, 50

# 1 prepare data
#   source (sphere): zywvjkvz2492e6xpq4hd1jzy2r9lht - the starting point
#   target (torus):  0bucd9ryckhaqtqvbiagilujeqzek4
source_id, target_id = 999, 2
latent, latent_target = getLatentSourceAndTarget(args, source_id, target_id)

# 2 prepare model
decoder = constructDecoder(args)

In [None]:
# Single experiment: CMA-ES on the decoder objective.
exp = single_experiment()

# Objective: Chamfer loss between the decoded shape and the target shape.
objectiveDe = decoder_obj(latent_target, decoder)
exp.set_objective(objectiveDe)

# Optimiser and its settings: search around the source latent code with a
# small initial spread and no inner (local) optimiser.
opt = cma_es(dim=8)
optParas = dict(
    x0=latent,
    std=torch.ones((8,)) * 0.03,
    tol=1e-3,
    adjust_func=do_nothing(),
    record=False,
    verbose=True,
)
opt.set_parameters(optParas)
exp.set_optimizer(opt)

# Run it.
exp.do()



*******starting optimisation from intitial mean:  [-0.34123307 -0.47586045 -0.15240912  0.02312643  0.14862083  0.3323207
 -0.16005228 -0.12385195]
iter:  1
loss:  10.0
latent:  [-0.3266223  -0.41677397 -0.12227314 -0.003187    0.17221963  0.277801
 -0.15744506 -0.09102364]


iter:  2
loss:  0.42412472
latent:  [-0.31690824 -0.41054645 -0.10236986 -0.06019158  0.16955937  0.29262227
 -0.17669874 -0.09724864]


iter:  3
loss:  0.40884984
latent:  [-0.30315596 -0.42242423 -0.14098278 -0.05712719  0.13897526  0.28454265
 -0.21493274 -0.06458235]


iter:  4
loss:  0.40071818
latent:  [-0.26251101 -0.4189985  -0.07259955 -0.07264411  0.15222563  0.280191
 -0.22376229 -0.02680844]


iter:  5
loss:  0.36837342
latent:  [-0.3184425  -0.35868132 -0.09833866 -0.1175178   0.10599276  0.2539092
 -0.18219654 -0.03696901]


iter:  6
loss:  0.3465619
latent:  [-0.24295753 -0.3386955  -0.07619581 -0.09698155  0.18401587  0.20837115
 -0.24368806 -0.0746818 ]


iter:  7
loss:  0.32039294
latent:  [-0.

In [12]:
# Second name for the current `latent` tensor (plain assignment: an alias, not a copy).
ss = latent

In [14]:
# Bare expression as the last line of the cell: shows the tensor as the cell output.
ss

tensor([[-0.3412, -0.4759, -0.1524,  0.0231,  0.1486,  0.3323, -0.1601, -0.1239]],
       requires_grad=True)