In [1]:
# Notebook for smc sampler 
from __future__ import print_function
from __future__ import division

import numpy as np
from scipy.stats import multivariate_normal
from scipy.stats import norm
from scipy.special import gamma

import sys
import os

sys.path.append("/home/alex/Dropbox/smc_hmc/python_smchmc/")

from smc_sampler_functions.functions_smc_help import sequence_distributions


# define the parameters
dim_list = [2, 5, 10, 20, 50, 100, 200, 300]
try:
    dim = dim_list[int(sys.argv[1])-1]
except:
    dim = 5
N_particles = 2**4
T_time = 2000
move_steps_hmc = 1
move_steps_rw_mala = 50
ESStarget = 0.95
M_num_repetions = 1
epsilon = .005
epsilon_hmc = .1
#rs = np.random.seed(1)
targetmean = np.ones(dim)*2
targetvariance = np.eye(dim)*0.1
targetvariance_inv = np.linalg.inv(targetvariance)
l_targetvariance_inv = np.linalg.cholesky(targetvariance_inv)
parameters = {'dim' : dim, 
              'N_particles' : N_particles, 
              'targetmean': targetmean, 
              'targetvariance':targetvariance,
              'targetvariance_inv':targetvariance_inv,
              'l_targetvariance_inv':l_targetvariance_inv,
              'df' : 5,
              'T_time' : T_time,
              'autotempering' : True,
              'ESStarget': ESStarget,
              'adaptive_covariance' : True
             }



# define the target distributions
#from smc_sampler_functions.cython.cython_target_distributions import priorlogdens, priorgradlogdens
from smc_sampler_functions.target_distributions import priorlogdens, priorgradlogdens
from smc_sampler_functions.target_distributions import targetlogdens_logistic, targetgradlogdens_logistic, f_dict_logistic_regression
from smc_sampler_functions.target_distributions import targetlogdens_student, targetgradlogdens_student
parameters_logistic = f_dict_logistic_regression(dim)
from smc_sampler_functions.target_distributions import targetlogdens_logistic_help_safe
parameters_logistic = f_dict_logistic_regression(dim)
#import ipdb; ipdb.set_trace()
parameters.update(parameters_logistic)


The minimum supported version is 2.4.6



In [2]:
from scipy.optimize import minimize
from functools import partial
partial_target_max = partial(targetlogdens_logistic, parameters=parameters_logistic) 
def partial_target(x):
    return(partial_target_max(x)*-1)
x0 = np.ones((1,dim))*0
print(partial_target_max(x0))
targetlogdens_logistic(x0, parameters_logistic)
res = minimize(partial_target, x0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
print(res.x)


[-69.31471806]
[ 0.89144232  0.6106106   0.68289645  0.5591443   0.97590696]


In [33]:
x = np.zeros((1,dim))
N_particles = 1000
particles = np.random.normal(size=(N_particles, dim))
from scipy.stats import norm
def targetgradlogdens_probit(particles, parameters):
    """
    the gradient of the logdensity of a probit model
    """
    particles = np.atleast_2d(particles)
    y = parameters['y_all']
    X = parameters['X_all']
    factor_yx = (y*X)[:,:,np.newaxis]
    factordensity = norm.pdf(X.dot(particles.transpose()))[:,np.newaxis,:]
    factorProb = np.clip(norm.cdf(X.dot(particles.transpose()))[:,np.newaxis,:], 4e-16, 1-4e-16)
    numerator =  factor_yx*factordensity - X[:,:,np.newaxis]*factordensity*factorProb
    denominator = (1-factorProb)*factorProb
    gradient_pi_0 = -particles
    return (numerator/denominator).sum(axis=0).transpose()+gradient_pi_0


def targetlogdens_probit(particles, parameters):
    """
    the gradient of the logdensity of a probit model
    """
    particles = np.atleast_2d(particles)
    y = parameters['y_all']
    X = parameters['X_all']

    factorProb = norm.cdf(X.dot(particles.transpose()))
    part1 = y*np.log(np.clip(factorProb, 4e-16, 1-4e-16))
    part2 = (1-y)*np.log(1-np.clip(factorProb, 4e-16, 1-4e-16))
    res = (part1+part2).sum(axis=0)-0.5*np.linalg.norm(particles, axis=1)**2
    return res



partial_target_max = partial(targetlogdens_probit, parameters=parameters_logistic) 
def partial_target(x):
    return(partial_target_max(x)*-1)
particles = np.ones((1,dim))*0
print(x0)
print(partial_target_max(x0))
print(targetlogdens_probit(x0, parameters_logistic))
res = minimize(partial_target, x0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
print(res.x)
print(targetgradlogdens_probit(res.x, parameters))
x0prob = np.ones((1,dim))*0
x0log = np.ones((1,dim))*0
for t in range(100):
    x0prob = x0prob +0.01*targetgradlogdens_probit(x0prob, parameters)
    x0log = x0log +0.01*targetgradlogdens_logistic(x0log, parameters)
print(x0prob, x0log)


[[ 0.5801041   0.41368747  0.44417985  0.38967335  0.65197806]]
[-49.90012959]
[-49.90012959]
Optimization terminated successfully.
         Current function value: 49.900130
         Iterations: 195
         Function evaluations: 311
[ 0.5801041   0.41368747  0.44417984  0.38967335  0.65197805]
[[ -3.20659332e-09   1.19006538e-08   2.97993305e-07   3.26799388e-09
    2.08340128e-07]]
[[ 0.5801041   0.41368747  0.44417985  0.38967335  0.65197806]] [[ 0.89127826  0.61045037  0.68281327  0.55897488  0.97564092]]


In [34]:
particles = np.random.normal(size=(N_particles, dim))
print(np.isfinite(targetgradlogdens_probit(particles, parameters)).sum())
print(np.isfinite(targetlogdens_probit(particles, parameters)).sum())
factorProb = norm.cdf(parameters['X_all'].dot(particles.transpose()))
np.min(np.log(1-factorProb+4e-16))


5000
1000


-35.45506712678484

In [33]:
def approx_gradient(function, x, h=0.00000001):
    dim = x.shape[1]
    grad_vector = np.zeros(x.shape)
    for i in range(dim):
        x_1 = np.copy(x)
        x_2 = np.copy(x)
        x_1[:,i] = x[:,i]+h
        x_2[:,i] = x[:,i]-h
        grad_vector[:,i] = (function(x_1)-function(x_2))/(2*h)
    return(grad_vector)
print(approx_gradient(partial_target_max, x0))
print(targetgradlogdens_probit(x0, parameters))
print(approx_gradient(partial_target_max, x0) - targetgradlogdens_student(x0, parameters))

[[ 22.0321887   14.46110005  24.16042122   9.73651666  21.76487897]]
[[ 22.03218874  14.46109951  24.16042125   9.73651502  21.76487952]]
[[ 21.05657894  13.48549029  23.18481146   8.7609069   20.78926921]]


In [5]:
particles = np.zeros(parameters['dim'])
targetlogdens_logistic(particles, parameters_logistic)
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(fit_intercept =  False)
log_reg.fit(parameters_logistic['X_all'], parameters_logistic['y_all'])
log_reg.get_params()
log_reg.coef_

  y = column_or_1d(y, warn=True)


array([[ 0.89143054,  0.61061055,  0.68289088,  0.55914027,  0.97590728]])