In [6]:
# Notebook for smc sampler 
from __future__ import print_function
from __future__ import division

import numpy as np
from scipy.stats import multivariate_normal
from scipy.stats import norm
from scipy.special import gamma

import sys
import os

from smc_sampler_functions.functions_smc_help import sequence_distributions


# define the parameters
dim_list = [2, 5, 10, 20, 50, 100, 200, 300]
try:
    dim = dim_list[int(sys.argv[1])-1]
except:
    dim = 5
N_particles = 2**4
T_time = 2000
move_steps_hmc = 1
move_steps_rw_mala = 50
ESStarget = 0.95
M_num_repetions = 1
epsilon = .005
epsilon_hmc = .1
#rs = np.random.seed(1)
targetmean = np.ones(dim)*2
targetvariance = np.eye(dim)*0.1
targetvariance_inv = np.linalg.inv(targetvariance)
l_targetvariance_inv = np.linalg.cholesky(targetvariance_inv)
parameters = {'dim' : dim, 
              'N_particles' : N_particles, 
              'targetmean': targetmean, 
              'targetvariance':targetvariance,
              'targetvariance_inv':targetvariance_inv,
              'l_targetvariance_inv':l_targetvariance_inv,
              'df' : 5,
              'T_time' : T_time,
              'autotempering' : True,
              'ESStarget': ESStarget,
              'adaptive_covariance' : True
             }



# define the target distributions
#from smc_sampler_functions.cython.cython_target_distributions import priorlogdens, priorgradlogdens
from smc_sampler_functions.target_distributions import priorlogdens, priorgradlogdens
from smc_sampler_functions.target_distributions import targetlogdens_logistic, targetgradlogdens_logistic, f_dict_logistic_regression
from smc_sampler_functions.target_distributions import targetlogdens_student, targetgradlogdens_student
parameters_logistic = f_dict_logistic_regression(dim)
from smc_sampler_functions.target_distributions import targetlogdens_logistic_help_safe

In [None]:
def targetlogdens_logistic_help(particles, X, y):
    """
    likelihood of the logistic regression
    """
    dot_product = np.dot(X, particles.transpose())
    #sigmoid_value = logplus_one(dot_product)
    sigmoid_value = np.log(1+np.exp(-dot_product))
    likelihood_value = (-y*sigmoid_value + (1-y)*(dot_product+sigmoid_value)).sum(axis=0)
    return likelihood_value-np.linalg.norm(particles)**2


In [8]:
from scipy.optimize import minimize
from functools import partial
partial_target_max = partial(targetlogdens_logistic, parameters=parameters_logistic) 
def partial_target(x):
    return(partial_target_max(x)*-1)
x0 = np.ones((1,dim))*0
print(partial_target_max(x0))
targetlogdens_logistic_help_safe(x0, parameters_logistic['X_all'], parameters_logistic['y_all'])
res = minimize(partial_target_max, x0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
print(res.x)


[-69.31471806]
[-232.89232507  161.99845377 -143.44916357   76.12906628  -61.46971154]


In [4]:
def approx_gradient(function, x, h=0.00000001):
    dim = x.shape[1]
    grad_vector = np.zeros(x.shape)
    for i in range(dim):
        x_1 = np.copy(x)
        x_2 = np.copy(x)
        x_1[:,i] = x[:,i]+h
        x_2[:,i] = x[:,i]-h
        grad_vector[:,i] = (function(x_1)-function(x_2))/(2*h)
    return(grad_vector)
print(approx_gradient(partial_target_max, x0))
print(targetgradlogdens_student(x0, parameters))
print(approx_gradient(partial_target_max, x0) - targetgradlogdens_student(x0, parameters))

[[ 1.6368805  -6.21344682 -4.29220961 -6.19609466  4.19542765]]
[[ 1.81818182  1.81818182  1.81818182  1.81818182  1.81818182]]
[[-0.18130132 -8.03162864 -6.11039143 -8.01427648  2.37724583]]


In [18]:
particles = np.zeros(parameters['dim'])
targetlogdens_logistic(particles, parameters_logistic)
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(fit_intercept =  False)
log_reg.fit(parameters_logistic['X_all'], parameters_logistic['y_all'])
log_reg.get_params()
log_reg.coef_

  y = column_or_1d(y, warn=True)


array([[ 0.89143054,  0.61061055,  0.68289088,  0.55914027,  0.97590728]])