# Cython/Python Implementation of the Bernoulli Distribution with MultiMin

Here we will program the above cells in Python and call a Monte-Carlo function to yield us the outputs we desire. The Monte-Carlo function will be written in Cython and we will use the %timeit cell magic to give us a run-time of our Cython function. 

Here we are forced to use MultiMin because the Bernoulli log likelihood function (LLF) does not behave in such a way that a minimum can be found using the standard univariate minimization routines provided by GSL. Moreover, because the LLF takes logs of the probability, we are forced to transform the probability to lie within [0,1] to prevent the LLF from exploding. 

### Cython Function

In [1]:
%load_ext cython

In [2]:
%%cython -lgsl -lgslcblas

#!python
#cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True

#Import modules: 

from libc.stdlib cimport rand, RAND_MAX, malloc, realloc, free, abort
from libc.math cimport log, sin

#Use the CythonGSL package to get the low-level routines
from cython_gsl cimport *

######################### Define the Data Structure ############################

#Bundle of sample data that is handed to the objective function through a
#void* pointer (see F.params in Monte_Carlo and the recast inside LLF)
cdef struct Parameters:
    #Pointer for Y data array (the observed sample; allocated by alloc_struct
    #and filled with 0.0/1.0 values by Y_fill)
    double* Y
    #size of the array: stored as a pointer to the element count, not a copy,
    #so it is read as Size[0] and the pointed-to int must outlive the struct
    int* Size
    
############################# Global Variables ################################

cdef double bounds[2]

################ Support Functions for Monte-Carlo Function ##################

#Map an unconstrained real y into the interval spanned by the global bounds.
#With bounds = [0,1] the result is 0.5 - 0.5*sin(y), which always lies in [0,1].
cdef inline double Transform(double y) nogil:
    cdef:
        #midpoint of the interval
        double centre = (bounds[0] + bounds[1])/2
        #signed half-width (lower minus upper, exactly as the formula is defined)
        double half_span = (bounds[0] - bounds[1])/2

    return centre + half_span*sin(y)

#Draw one pseudo-random double in [0,1] from the C library generator rand()
cdef inline double RAND() nogil:
    #promote the integer draw to double first so the division is floating point
    cdef double draw = <double>rand()
    return draw/RAND_MAX

#Allocate (flag==0) or resize (flag!=0) the Y array of the struct and verify
#that the memory was obtained.
#  data : struct whose Y buffer is (re)allocated; data.Size is set to N
#  N    : pointer to the requested number of elements (kept, not copied)
#  flag : 0 -> first allocation with malloc, anything else -> realloc of data.Y
#Returns 1 (True) on success. On failure the struct is destroyed, exactly as
#before, and 0 (False) is returned; the caller must not touch data afterwards.
cdef int alloc_struct(Parameters* data, int* N, unsigned int flag) nogil:

    #Receive the new block in a temporary so that a failed realloc does not
    #overwrite (and thereby leak) the only pointer to the existing buffer
    cdef double* buffer

    #fill in the size of the array
    data.Size = N

    #allocate the data array initially
    if flag==0:
        buffer = <double*> malloc(N[0] * sizeof(double))
    #reallocate the data array
    else:
        buffer = <double*> realloc(data.Y, N[0] * sizeof(double))

    #If the array could not be allocated, destroy the struct and report failure
    if N[0]!=0 and buffer==NULL:

        #on a first allocation data.Y holds no valid pointer yet, so make it
        #safe to free; after a failed realloc data.Y still owns the old
        #buffer, which destroy_struct now releases instead of leaking it
        if flag==0:
            data.Y = NULL

        #return the memory to system
        destroy_struct(data)

        return 0

    #success: publish the new buffer
    data.Y = buffer

    return 1

#Destructor of the struct: returns the Y array and then the struct itself to
#the system. A NULL struct pointer is accepted and ignored, mirroring free().
cdef void destroy_struct(Parameters* data) nogil:
    #nothing to release, and data.Y must not be read through a NULL pointer
    if data==NULL:
        return

    #the array has to go first: its pointer lives inside the struct
    free(data.Y)
    free(data)

#Populate the observed variable Y with discrete 0/1 outcomes: each element is
#1.0 when a fresh uniform draw is at or below p_true, and 0.0 otherwise
cdef void Y_fill(Parameters* data, double p_true) nogil:

    cdef:
        Py_ssize_t idx
        double* sample = data.Y

    #one independent draw per element of the array
    for idx in range(data.Size[0]):
        sample[idx] = 1.0 if RAND() <= p_true else 0.0

#Definition of the function to be minimized: the negative average
#log-likelihood of the Bernoulli sample (minimizing it maximizes the LLF).
#  v    : GSL vector whose element 0 is the unconstrained parameter
#  data : the Parameters struct holding the sample, passed as void*
#Returns -(sum of the per-observation log-likelihood terms)/n.
cdef double LLF(gsl_vector* v, void* data) nogil:

    cdef:
        #the sample structure (considered the parameter here), recast
        Parameters* sample = <Parameters*> data

        #the loop variables
        Py_ssize_t i
        int n = sample.Size[0]

        #running total of the log-likelihood terms
        double Sum = 0

        #Transform variable to bound the value between (0,1)
        double p_tran = Transform(gsl_vector_get(v, 0))

        #Both log terms depend only on p_tran, so evaluate them once here
        #instead of once per observation; the terms are added in the same
        #order as before, so the result is unchanged
        double log_success = log(p_tran)
        double log_failure = log(1-p_tran)

    for i in range(n):

        if sample.Y[i]==1.0:
            Sum += log_success
        else:
            Sum += log_failure

    return (-(Sum/n))

########################## Monte-Carlo Function ##############################

cpdef void Monte_Carlo(int[::1] Samples, double[:,::1] p_hat, 
                       int Sims, double p_true) nogil:
    #Run the Monte-Carlo experiment: for every sample size in Samples, draw
    #Sims Bernoulli(p_true) samples and minimize the negative LLF of each one
    #with the GSL multimin simplex routine.
    #
    #  Samples : sample sizes, one per row of p_hat
    #  p_hat   : output array; entry [i,j] is written only when simulation j
    #            of sample size i converges, other entries are left untouched.
    #            It must provide at least Samples.shape[0] rows and Sims
    #            columns (the cell's directives request boundscheck=False).
    #  Sims    : number of simulated samples per sample size
    #  p_true  : success probability used to generate the data
    #
    #NOTE: the value stored in p_hat is the minimizer's own coordinate, i.e.
    #the unconstrained variable that LLF passes through Transform. It is NOT
    #transformed back to a probability here.
    #
    #The process is abort()ed if any allocation fails.
     
    #Define variables and pointers
    global bounds
    
    bounds[0] = 0; bounds[1] = 1
    
    cdef:
        #Data Structure
        Parameters* Data
            
        #iterators
        Py_ssize_t i, j
        int status, Iter, max_Iter = 1000 
        
        #Variables
        int N = Samples.shape[0], Mem_Int
        double tol = 1e-6, size
        
        #GSL objects and pointer
        const gsl_multimin_fminimizer_type* T
        gsl_multimin_fminimizer* s
        gsl_multimin_function F
        gsl_vector *ss
        gsl_vector *x
    
    #Allocate the minimization routine (multimin), use simplex algorithm
    T = gsl_multimin_fminimizer_nmsimplex
    s = gsl_multimin_fminimizer_alloc(T,1)
    
    #Allocate the Step Size and variable vector
    ss = gsl_vector_alloc(1)
    x = gsl_vector_alloc(1)
    
    #allocate the struct dynamically
    Data = <Parameters*> malloc(sizeof(Parameters))
    
    #verify memory integrity of allocated variables
    if Data==NULL or s==NULL or ss==NULL or x==NULL: abort()
    
    #malloc leaves the struct uninitialized: start with an empty data array so
    #that the cleanup below frees NULL rather than an indeterminate pointer
    #when Samples is empty and alloc_struct is never reached
    Data.Y = NULL
        
    #Set the GSL function
    F.n = 1
    F.f = &LLF
    F.params = <void*> Data
    
    #Set the initial Step Size of the simplex
    gsl_vector_set(ss,0,0.5)
    
    try:
        
        for i in range(N): 

            #allocate the elements of the struct (if i>0, reallocate data array)
            Mem_Int = alloc_struct(Data, &Samples[i], i)

            #verify memory integrity
            if Mem_Int==False: abort() 

            for j in range(Sims):

                #fill the array in the struct
                Y_fill(Data, p_true)
                
                #set the starting value (random number)
                gsl_vector_set(x, 0, Transform(RAND()))
               
                #set the minimizer
                gsl_multimin_fminimizer_set(s, &F, x, ss)

                #initialize conditions
                Iter = 0
                status = GSL_CONTINUE

                #iterate until the simplex is smaller than tol, an error
                #occurs, or the iteration budget is used up
                while (status == GSL_CONTINUE and Iter <= max_Iter):
                    
                    Iter += 1
                    status = gsl_multimin_fminimizer_iterate(s)

                    #non-zero status: the iteration failed, give up on this sample
                    if status:
                        break

                    size = gsl_multimin_fminimizer_size(s)
                    status = gsl_multimin_test_size(size, tol)

                    #converged: record the (untransformed) minimizer
                    if status==GSL_SUCCESS:
                        p_hat[i,j] = gsl_vector_get(s.x,0)

    finally:
        #release everything that was allocated above
        destroy_struct(Data)
        gsl_multimin_fminimizer_free(s)
        gsl_vector_free(x)
        gsl_vector_free(ss)

### Python Portion and Cython Function Call

In [3]:
import numpy as np

In [4]:
#Recreate in python the first cell of the matlab example: the inputs and the
#output buffer of the Monte-Carlo run

#Sample Sizes (C int, one row of p_hat per entry)
N = np.array([5, 50, 500, 5000], dtype='i')

#Parameters for MC: number of simulations per sample size and true probability
T = 1000
p_true = 0.2

#Array of the outputs from the MC, pre-filled with NaN so that entries which
#are never written remain recognisable
p_hat = np.full((N.size, T), np.nan, dtype='d')

In [5]:
%timeit Monte_Carlo(N, p_hat, T, p_true)

1 loop, best of 3: 1.96 s per loop


In [6]:
p_hat.mean(axis=1)

array([ 0.80281815,  0.64786481,  0.64455267,  0.64314473])

# Comparison with Matlab routine

We will implement a similar estimation using fmincon in Matlab, but will not transform the variable to lie within the bounds of [0,1] as we have done above, so as to take advantage of fmincon's ability to impose the bounds directly. From previous experience, serially running the inner Monte-Carlo loop leads to an unacceptably long runtime. So the only difference in the Matlab code (besides vectorizing it to take advantage of Matlab's vectorization capabilities) is the incorporation of parfor loops, which allow it to be parallelized. 

In [1]:
%load_ext pymatbridge



Starting MATLAB on ZMQ socket ipc:///tmp/pymatbridge-83831221-0cb1-44b2-8ebc-adecccce5fa7
Send 'exit' command to kill the server
.......MATLAB started and connected!


In [2]:
%%matlab
clear all; clc; parpool

Starting parallel pool (parpool) using the 'local' profile ... connected to 4 workers.

ans = 

 Pool with properties: 

            Connected: true
           NumWorkers: 4
              Cluster: local
        AttachedFiles: {}
          IdleTimeout: 30 minute(s) (30 minutes remaining)
          SpmdEnabled: true



In [4]:
%%matlab

%Sample sizes, stored as a 4x1 column vector
N = [5; 50; 500; 5000];

%population probability
p_true = 0.2;

%Number of Monte Carlo simulations per sample size
T = 1000;

%Initialize the output of the Monte Carlo with NaN: one row per
%simulation and one column per sample size, i.e. 1000x4 here
%because the N vector has 4 elements.
p_hat = NaN(T, numel(N));

%A starting guess for the p_hat that we will find (one uniform draw)
p_0 = rand();

%Our options for the maximization routine:
options_2 = optimoptions('fmincon', ...
                         'Algorithm', 'interior-point', ...
                         'Display', 'off', ...
                         'UseParallel', true);

In [5]:
%%matlab

%Time the whole Monte Carlo experiment
tic
for i=1:size(N,1)

    sample_size = N(i);
    
    %run inner loop = MC loop - to make this go fast 
    %we will need to parallelize this otherwise it will
    %take forever
    
    parfor j=1:T
    
        %First we generate a random sample of points
        %between 0 and 1. We use the rand function 
        %to do this
        
        Y_raw = rand(sample_size,1);
        
        %Now we need to create the Y vector such that it
        %takes the value 1 if the draw is less than
        %or equal to p_true (0.2), and 0 otherwise.
        
        Y = (Y_raw <= p_true);
        
        %Now we can run the fmincon routine on the sample Y,
        %with p bounded between 0 and 1, and find the p that
        %maximizes the likelihood of observing our sample
        %vector Y. The trailing semicolon keeps the result
        %from being echoed on every iteration of the timed loop.
        
        p_hat(j,i) = fmincon(@(p) Bernoulli(p,sample_size,Y),...
                             p_0,[],[],[],[],0,1,[],options_2);
    
    end

end
toc

Elapsed time is 21.216358 seconds.


In [6]:
%%matlab
p_mean = mean(p_hat)


p_mean =

    0.1954    0.1993    0.2000    0.2001



# Results

Comparing with Matlab we see a **10** times decrease in run time (1.96 s for Cython versus 21.2 s for Matlab), or an order of magnitude difference. The only thing left to do is transform the results back in the Python code. 