In [1]:
import numpy as np
import math 
import matplotlib.pyplot as plt
from functools import partial 
import itertools
import random
import copy
import pandas as pd

from numba import jit
import numba

from tqdm import tqdm
import snakeviz
%load_ext snakeviz


# sample posterior of single observation 

In [2]:
# Problem dimensions; the sampler functions further down read N and k as globals.
N = 10  # chain length
k = 5  # number of observations
# Number of ways to place k observations on N positions: N! / (k! * (N-k)!), as a float.
print(f"entire space size: {math.factorial(N) / (math.factorial(k) * math.factorial(N - k))}")


entire space size: 252.0


In [3]:
# Single-observation posterior: P(W | Yi, X).
# Draw N arbitrary per-position probabilities P(Xi = Yi), one for each position
# of the chain, and keep their complements next to them.
y_from_x_probs = np.random.rand(N)
not_y_from_x_probs = 1.0 - y_from_x_probs

In [4]:
def __rec_prob_from_w(y_from_x_probs,not_y_from_x_probs,wide_w):
    """Return the product of the per-position probabilities selected by ``wide_w``.

    Position ``i`` contributes ``y_from_x_probs[i]`` when ``wide_w[i] == 1`` and
    ``not_y_from_x_probs[i]`` otherwise.

    The product is accumulated in a loop instead of by recursing on sliced
    inputs, so the supported chain length is no longer limited by the
    interpreter's recursion depth.

    Parameters
    ----------
    y_from_x_probs : indexable sequence of numbers
        Factor used for a position whose indicator equals 1.
    not_y_from_x_probs : indexable sequence of numbers
        Factor used for every other position.
    wide_w : indexable sequence
        One indicator per position of ``y_from_x_probs``.

    Returns
    -------
    ``1`` when ``y_from_x_probs`` is empty, otherwise the product of the
    selected factors.

    Raises
    ------
    IndexError
        If a sequence that has to be read is shorter than ``y_from_x_probs``.
    """
    prob = 1
    # Multiply from the last position back to the first: this is the order in
    # which the former recursive version combined the factors, so the
    # floating-point result is unchanged.
    for i in range(len(y_from_x_probs) - 1, -1, -1):
        factor = y_from_x_probs[i] if wide_w[i] == 1 else not_y_from_x_probs[i]
        prob = prob * factor
    return prob

def _rec_msf_creator(y_from_x_probs,not_y_from_x_probs,w) :
    """Evaluate the mass function at the index set ``w`` via ``__rec_prob_from_w``.

    ``w`` lists the selected positions. It is expanded into a 0/1 vector with
    one entry per element of ``y_from_x_probs`` and that vector is handed to
    ``__rec_prob_from_w``, whose result is returned unchanged.
    """
    n_positions = len(y_from_x_probs)
    indicator = np.zeros(n_positions)
    indicator[w] = 1  # flag the positions named in w
    return __rec_prob_from_w(y_from_x_probs, not_y_from_x_probs, indicator)

@jit
def _msf_creator(y_from_x_probs,not_y_from_x_probs,w) : 
    """Return the product of per-position probabilities for the index set ``w``.

    Position ``i`` contributes ``y_from_x_probs[i]`` when ``i`` appears in ``w``
    and ``not_y_from_x_probs[i]`` otherwise. The chain is scanned once while a
    cursor walks through ``w``.

    Parameters
    ----------
    y_from_x_probs, not_y_from_x_probs : indexable sequences of numbers
        Factors for selected / non-selected positions, one per position.
    w : indexable sequence of integers
        Selected positions. Must be strictly increasing, non-negative and
        smaller than ``len(y_from_x_probs)``.

    Returns
    -------
    The product of the chosen factors (``prob`` starts at ``1``).

    Raises
    ------
    ValueError
        If ``w`` breaks the ordering/range requirement. The previous sanity
        check could never trigger, and such input silently left positions out
        of the product.
    """
    n_positions = len(y_from_x_probs)
    n_selected = len(w)
    prob = 1
    i_w = 0  # cursor into w: the next selected index that has not been matched yet
    for i in range(n_positions):
        if i_w < n_selected:
            if w[i_w] < i:
                # The scan has already moved past this entry, so it can never
                # be matched: w is unsorted, has a duplicate, or is negative.
                raise ValueError("w must be strictly increasing and non-negative")
            if w[i_w] == i:
                prob = prob * y_from_x_probs[i]
                i_w += 1
                continue
        prob = prob * not_y_from_x_probs[i]
    if i_w < n_selected:
        # Entries left over after the scan point beyond the end of the chain.
        raise ValueError("w contains an index outside the chain")
    return prob

def msf_creator(y_from_x_probs,not_y_from_x_probs,is_rec = False) : 
    """Bind the two probability vectors to one of the mass-function evaluators.

    Returns a ``functools.partial`` that only needs the index set ``w``. It
    wraps ``_rec_msf_creator`` when ``is_rec`` is truthy and ``_msf_creator``
    otherwise.
    """
    evaluator = _rec_msf_creator if is_rec else _msf_creator
    return partial(evaluator, y_from_x_probs, not_y_from_x_probs)

# Mass function over the probabilities drawn above, using the recursive evaluator.
msf = msf_creator(y_from_x_probs, not_y_from_x_probs, is_rec=True)

# build sampler

In [5]:
def sample_cond_prob_single_dim(dims_vector,ind_dim_for_sample,msf,n_positions=None) : 
    """Draw a new value for one coordinate, conditional on all the others.

    The candidates are the integers strictly between the neighbouring
    coordinates of ``dims_vector`` (``-1`` stands in for the missing left
    neighbour of the first coordinate, ``n_positions`` for the missing right
    neighbour of the last one). Each candidate is weighted by ``msf`` evaluated
    on the vector with that candidate substituted, the weights are normalised,
    and one candidate is drawn with ``np.random.choice``.

    Parameters
    ----------
    dims_vector : sequence of int
        Current coordinates; not modified.
    ind_dim_for_sample : int
        Index of the coordinate to resample.
    msf : callable
        Called with a NumPy array of coordinates; returns a non-negative weight.
    n_positions : int, optional
        Exclusive upper bound for the last coordinate. Defaults to the
        notebook-level ``N``.

    Returns
    -------
    The drawn candidate, as returned by ``np.random.choice``.

    Raises
    ------
    ValueError
        If every candidate weight is zero, so there is nothing to normalise
        (for instance when the weights are too small to be represented).
    """
    if n_positions is None:
        n_positions = N  # fall back to the notebook-level chain length

    # The last coordinate is identified from the vector itself rather than
    # from the global k, so the function works for any vector length.
    last_dim = len(dims_vector) - 1
    lower = dims_vector[ind_dim_for_sample - 1] if ind_dim_for_sample != 0 else -1
    upper = dims_vector[ind_dim_for_sample + 1] if ind_dim_for_sample != last_dim else n_positions
    candidates = range(lower + 1, upper)

    trial = copy.copy(dims_vector)  # work on a copy so the caller's vector is untouched
    weights = []
    for candidate in candidates:
        trial[ind_dim_for_sample] = candidate
        weights.append(msf(np.array(trial)))

    total = sum(weights)
    if weights and total == 0:
        # Dividing by zero would only produce NaN probabilities and an opaque
        # error from np.random.choice; fail with an explicit message instead.
        raise ValueError("all candidate weights are zero; cannot normalise")

    return np.random.choice(candidates, p=np.array(weights) / total)

def sample_msf_using_sim(msf,n_iter) : 
    """Estimate per-coordinate position frequencies by repeated resampling.

    Starts from a sorted random choice of ``k`` distinct positions out of
    ``range(N)`` (both read from the notebook globals). In each of the
    ``n_iter`` sweeps every coordinate is redrawn in turn with
    ``sample_cond_prob_single_dim`` and the drawn position is tallied.

    Returns an ``(N, k)`` array whose ``[pos, dim]`` entry is the number of
    times coordinate ``dim`` was drawn at ``pos``, divided by ``n_iter``.
    """
    state = sorted(random.sample(range(N), k))  # sorted() already yields a fresh list
    visit_counts = np.zeros((N, k))

    for _sweep in range(n_iter):
        for dim in range(k):
            drawn = sample_cond_prob_single_dim(state, dim, msf)
            state[dim] = drawn
            visit_counts[drawn, dim] += 1

    return visit_counts / n_iter
    

In [6]:
%%snakeviz

# Larger problem for profiling; this rebinds the notebook-level N, k,
# probability vectors and msf.
N = 700  # chain length
k = 250  # number of observations
y_from_x_probs = np.random.rand(N)
not_y_from_x_probs = 1.0 - y_from_x_probs
print(f"entire space size: {math.factorial(N) / (math.factorial(k) * math.factorial(N - k))}")

# Run ten sweeps of the sampler with the loop-based (jit) evaluator.
msf = msf_creator(y_from_x_probs, not_y_from_x_probs, is_rec=False)
sampled_from_simu = sample_msf_using_sim(msf, n_iter=10)

entire space size: 4.322191249463774e+196
 
*** Profile stats marshalled to file 'c:\\Temp\\tmpf6y2eqsp'. 
Embedding SnakeViz in this document...
