In [1]:
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
import acm2017_pyabc
from acm2017_pyabc.plots import plot_marginals, plot_particles
from acm2017_pyabc.prior import PriorList
from acm2017_pyabc.utils import flatten_function
%matplotlib notebook

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [2]:
def simulator(mu, sigma, batch_size=1, random_state=None):
    mu, sigma = np.atleast_1d(mu, sigma)
    return ss.norm.rvs(mu[:, None], sigma[:, None], size=(batch_size, 30), random_state=random_state)

def mean(y):
    return np.mean(y, axis=1)

def var(y):
    return np.var(y, axis=1)


# Set the generating parameters that we will try to infer
mean0 = 1
std0 = 3

# Generate some data (using a fixed seed here)
np.random.seed(20170525)
y0 = simulator(mean0, std0)
print(y0)

[[ 3.7990926   1.49411834  0.90999905  2.46088006 -0.10696721  0.80490023
   0.7413415  -5.07258261  0.89397268  3.55462229  0.45888389 -3.31930036
  -0.55378741  3.00865492  1.59394854 -3.37065996  5.03883749 -2.73279084
   6.10128027  5.09388631  1.90079255 -1.7161259   3.86821266  0.4963219
   1.64594033 -2.51620566 -0.83601666  2.68225112  2.75598375 -6.02538356]]


In [3]:
# setup
mu = acm2017_pyabc.Prior('unif', -2,4, name="mu")
sigma = acm2017_pyabc.Prior('unif', 1,4, name="sigma")

rej_samp = acm2017_pyabc.RejectionSampler(
    priors=[mu, sigma], 
    simulator=simulator, 
    summaries=[mean, var],
    observation=y0
)

rej_samp.sample(nr_samples=100, threshold=.5)

Rejection sampler started with threshold: 0.5 and number of samples: 100
Samples:    100 - Threshold: 0.5000 - Iterations:      13000 - Acceptance rate: 0.007692 - Time:     1.65 s


In [4]:
pool_size = 10
nr_groups = 10 

thetas = rej_samp.Thetas
priors = PriorList([mu, sigma])
distances = rej_samp.distances
num_priors = 2


weights = np.ones(len(distances)) / (distances)
weights = weights / sum(weights)
summaries=[mean, var]
distance = lambda x, y: np.linalg.norm(x - y)
stats_x = flatten_function(summaries, y0)

def calculate_fitness(curr_theta, delta, distance):
    error_distribution = ss.distributions.norm(0,delta)
    delta_prior = ss.distributions.expon(20).pdf(delta)
    fitness = priors.pdf(curr_theta)  * error_distribution.pdf(distance) * delta_prior
    return fitness

def sample_non_matching_thetas(pool_size, thetas):     
    if pool_size == 1:
        return 0,0 

    theta_m = np.random.randint(0,pool_size)
    theta_n = np.random.randint(0,pool_size)

    while theta_n == theta_m: 
        theta_m = np.random.randint(0,pool_size) 
        theta_n = np.random.randint(0,pool_size)

    return thetas[theta_m,:], thetas[theta_n,:] 


def crossover(thetas, weights, distances, mode = 'sampling'):
    new_thetas = np.zeros((pool_size, num_priors))
    new_distances = np.zeros((pool_size))
    new_weights = np.zeros((pool_size))
    deltas = np.zeros((pool_size))

    k = 0.9 

    for i in range(pool_size):
        while True:
            #choose params we use for linear combination
            y1 = np.random.uniform(0.5,1)        
            y2 = np.random.uniform(0.5,1)
            b = np.random.uniform(-0.0001,0.0001)

            #In practice, κ is typically set to 1.0 or 0.9, meaning all, or nearly all, parameters are updated to match the proposal vector
            idx_b = np.random.choice(np.arange(pool_size), p=weights)

            theta_t = thetas[i,:]
            theta_b = thetas[idx_b,:] #base_particle
            theta_m, theta_n = sample_non_matching_thetas(pool_size,thetas)

            #find a new theta  as a linear combination in the vector space of thetas within the cluster
            if mode == 'burnin':
                theta_star = theta_t + y1*(theta_m - theta_n) + y2*(theta_b-theta_b) + b
            else:
                 theta_star = theta_t + y1*(theta_m - theta_n) +  b

            #keep some of the old features with probability (1-k)
            reset_probability = np.random.uniform(0,1, size = len(theta_star))

            for j in range(len(theta_star)):
                if reset_probability[j] < (1-k):
                    theta_star[j] = theta_t[j]

            #make sure we found a great theta that works with our prior and then we can simulate and see how well it fits the data
            if priors.pdf(theta_star) > 0:

                Y = simulator(*(np.atleast_1d(theta_star)))  # unpack thetas as single arguments for simulator
                stats_y = flatten_function(summaries, Y)
                d = distance(stats_x, stats_y)
                
                delta = np.random.exponential(20)

                #TODO specify transition_kernel correctly
                proposal_fitness = calculate_fitness(theta_star, delta,d)
                previous_fitness = calculate_fitness(theta_t, delta,distances[i])

                MH_prob = min(1, proposal_fitness / previous_fitness)
                u = np.random.uniform(0,1)
                
                if u < MH_prob:
                    new_thetas[i,:] = theta_star
                    new_distances[i] = d
                    new_weights[i] = proposal_fitness
                else:
                    new_thetas[i,:] = theta_t 
                    new_distances[i] = distances[i - 1]
                    new_weights[i] = previous_fitness
                break


    return new_thetas, new_weights, new_distances
    
def mutate(thetas,weights, distances):

    new_thetas = np.zeros((pool_size, num_priors))
    new_weights = np.zeros(pool_size)
    new_distances = np.zeros(pool_size)
    
    sigma = 2 * np.cov(thetas[:, :].T, aweights=weights)

    for i in range(pool_size):
        while True:
            #TODO figure out if we need this step_size, or we can calculate spme variance based on particles as in SMC?
            theta_star = np.atleast_1d(ss.multivariate_normal(thetas[i,:], sigma, allow_singular=True).rvs())  

            #make sure we found a great theta that works with our prior and then we can simulate and see how well it fits the data
            if priors.pdf(theta_star) > 0:
                Y = simulator(*(np.atleast_1d(theta_star)))  # unpack thetas as single arguments for simulator
                stats_y = flatten_function(summaries, Y)
                d = distance(stats_x, stats_y)

                #TODO only change during burnin 
                delta = np.random.exponential(20)

                proposal_fitness = calculate_fitness(theta_star, delta, d)
                previous_fitness = calculate_fitness(thetas[i,:], delta, distances[i])

                #calculate MH probability
                MH_prob = min(1,proposal_fitness / previous_fitness)

                u = np.random.uniform(0,1)

                if u < MH_prob:
                    new_thetas[i,:] = theta_star
                    new_distances[i] = d
                    #can we even do this?
                    new_weights[i] = proposal_fitness

                else:
                    new_thetas[i,:] = theta_t 
                    new_distances[i] = distances[i - 1]
                    #can we even do this?
                    new_weights[i] = previous_fitness

                break

    return new_thetas, new_weights 


def migrate(thetas, weights):
    #thetas = copy.deepcopy(thetas)
    #weights = copy.deepcopy(weights)
    
    K = np.random.randint(1,nr_groups)
    groups = np.arange(nr_groups)
    np.random.shuffle(groups)
    groups = groups[0:K]
    
    #setup arrays to temporarily store the indices of the particles we want to swap
    weak_particles_idx= [None] * K

    #first choose all the weak_particles by their inverse of their weights and store their index
    for i in range(K):
        curr_group = groups[i]
        group_weights = 1 / (weights[curr_group]+1)
        group_weights = group_weights  / sum(group_weights)
        weak_particles_idx[i] = np.random.choice(np.arange(0,pool_size), p = group_weights)

    #close the cycle by setting the first particles to the last
    previous_theta = thetas[groups[0],weak_particles_idx[0]] 
    previous_weight = thetas[groups[0],weak_particles_idx[0]] 

    thetas[groups[0],weak_particles_idx[0]] = thetas[groups[K-1],weak_particles_idx[K-1]] 
    weights[groups[0],weak_particles_idx[0]] = weights[groups[K-1],weak_particles_idx[K-1]] 

    for i in np.arange(1,K):
        curr_group = groups[i]
        prev_group = groups[i-1]

        #temporarily store our particles before we overwrite them with the previous 
        temp_theta = thetas[curr_group,weak_particles_idx[i]]
        temp_weight =  weights[curr_group,weak_particles_idx[i]]

        thetas[curr_group,weak_particles_idx[i]] = previous_theta
        thetas[curr_group,weak_particles_idx[i]] = previous_weight

        previous_theta = temp_theta 
        previous_weight = temp_weight

    return thetas,weights
    



In [5]:
# import copy

thetas_full= np.zeros((nr_groups, pool_size, num_priors))
weights_full = np.zeros((nr_groups, pool_size))

for i in range(nr_groups):
    thetas_full[i,:,:] = (thetas[i*10:(i+1)*10])
    weights_full[i,:] = (weights[i*10:(i+1)*10])
    print(crossover(thetas[i*10:(i+1)*10],weights[i*10:(i+1)*10] / sum(weights[i*10:(i+1)*10]), distances[i*10:(i+1)*10]))
    pass 

#mutate(thetas[0:10],weights[0:10], distances[0:10])
#thetas_mutated, weights_mutated = migrate(thetas_full,weights_full)

#thetas_mutated - thetas_full

(array([[ 1.14364716,  3.05483024],
       [-0.58950169,  2.84149139],
       [-1.2030605 ,  3.12631889],
       [-0.6394724 ,  3.41472979],
       [ 0.7380061 ,  3.24472368],
       [-0.49237789,  2.83744094],
       [ 1.64193443,  4.11889446],
       [ 0.45824506,  2.13336761],
       [ 0.16117408,  3.73289782],
       [ 1.0009872 ,  2.56080005]]), array([  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         3.79071305e-15,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00]), array([  3.72989326,   2.90323126,   2.51200644,   3.66040362,
         0.77013507,   3.5874267 ,  16.17730411,   3.77582566,
         8.13754997,   1.25454519]))
(array([[ 1.26184324,  2.95274233],
       [ 0.1429487 ,  3.68510441],
       [-0.32780571,  3.32369966],
       [ 1.56953414,  3.79576652],
       [ 0.857486  ,  2.29536424],
       [ 0.46702255,  2.92026564],
       [ 0.82388776,  3.57998356],
       [ 1.11751517,  3.4917

In [6]:
abcde_samp = acm2017_pyabc.ABCDESampler(
    priors=[mu, sigma], 
    simulator=simulator, 
    summaries=[mean, var],
    observation=y0
)

In [7]:
abcde_samp.sample(nr_samples=100, nr_groups = 10, nr_iter = 200)

ABC-Differential-Evolution sampler started with number of samples: 100
initializing pools
starting iteration[ 1 ]


ValueError: could not broadcast input array from shape (2,2) into shape (10)