In [1]:
import numpy as np
from tqdm import tqdm
import time


In [2]:
def meanLogProb(xa, xb, sigma):
    '''
    Average log-density of the rows of xb.

    Each row xb[i, :] is scored with probDensity(xa, row, sigma); the scores
    are summed in row order and divided by the number of rows of xb.
    '''
    n_points = xb.shape[0]
    # Lazily score one row at a time; the float start value keeps the
    # running total a float even before the first row is added.
    per_point = (probDensity(xa, xb[row, :], sigma) for row in range(n_points))
    return sum(per_point, 0.0) / n_points

def probDensity(xa, xb, sigma):
    '''
    Log-density of a point under an equal-weight isotropic Gaussian mixture.

    xa is unpacked as a (k, d) array whose rows act as the k component
    centres; xb is broadcast against those rows; sigma is the standard
    deviation shared by every component. Despite the function name, the
    returned value is the logarithm of the density.
    '''
    n_centres, n_dims = xa.shape
    # Negated squared Euclidean distance from xb to every centre.
    neg_sq_dist = np.sum(-(xb - xa) ** 2, axis=1)
    # Log of the uniform mixture weight 1/k.
    log_weight = np.log(1 / n_centres)
    exponent = neg_sq_dist / (2 * sigma ** 2)
    # Log of the Gaussian normalising constant, identical for all components.
    log_norm = n_dims / 2 * np.log(2 * np.pi * sigma ** 2)
    return log_weight + logsumexp(exponent - log_norm)

def logsumexp(arr):
    '''
    Numerically stable log(sum(exp(arr))).

    The largest entry is subtracted before exponentiating, so that very large
    or very small values do not overflow/underflow, and added back afterwards.

    arr: array of log-domain values with at least one element
         (np.amax raises ValueError on empty input).
    Returns the scalar log-sum-exp taken over all entries of arr.
    '''
    a_max = np.amax(arr)
    # When the maximum is -inf (every term is exp(-inf) == 0), +inf or NaN,
    # arr - a_max would contain inf - inf == NaN. In each of those cases the
    # correct result is the maximum itself, so return it directly.
    if not np.isfinite(a_max):
        return a_max
    shifted_sum = np.sum(np.exp(arr - a_max))
    return a_max + np.log(shifted_sum)

def shuffleData(dataset, n=10000, rng=None):
    '''
    Randomly draw two disjoint subsets of rows (training and validation).

    dataset: array with one sample per row (indexed along axis 0, at least 2-D).
    n:       number of rows requested for each subset.
    rng:     optional object exposing ``permutation`` (for example a
             np.random.Generator or np.random.RandomState) for reproducible
             splits; when None the global np.random state is used.

    Returns (train_x, valid_x): the rows found at the first n and at the next
    n positions of a random permutation of the row indices. If dataset has
    fewer than 2*n rows the subsets are shorter than n (valid_x may be empty).
    '''
    numData = dataset.shape[0]
    if rng is None:
        index = np.random.permutation(numData)
    else:
        index = rng.permutation(numData)
    # Slice the permutation first so only the selected rows are copied,
    # rather than building a shuffled copy of the whole dataset twice.
    train_x = dataset[index[0:n], :]
    valid_x = dataset[index[n:2*n], :]
    return train_x, valid_x

In [4]:
import pickle 
# load MNIST data
# NOTE: pickle.load can execute arbitrary code while unpickling; only open an
# mnist.pkl that comes from a trusted source.
file = 'mnist.pkl'
with open(file, 'rb') as fo:
    try:
        # encoding='latin1' is the setting pickle needs to decode byte strings
        # written by Python 2 (presumably how this file was produced).
        train, validate, test = pickle.load(fo, encoding='latin1')
    except Exception as err:
        # Catch Exception instead of using a bare except so KeyboardInterrupt
        # and SystemExit still propagate, and say which file failed and why.
        # On failure train/validate/test are not assigned by this cell.
        print("error.", file, repr(err))

In [47]:
# load CIFAR data from pickled batch files
# NOTE(review): data_batch_1..data_batch_5 are the batch file names of the
# CIFAR-10 python archive rather than CIFAR-100 - confirm the intended dataset.
# NOTE: pickle.load can execute arbitrary code while unpickling; only open
# batch files that come from a trusted source.
# NOTE: this cell rebinds `train`, which the MNIST cell also assigns.
files = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5']
# Empty (0, 3072) array that every successfully loaded batch is stacked under.
train = np.array([]).reshape(0, 3072)
for file in files:
    with open(file, 'rb') as fo:
        try:
            # Named `batch` rather than `dict` so the builtin dict is not
            # rebound for the rest of the session.
            batch = pickle.load(fo, encoding='bytes')
            train = np.vstack((train, batch[b'data']))
        except Exception as err:
            # Catch Exception instead of using a bare except so
            # KeyboardInterrupt and SystemExit still propagate. A batch that
            # fails to load is reported and skipped; `train` keeps the rows
            # gathered so far.
            print("error.", file, repr(err))


In [None]:
def visualize(data, size=10, channel=1):
    '''
    Convert rows of flattened square images into PIL images.

    data:    2-D array with one flattened image per row; every row holds
             channel * side * side values.
    size:    currently unused; kept so existing calls keep working.
             NOTE(review): presumably meant as a grid/display size - confirm.
    channel: number of colour channels stored in each row.

    Returns a list with one PIL image per row, for at most the first 400 rows.
    Raises ValueError when the row length does not describe a square image.
    '''
    from PIL import Image

    max_images = 400  # upper bound on how many rows are converted
    # The side length must be an integer to be usable as a reshape dimension.
    imsize = int(round(np.sqrt(data.shape[1] / channel)))
    if imsize * imsize * channel != data.shape[1]:
        raise ValueError(
            f"rows of length {data.shape[1]} are not square images with {channel} channel(s)"
        )

    images = []
    # Never index past the end of data when it has fewer than max_images rows.
    for i in range(min(max_images, data.shape[0])):
        row = data[i, :]
        if channel == 1:
            pixels = row.reshape(imsize, imsize)
        else:
            # NOTE(review): assumes each row stores whole channel planes one
            # after another (the layout described for the CIFAR python
            # batches) - confirm. PIL wants the channel axis last, and
            # generally expects uint8 data for multi-channel arrays.
            pixels = row.reshape(channel, imsize, imsize).transpose(1, 2, 0)
        images.append(Image.fromarray(pixels))
    return images
        

In [8]:
# main

# `train` is bound by two different cells: the MNIST cell unpacks it from the
# pickle (this cell originally used its first element), while the CIFAR cell
# rebinds it to a plain 2-D array. Pick the sample matrix in either case so
# that train[0] is never a single flattened row.
train_x, valid_x = shuffleData(train if isinstance(train, np.ndarray) else train[0])
# Time only the log-likelihood evaluation; .05 is passed as sigma, the width
# of the Gaussian kernels.
start = time.time()
result = meanLogProb(train_x, valid_x, .05)
end = time.time()
print(result)
print('time=',end-start)