In [4]:
import numpy as np
import random
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from scipy.stats import multivariate_normal
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import colors, animation
from IPython.display import HTML

In [86]:
def initialize_figure():
    fig, axes = plt.subplots(1,3)
    axes = axes.flatten()
    fig.set_size_inches(18,5)
    for ax in axes:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    # plt.axis('equal')
    axes[0].set_title("Samples")
    return fig, axes

def get_data(num_dim_pts=20, neg_start=0, neg_end=6, xoffset=6, yoffset=6):
    pos = None; neg = None
    for x in range(num_dim_pts):
        for y in range(num_dim_pts):
            inst = np.array([x,y]).reshape(1,-1)
            if (x >= xoffset+neg_start and x < xoffset+neg_end and
                y >= yoffset+neg_start and y < yoffset+neg_end):
                neg = inst if neg is None else np.vstack((neg, inst))
            else:
                pos = inst if pos is None else np.vstack((pos, inst))
    # Prepend labels
    pos = np.insert(pos, 0, 1, axis=1)
    neg = np.insert(neg, 0, 0, axis=1)
    return np.vstack((pos, neg))

def get_grid_data(data, gp, num_dim_pts=20):
    probs = gp.predict_proba(data)
    grid = np.zeros((num_dim_pts, num_dim_pts))
    for i in range(data.shape[0]):
        point = data[i,:]
        grid[point[0], point[1]] = probs[i,1]
    return grid.T

def plot_scatter_data(data, ax, cname):
    converter = colors.ColorConverter()
    color = converter.to_rgba(colors.cnames[cname])
    ax.scatter(data[:,0], data[:,1], color=color)
        
def plot_prob(probs, ax, title=""):
    ax.set_title(title)
    num_dim_pts = 20
    grid = np.zeros((num_dim_pts, num_dim_pts))
    for point, prob in probs:
        point = point.flatten()
        grid[point[1], point[2]] = prob
    img = ax.imshow(grid, origin="lower")
    return img

def entropy_measure(pos_prob, neg_prob):
    pos_entropy = -(pos_prob * np.log(pos_prob))
    neg_entropy = -(neg_prob * np.log(neg_prob))
    return pos_entropy + neg_entropy

def margin_measure(pos_prob, neg_prob):
    # Only two classes, so this amounts to absval
    return abs(pos_prob - neg_prob)

def get_measures(probs, data, measure_f=entropy_measure):
    measures = []
    for i in range(probs.shape[0]):
        m = measure_f(probs[i][0], probs[i][1])
        measures.append((data[i,:], m))
    return measures

In [93]:
gp = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0), optimizer=None)
    
data = get_data()
full_data = np.copy(data)
pos_data = data[np.where(data[:,0] == 1)]
neg_data = data[np.where(data[:,0] == 0)]
fig, axes = initialize_figure()

# Get two samples of each type to initiate learning
pos_samples = pos_data[np.random.randint(pos_data.shape[0], size=2),:]
neg_samples = neg_data[np.random.randint(neg_data.shape[0], size=2),:]

samples = np.vstack((pos_samples, neg_samples))
labels = [1, 1, 0, 0]

gp.fit(samples[:,1:], labels)

grid_data = get_grid_data(full_data[:,1:], gp)
img_prob = axes[1].imshow(grid_data, cmap="hot", vmin=0.0, vmax=1.0)

max_measures = []
line, = axes[2].plot(max_measures)
axes[2].set_ylim(0,0.7)
axes[2].set_xlim(0,100)
axes[2].set_title("Max Measure")


def iterate(i):
    global samples, data, img_prob
    
    probs = gp.predict_proba(data[:,1:])
    measures = get_measures(probs, data, measure_f=entropy_measure)

    # Max measure
    next_sample = None
    measure = 0.0
    idx = None
    for j in range(len(measures)):
        if measures[j][1] > measure:
            measure = measures[j][1]
            next_sample = measures[j][0]
            idx = j
    max_measures.append(measure)
    
#     # Random
#     idx = np.random.randint(0, data.shape[0])
#     next_sample = data[idx,:]
    
    data = np.delete(data, idx, axis=0)
    
    samples = np.vstack((samples, next_sample))
    labels.append(next_sample[0])
   
    gp.fit(samples[:,1:], labels)

    axes[0].artists = []
    axes[0].set_ylim(-1,20)
    axes[0].set_xlim(-1,20)
    pos_samples = samples[np.where(samples[:,0] == 1)]
    neg_samples = samples[np.where(samples[:,0] == 0)]
    
    plot_scatter_data(pos_samples[:,1:], axes[0], "cornflowerblue")
    plot_scatter_data(neg_samples[:,1:], axes[0], "firebrick")
    
    axes[2].plot(max_measures, lw=5.0, color='g')
    axes[2].set_ylim(0,0.7)
    axes[2].set_xlim(0,400)
    
    grid_data = get_grid_data(full_data[:,1:], gp)
    img_prob = axes[1].imshow(grid_data, origin="lower", cmap="hot", vmin=0.0, vmax=1.0)
    
ani = animation.FuncAnimation(fig, iterate, frames=400)
Writer = animation.writers['ffmpeg']
writer = Writer(fps=5, bitrate=1800)
ani.save('video.mp4', writer=writer)

#HTML(ani.to_jshtml())

ValueError: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.