In [None]:
%load_ext autoreload
# NOTE(review): mode 1 only reloads modules registered via %aimport, and no
# %aimport appears in the visible cells; `%autoreload 2` may have been intended.
%autoreload 1

In [1]:
%pylab inline
import gym
import pywren
import pywren.wrenconfig as wc
from gym import wrappers
import numpywren as npyw
import numpywren
from numpywren import matrix, matrix_utils
import scipy
import scipy.sparse
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn import metrics 
from sklearn.svm import LinearSVC
from importlib import reload
from skimage.feature import blob_dog
from skimage.feature import canny
from scipy.sparse import csr_matrix
from scipy.sparse import hstack
from scipy.sparse import vstack
from scipy.sparse.linalg import spsolve

Populating the interactive namespace from numpy and matplotlib


#  Blob Prost

In [2]:
def canny_blob(state, x_ways, y_ways, with_blobs=False):
    """Detect blobs on the canny edge map of `state` and rasterise them coarsely.

    :param state: 2-D image (only its first two dimensions are read)
    :param x_ways: horizontal downsampling factor for blob coordinates
    :param y_ways: vertical downsampling factor for blob coordinates
    :param with_blobs: also return the downsampled blob coordinates
    :return: sparse single-row matrix holding the flattened
        ceil(h / y_ways) x ceil(w / x_ways) grid; when `with_blobs` is set, a
        tuple of that matrix and an (n_blobs, 2) integer array of (y, x) cells
    """
    height, width = state.shape[0], state.shape[1]
    grid_shape = (int(np.ceil(height / y_ways)), int(np.ceil(width / x_ways)))

    detections = blob_dog(canny(state), max_sigma=5, threshold=0.2)

    # Map blob centres onto the coarse grid; the third column is scaled by
    # sqrt(2) and stored as the cell value.
    rows = (detections[:, 0] // y_ways).astype(int)
    cols = (detections[:, 1] // x_ways).astype(int)
    radii = detections[:, 2] * np.sqrt(2)

    grid = np.zeros(grid_shape)
    grid[rows, cols] = radii  # blobs sharing a cell overwrite one another
    flat = csr_matrix(grid.ravel())

    if not with_blobs:
        return flat
    return flat, np.vstack((rows, cols)).T

In [95]:
def blob(frame, x_ways=8, y_ways=7, bins_per_color=5, with_prost=True):
    """Blob features for a single frame.

    Every channel is split into `bins_per_color` intensity bands; each band is
    featurized independently with `canny_blob` and the resulting sparse row
    vectors are concatenated.

    :param frame: a single frame, hxwxc (channels are indexed on axis 2)
    :param x_ways: Divide x dimension of sample by `x_ways`
    :param y_ways: Divide y dimension of sample by `y_ways`
    :param bins_per_color: Split each color into this many bins.
    :param with_prost: Append the pairwise-offset features from `prost`
        (note that `prost` sizes its colour axes as `bins_per_color * 3`)
    :return: sparse matrix with a single row: one `canny_blob` vector per
        (channel, bin) pair, followed by the `prost` vector when requested
    """
    # Bin width assumes intensities in 0..255.
    bin_size = int(np.floor(255. / bins_per_color))

    all_features = []
    all_blobs = []
    for channel in range(frame.shape[2]):  # each channel for samples
        state = frame[:, :, channel]
        for bin_idx in range(bins_per_color):  # split each channel into bins
            start = bin_idx * bin_size
            if bin_idx == bins_per_color - 1:
                # 255 is rarely a multiple of bins_per_color, so the last bin
                # is left open-ended; otherwise the brightest values (at least
                # 255 itself) would not land in any bin.
                in_bin = state >= start
            else:
                in_bin = np.logical_and(state >= start, state < start + bin_size)

            # Keep only the pixels belonging to this bin, zero elsewhere.
            binned_state = np.zeros(state.shape)
            binned_state[in_bin] = state[in_bin]

            features, blobs = canny_blob(
                binned_state, x_ways=x_ways, y_ways=y_ways, with_blobs=True)
            # Tag every blob with a combined (channel, bin) colour index.
            color = np.full((blobs.shape[0], 1), channel * bins_per_color + bin_idx)
            all_blobs.append(np.hstack((blobs[:, :2], color)))
            all_features.append(features)

    if with_prost:
        all_blobs = np.vstack(all_blobs)  # rows of (y, x, colour index)
        all_features.append(prost(all_blobs.astype(int), bins_per_color=bins_per_color))
    return hstack(all_features)

In [96]:
def prost(all_blobs, bins_per_color=3, ww=4, wh=4, grid_h=30, grid_w=20):
    """Pairwise relative-offset features between blobs.

    For every ordered pair of blobs whose offset falls inside a `wh` x `ww`
    window centred on the first blob, the squared Euclidean distance between
    the two blob rows is stored at
    ``[y1, x1, c1, dy + wh // 2, dx + ww // 2, c2]``.

    :param all_blobs: integer array of blobs in a frame, nx3, rows (y, x, colour)
    :param bins_per_color: bins per channel; colour axes have size
        `bins_per_color * 3`
    :param ww: window width (must be even)
    :param wh: window height (must be even)
    :param grid_h: number of rows in the blob grid
    :param grid_w: number of columns in the blob grid
    :return: sparse single-row matrix with
        grid_h * grid_w * (3 * bins_per_color) ** 2 * wh * ww columns
    """
    assert ww % 2 == 0 and wh % 2 == 0, 'ww and wh must be even!'
    halfw, halfh = ww // 2, wh // 2
    n_colors = bins_per_color * 3
    # Window axes are ordered (wh, ww) to match the [ry, rx] indexing below.
    features = np.zeros((grid_h, grid_w, n_colors, wh, ww, n_colors))

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 needs *squared* norms.
    # NOTE: the distance is taken over all three columns, colour included.
    sq_norms = np.sum(all_blobs * all_blobs, axis=1)[:, np.newaxis]
    D = sq_norms - 2 * all_blobs.dot(all_blobs.T) + sq_norms.T

    for i, (y1, x1, c1) in enumerate(all_blobs):
        for j, (y2, x2, c2) in enumerate(all_blobs):
            ry, rx = y2 - y1 + halfh, x2 - x1 + halfw  # ry: [0, wh), rx: [0, ww)
            if 0 <= ry < wh and 0 <= rx < ww:
                # Anchor the feature at the first blob's own cell.
                features[y1, x1, c1, ry, rx, c2] = D[i, j]
    return csr_matrix(features.ravel())

# Evaluation Setup

In [4]:
def agent_eval(agent, max_steps = float('inf'), game='SpaceInvaders-v0', seed=0, D=5, delay=4):
    """Play one episode of `game` with `agent` on blob features.

    :param agent: callable `agent(env, states)` returning an action, where
        `states` is a list of the `delay` most recent featurized frames
    :param max_steps: stop after this many steps even if the episode is not done
    :param game: gym environment id
    :param seed: seed for the environment and numpy; also names the monitor
        directory `/tmp/atari/<seed>`
    :param D: `bins_per_color` passed to `blob`
    :param delay: number of frames kept in the history handed to the agent
    :return: (last observation, total reward collected over the steps taken)
    """
    env = wrappers.Monitor(gym.make(game), "/tmp/atari/{0}".format(seed), force=True)
    try:
        env.seed(seed)
        state = blob(env.reset(), bins_per_color=D, with_prost=False)
        np.random.seed(seed)
        states = [state] * delay  # history starts as `delay` copies of frame 0
        total_reward = 0
        steps = 0
        done = False
        while not done and steps < max_steps:
            state, reward, done, _ = env.step(agent(env, states))
            total_reward += reward
            steps += 1
            states.pop(0)
            states.append(blob(state, bins_per_color=D, with_prost=False))
    finally:
        # Release the emulator and let the monitor flush its files.
        env.close()
    # Return the locally accumulated reward: the monitor's episode list only
    # gains an entry when an episode finishes, so a run cut short by
    # `max_steps` would otherwise report 0.
    return state, total_reward

In [73]:
def agent_eval_with_kernel(agent, max_steps = float('inf'), game='SpaceInvaders-v0', seed=0, D=5, delay=4,
                           n_components=8192, kernel_seed=0):
    """Play one episode of `game` with `agent` on RBF-lifted blob features.

    :param agent: callable `agent(env, states)` returning an action, where
        `states` is a list of the `delay` most recent kernel-transformed frames
    :param max_steps: stop after this many steps even if the episode is not done
    :param game: gym environment id
    :param seed: seed for the environment and numpy; also names the monitor
        directory `/tmp/atari/<seed>`
    :param D: `bins_per_color` passed to `blob`
    :param delay: number of frames kept in the history handed to the agent
    :param n_components: number of random Fourier features
    :param kernel_seed: `random_state` of the RBF sampler; it must match the
        value used when the agent's weights were fitted, otherwise the random
        features differ between training and evaluation
    :return: (last observation, total reward collected over the steps taken)
    """
    env = wrappers.Monitor(gym.make(game), "/tmp/atari/{0}".format(seed), force=True)
    try:
        env.seed(seed)
        state = blob(env.reset(), bins_per_color=D, with_prost=False)
        kernel = RBFSampler(gamma=1e-3, n_components=n_components, random_state=kernel_seed)
        kernel.fit(state)
        np.random.seed(seed)
        # Seed the history with *transformed* features so the agent sees the
        # same representation on the first step as on every later one.
        states = [kernel.transform(state)] * delay
        total_reward = 0
        steps = 0
        done = False
        while not done and steps < max_steps:
            state, reward, done, _ = env.step(agent(env, states))
            total_reward += reward
            steps += 1
            states.pop(0)
            states.append(kernel.transform(blob(state, bins_per_color=D, with_prost=False)))
    finally:
        # Release the emulator and let the monitor flush its files.
        env.close()
    # Return the locally accumulated reward: the monitor's episode list only
    # gains an entry when an episode finishes, so a run cut short by
    # `max_steps` would otherwise report 0.
    return state, total_reward

In [6]:
# Load the configuration returned by pywren.wrenconfig.default().
conf = wc.default()

In [7]:
# Override the runtime's S3 bucket and key, then build a standalone executor
# from that config with job_max_runtime=600.
conf['runtime']['s3_bucket'] = "pictureweb"
conf['runtime']['s3_key'] = "pywren.runtime/pywren_runtime-3.6-rl.tar.gz"
pwex_standalone = pywren.standalone_executor(config=conf, job_max_runtime=600)

# Featurization

In [100]:
# Training frames: 100*1024 samples of 210x160x3 uint8, one shard per 1024 samples.
X_train_bigm = matrix.BigMatrix(
    "space_invader_states_train",
    shape=[100*1024, 210, 160, 3],
    shard_sizes=[1024, 210, 160, 3],
    dtype="uint8",
    bucket="vaishaalpywren",
)
# Matching two-column uint8 matrix, sharded the same way along the first axis.
y_train_bigm = matrix.BigMatrix(
    "space_invader_actions_rewards_train",
    shape=[100*1024, 2],
    shard_sizes=[1024, 2],
    dtype="uint8",
    bucket="vaishaalpywren",
)

In [101]:
import time
def blob_batch(X_train_bigm, y_train_bigm, block_idx, *blob_args, **blob_kwargs):
    """Featurize one block of frames and return its normal-equation terms.

    Only every 10th frame of the block (and the matching label) is used.

    :param X_train_bigm: matrix of frames; block `block_idx` is fetched from it
    :param y_train_bigm: matrix whose first column holds the action labels
    :param block_idx: index of the block along the first axis
    :param blob_args: extra positional arguments forwarded to `blob`
    :param blob_kwargs: extra keyword arguments forwarded to `blob`
    :return: (X^T X, X^T Y) where X stacks the blob features and Y is the
        one-hot encoding of the labels over 6 actions
    """
    stride = 10     # subsampling step, shared by frames and labels
    n_actions = 6   # width of the one-hot label encoding

    states = X_train_bigm.get_block(block_idx,0,0,0)
    labels = y_train_bigm.get_block(block_idx,0)
    print("Downloaded now featurizing")

    all_features = [
        blob(states[i], *blob_args, **blob_kwargs)
        for i in range(0, states.shape[0], stride)
    ]
    X = vstack(all_features)
    Y_oh = np.eye(n_actions)[labels[:, 0].ravel()[::stride]]
    return X.T.dot(X), X.T.dot(Y_oh)

def blob_batch_with_kernel(X_train_bigm, y_train_bigm, block_idx, seed=0, n_components=500, *blob_args, **blob_kwargs):
    """Featurize one block of frames, lift through an RBF sampler, and return
    the normal-equation terms.

    Every 10th frame of the block (and the matching label) is used, so the
    feature and label matrices have the same number of rows.

    :param X_train_bigm: matrix of frames; block `block_idx` is fetched from it
    :param y_train_bigm: matrix whose first column holds the action labels
    :param block_idx: index of the block along the first axis
    :param seed: seeds the reference environment and the RBF sampler, so the
        same random features are produced on every worker
    :param n_components: number of random Fourier features
    :param blob_args: extra positional arguments forwarded to `blob`
    :param blob_kwargs: extra keyword arguments forwarded to `blob`
    :return: (X^T X, X^T Y) where X holds the kernel-lifted features and Y is
        the one-hot encoding of the labels over 6 actions
    """
    stride = 10     # subsampling step, shared by frames and labels
    n_actions = 6   # width of the one-hot label encoding

    # A single reset frame is featurized only to fit the sampler on it.
    env = wrappers.Monitor(gym.make("SpaceInvaders-v0"), "/tmp/atari/{0}".format(seed), force=True)
    try:
        env.seed(seed)
        state = blob(env.reset(), *blob_args, **blob_kwargs)
    finally:
        env.close()
    kernel = RBFSampler(gamma=1e-3, n_components=n_components, random_state=seed)
    kernel.fit(state)

    states = X_train_bigm.get_block(block_idx,0,0,0)
    labels = y_train_bigm.get_block(block_idx,0)
    print("Downloaded now featurizing")

    # Frames and labels must be subsampled with the same stride; otherwise
    # X and Y_oh disagree on the number of rows and X.T.dot(Y_oh) fails.
    all_features = [
        blob(states[i], *blob_args, **blob_kwargs)
        for i in range(0, states.shape[0], stride)
    ]
    X = kernel.transform(vstack(all_features))
    Y_oh = np.eye(n_actions)[labels[:, 0].ravel()[::stride]]
    return X.T.dot(X), X.T.dot(Y_oh)

In [None]:
# Time a single local run on block 0. Note that blob_batch_with_kernel returns
# an (XTX, XTY) tuple, so `X_features` is that pair rather than a feature matrix.
%time X_features = blob_batch_with_kernel(X_train_bigm, y_train_bigm, 0, bins_per_color=20)

In [None]:
# Earlier variant (20 bins, 200 blocks), kept for reference:
#futures = pwex_standalone.map(lambda x: blob_batch(X_train_bigm, y_train_bigm, x, bins_per_color=20), range(200))
# Featurize blocks 0..99 through the executor; each job returns blob_batch's (XTX, XTY) pair.
futures = pwex_standalone.map(lambda x: blob_batch(X_train_bigm, y_train_bigm, x, bins_per_color=5), range(100))

In [None]:
# Hand the featurization futures to pywren.wait and time the call.
%time pywren.wait(futures)

In [None]:
# Inspect the first job's result.
futures[0].result()

In [81]:
# Sum the per-block (X^T X, X^T Y) terms returned by the featurization jobs.
XTX, XTY = None, None
n_failed = 0
for future in futures:
    try:
        xtx, xty = future.result()
    except Exception as exc:
        # Best effort: skip failed jobs, but say so instead of hiding them.
        n_failed += 1
        print("Skipping failed block: %r" % (exc,))
        continue
    # `is None` rather than truthiness: `not XTX` raises ValueError on a
    # multi-element matrix, which previously got swallowed and caused every
    # block after the first to be dropped.
    if XTX is None:
        XTX, XTY = xtx, xty
    else:
        XTX = XTX + xtx
        XTY = XTY + xty
if XTX is None:
    raise RuntimeError("No featurization job returned a result")
if n_failed:
    print("%d of %d blocks failed" % (n_failed, len(futures)))
print(XTX.shape, XTY.shape)

(36000, 36000) (36000, 6)


In [22]:
# Solve the ridge-regularised normal equations (X^T X + 1e-5 I) w = X^T Y.
# The identity is kept sparse: a dense 36000 x 36000 float64 matrix is ~10 GB
# and adding it would densify the whole system before the sparse solve.
I = scipy.sparse.identity(XTX.shape[0], format='csc')
w = spsolve(scipy.sparse.csc_matrix(XTX) + 1e-5 * I, XTY)



In [23]:
# Persist the weights; the file name carries the feature dimension.
np.save('w_blob_kernel_%d' % XTX.shape[0], w)

In [17]:
# Reload previously saved weights (overwrites the global `w` read by ls_agent).
w = np.load('w_blob_kernel_36000.npy')

In [71]:
def ls_agent(env, states):
    """Least-squares agent: pick the action with the highest linear score.

    :param env: unused; present so all agents share one call signature
    :param states: list of featurized frames; several frames are concatenated
        column-wise into one feature row
    :return: index of the largest entry of `features.dot(w)`, where `w` is the
        notebook-level weight array
    """
    features = states[0] if len(states) == 1 else scipy.sparse.hstack(states)
    scores = features.dot(w)
    return np.argmax(scores)

In [11]:
def random_agent(env, _):
    """Baseline agent: draw an action uniformly from `env.action_space`.

    The second argument (the state history) is ignored.
    """
    n_actions = env.action_space.n
    return np.random.choice(n_actions)

In [None]:
# Run one evaluation episode locally with a single-frame history.
agent_eval_with_kernel(ls_agent, seed=20, D=20, delay=1)

In [64]:
# Same evaluation through the executor. The lambda ignores `x`, so every job
# uses seed=20; range(1) launches a single job. Rebinds the global `futures`.
futures = pwex_standalone.map(lambda x: agent_eval_with_kernel(ls_agent, seed=20, D=20, delay=1), range(1))

In [65]:
# Hand the evaluation futures to pywren.wait and time the call.
%time pywren.wait(futures)

CPU times: user 340 ms, sys: 40 ms, total: 380 ms
Wall time: 12.5 s


([<pywren.future.ResponseFuture at 0x7ff3d01829b0>], [])

In [66]:
# Inspect the first evaluation result.
futures[0].result()

UserWarning: [(1, 36000)]

In [None]:
# Collect (state, reward) results from the evaluation jobs, skipping failures.
results = []
for future in futures:
    try:
        results.append(future.result())
    except Exception as exc:
        # Still best effort, but report the failure and let KeyboardInterrupt
        # through instead of swallowing everything with a bare `except`.
        print("Skipping failed evaluation: %r" % (exc,))
rewards_ls = [result[1] for result in results]

In [None]:
# Histogram of the per-episode rewards collected in `rewards_ls`.
plt.title('36k Blob Kernel Features')
plt.hist(rewards_ls, bins=100)
plt.show()
plt.close()

In [None]:
# Mean and standard deviation of the per-episode rewards.
print(np.mean(rewards_ls))
print(np.std(rewards_ls))

In [None]:
# Write one integer reward per line (the `models/` directory must already exist).
with open('models/w_blob_kernel_36000_results.txt', 'w') as f:
    f.writelines('%d\n' % int(reward) for reward in rewards_ls)

In [51]:
# Sanity check of the RBF sampler's output shape on a single featurized frame.
# Rebinds the notebook-level names `env`, `state`, `kernel` and `k`.
np.random.seed(3)
env = wrappers.Monitor(gym.make("SpaceInvaders-v0"), "/tmp/atari/3", force=True)
env.seed(3)
state = blob(env.reset(), bins_per_color=1, with_prost=False)
kernel = RBFSampler(gamma=1e-3, n_components=8192)
kernel.fit(state)
# Transform an all-zero input of the same shape as the featurized frame.
k = kernel.transform(np.zeros(state.shape))
print(k.shape)

(1, 8192)


In [None]:
# NOTE(review): `kernel_approx` is not defined in any visible cell and
# transform() is called without an input; this looks like a leftover scratch cell.
kernel_approx.transform()

In [None]:
# NOTE(review): `blobprost` and `state_0` are not defined in any visible cell.
reload(blobprost)
imshow(state_0[0])
plt.figure()
# NOTE(review): blob_batch is defined as (X_train_bigm, y_train_bigm, block_idx, ...);
# this call passes 0 as y_train_bigm and omits block_idx.
X_features = blob_batch(X_train_bigm, 0, bins_per_color=1, with_prost=False)

In [None]:
# NOTE(review): `kernel_approx` is not defined in any visible cell.
kernel_approx

In [None]:
# NOTE(review): this call does not match blob_batch's visible signature
# (y_train_bigm is missing), and blob_batch returns an (XTX, XTY) tuple, not a
# feature matrix to stack; the cell appears to target an older blob_batch.
futures = pwex_standalone.map(lambda x: blob_batch(X_train_bigm, x, bins_per_color=1), X_train_bigm._block_idxs(0))
%time pywren.wait(futures)
featurized = scipy.sparse.vstack([f.result() for f in futures])

In [None]:
# Materialise the label matrix locally via its .numpy() method.
y_train = y_train_bigm.numpy()

In [None]:
# One-hot encode the first label column, with max(label) + 1 classes.
# NOTE(review): y_train_bigm is declared uint8; if that dtype carries over,
# `max + 1` is computed in uint8 — confirm the maximum label is below 255.
%time y_train_enc = np.eye(np.max(y_train[:, 0])+1)[y_train[:, 0]]

In [None]:
# Fit a linear SGD classifier on the stacked features against the first label column.
# NOTE(review): `n_iter` was replaced by `max_iter` in later scikit-learn
# releases — confirm against the installed version.
clf_agent = SGDClassifier(verbose=3, n_iter=400, n_jobs=10)
clf_agent.fit(featurized, y_train[:, 0])

In [None]:
# Accuracy on the training data itself (not a held-out set).
metrics.accuracy_score(clf_agent.predict(featurized), y_train[:, 0])

In [None]:
def _blob_agent(env, state, *blob_args, **blob_kwargs):
    """Predict an action with `clf_agent` from the blob features of `state`.

    Extra positional and keyword arguments are forwarded to the featurizer.

    NOTE(review): this calls `blobprost.blob`, but `blobprost` is not imported
    in any visible cell (a notebook-level `blob` exists). Also, `agent_eval`
    passes agents a list of already-featurized frames, not a raw frame —
    confirm which calling convention this agent is meant for.
    """
    return clf_agent.predict(blobprost.blob(state, *blob_args, **blob_kwargs))

In [None]:
# Bind bins_per_color=1 so the agent has the two-argument (env, state) signature.
blob_agent = lambda x,y: _blob_agent(x, y, bins_per_color=1)

In [None]:
# Evaluate the classifier agent on seeds 0..149 through the executor.
futures = pwex_standalone.map(lambda x: agent_eval(blob_agent, seed=int(x)), range(150))

In [None]:
# Second element of each agent_eval result is the episode reward.
rewards_clf = [f.result()[1] for f in futures]