# Load data

In [1]:
import numpy as np

In [2]:
base_dir = '/home/vitchyr/git/rllab-rail/railrl/data/replay_buffer'

all_actions = np.loadtxt(base_dir + "/actions.csv", delimiter=',')
all_obs = np.loadtxt(base_dir + "/obs.csv", delimiter=',')
all_rewards = np.loadtxt(base_dir + "/rewards.csv", delimiter=',')
all_terminals = np.loadtxt(base_dir + "/terminals.csv", delimiter=',')

In [3]:
nonzero = [i for i, e in enumerate(all_terminals) if e != 0]
last_full_episode_idx = nonzero[-1] + 1

In [188]:
terminals = all_terminals[:last_full_episode_idx]
obs = all_obs[:last_full_episode_idx]
next_obs = all_obs[1:last_full_episode_idx+1]
actions = all_actions[:last_full_episode_idx]
rewards = all_rewards[:last_full_episode_idx]

In [5]:
positive_idxs = np.array([i for i, reward in enumerate(rewards) if reward == 1.0])
negative_idxs = np.array([i for i, reward in enumerate(rewards) if reward == -1.0])

In [164]:
Xpos = np.hstack((
    actions[positive_idxs],
    obs[positive_idxs],
    next_obs[positive_idxs]
))
num_pos = Xpos.shape[0]
Xneg = np.hstack((
    actions[negative_idxs],
    obs[negative_idxs],
    next_obs[negative_idxs]
))
num_neg = Xneg.shape[0]
raw_X = np.vstack((Xpos, Xneg))
num_total = num_pos + num_neg

[[ 0.02178212  0.03042386  0.94779396  1.91589522  0.48581049  0.48751163
  -0.41613626 -1.9611131   0.54505563  0.28782532 -2.06506801  0.85537577
  -1.47169018  0.27841741 -2.12769747 -0.14251566  1.11679518  1.58600557
   1.1028744   0.12133752 -0.51180929 -1.27574861 -2.10457659  0.          0.
   0.          0.          1.          2.433213    1.27684808  0.78330934
  -0.28774428 -1.61146951  0.08649832  0.67768312 -1.63380432  1.28239703
  -0.50291008 -1.02575374  0.13314483 -0.79683441  1.43321633 -0.06803162
   0.56435943  0.0181534  -1.09016824 -1.55162406  0.51332968  0.          0.
   0.          0.          0.          1.91589522  0.48581049  0.48751163
  -0.41613626 -1.9611131   0.54505563  0.28782532 -2.06506801  0.85537577
  -1.47169018  0.27841741 -2.12769747 -0.14251566  1.11679518  1.58600557
   1.1028744   0.12133752 -0.51180929 -1.27574861 -2.10457659]]
[ 1.]


In [165]:
print(raw_y.shape)
print(raw_X.shape)

(1428,)
(1428, 73)


In [166]:
X, y = shuffle(raw_X, raw_y, random_state=0)
y.shape

(1428,)

# TensorFlow model to train

# Shuffle and build data set

In [184]:
raw_y = np.hstack((np.ones(num_pos), np.zeros(num_neg)))

In [152]:
from sklearn.utils import shuffle
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [None]:
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())

In [12]:
rs = ShuffleSplit(n_splits=3, test_size=.25, random_state=0)
for train_index, test_index in rs.split(raw_X):
    X = raw_X[train_index]
    y = raw_y[train_index]
    model = LogisticRegression()
    model = model.fit(X, y)
    print(model.score(X, y))

0.762838468721
0.771241830065
0.77964519141


# PyTorch

In [167]:
from itertools import count

import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

In [192]:
pos = np.zeros((num_pos, 2))
pos[:, 0] = 1
neg = np.zeros((num_neg, 2))
neg[:, 1] = 1
raw_y_onehot = np.vstack((pos, neg))

torch_X, torch_y = shuffle(raw_X, raw_y_onehot, random_state=0)
torch_X = torch.from_numpy(torch_X).float()
torch_y = torch.from_numpy(torch_y).float()

Xv = Variable(torch_X)
Yv = Variable(torch_y)

## Three-way one-hot vector for reward of +1, 0, or -1

In [244]:
def to_onehot_n(inds, dim):
    ret = np.zeros((len(inds), dim))
    ret[np.arange(len(inds)), inds] = 1
    return ret
all_y_onehot = to_onehot_n((rewards+1).astype(int), 3)

In [231]:
all_X = np.hstack((
    actions,
    obs,
    next_obs
))
all_X_torch = torch.from_numpy(all_X).float()
all_y_torch = torch.from_numpy(rewards + 1).long()
all_Xv = Variable(all_X_torch)
all_Yv = Variable(all_y_torch)

In [225]:
class Net(nn.Module):

    def __init__(self, feature_dim, num_classes):
        super(Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(feature_dim, 100)
        self.fc2 = nn.Linear(100, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [226]:
def get_torch_iterator(X, y, batch_size=32):
    i = 0
    num_elements = len(X)
    while True:
        yield Variable(X[i:i+batch_size]), Variable(y[i:i+batch_size])
        i = (i + batch_size) % num_elements

In [227]:
print(all_X_torch.size())
print(all_y_torch.size())

torch.Size([9995, 73])
torch.Size([9995])


In [228]:
feature_dim = X.shape[1]
num_classes = 3
net = Net(feature_dim, num_classes)
batch_iterator = get_torch_iterator(all_X_torch, all_y_torch)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for _ in range(1000):
    # Get data
    batch_x, batch_y = next(batch_iterator)

    # Reset gradients
    optimizer.zero_grad()

    # Forward pass
    output = net(batch_x)
    loss = criterion(output, batch_y)
    
    # Backward pass
    loss.backward()

    # Apply gradients
    optimizer.step()
#     for param in net.parameters():
#         param.data.add_(-0.1 * param.grad.data)

# Analyze model

In [256]:
loss = criterion(net(all_Xv), all_Yv).data.numpy()
yhat_soft = net(all_Xv).data.numpy()
y = all_Yv.data.numpy().astype(int)
yhat = np.argmax(yhat_soft, axis=1)
print("Accuracy", np.mean(y == yhat))
print("Cross Entropy", np.mean(loss))
print("MSE Onehots", np.mean((yhat_soft-all_y_onehot)**2))

Accuracy 0.857128564282
Cross Entropy 0.694268
MSE Onehots 0.0952429606003


In [260]:
from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y, yhat)
print(cnf_matrix)

[[   0  723    0]
 [   0 8567    0]
 [   0  705    0]]
