# Load data

In [1]:
import numpy as np

In [2]:
base_dir = '/home/vitchyr/git/rllab-rail/railrl/data/replay_buffer'

all_actions = np.loadtxt(base_dir + "/actions.csv", delimiter=',')
all_obs = np.loadtxt(base_dir + "/obs.csv", delimiter=',')
all_rewards = np.loadtxt(base_dir + "/rewards.csv", delimiter=',')
all_terminals = np.loadtxt(base_dir + "/terminals.csv", delimiter=',')

In [3]:
nonzero = [i for i, e in enumerate(all_terminals) if e != 0]
last_full_episode_idx = nonzero[-1] + 1

In [4]:
terminals = all_terminals[:last_full_episode_idx]
obs = all_obs[:last_full_episode_idx]
next_obs = all_obs[1:last_full_episode_idx+last_full_episode_idx]
actions = all_actions[:last_full_episode_idx]
rewards = all_rewards[:last_full_episode_idx]

In [5]:
positive_idxs = np.array([i for i, reward in enumerate(rewards) if reward == 1.0])
negative_idxs = np.array([i for i, reward in enumerate(rewards) if reward == -1.0])

In [100]:
Xpos = np.hstack((
    actions[positive_idxs][:, :],
    obs[positive_idxs-5][:, :3],
    next_obs[positive_idxs]
))
num_pos = Xpos.shape[0]
Xneg = np.hstack((
    actions[negative_idxs][:, :],
    obs[negative_idxs-5][:, :3],
    next_obs[negative_idxs]
))
num_neg = Xneg.shape[0]
raw_X = np.vstack((Xpos, Xneg))
num_total = num_pos + num_neg
raw_y = np.hstack((np.ones(num_pos), np.zeros(num_neg)))
print(raw_X[:1])
print(raw_y[:1])

[[ 0.02178212  0.03042386  0.94779396  1.91589522  0.48581049  0.48751163
  -0.41613626 -1.9611131   0.54505563  0.28782532 -2.06506801  0.85537577
  -1.47169018  0.27841741 -2.12769747 -0.14251566  1.11679518  1.58600557
   1.1028744   0.12133752 -0.51180929 -1.27574861 -2.10457659  0.          0.
   1.          0.          0.          0.          0.          0.
   1.91589522  0.48581049  0.48751163 -0.41613626 -1.9611131   0.54505563
   0.28782532 -2.06506801  0.85537577 -1.47169018  0.27841741 -2.12769747
  -0.14251566  1.11679518  1.58600557  1.1028744   0.12133752 -0.51180929
  -1.27574861 -2.10457659]]
[ 1.]


In [101]:
print(raw_y.shape)
print(raw_X.shape)

(1428,)
(1428, 51)


In [102]:
X, y = shuffle(raw_X, raw_y, random_state=0)
y.shape

(1428,)

# TensorFlow model to train

# Shuffle and build data set

In [8]:
from sklearn.utils import shuffle
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [10]:
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())

[ 0.8125      0.75694444  0.79166667  0.8041958   0.72027972  0.79577465
  0.78873239  0.71126761  0.75352113  0.76760563]
0.770248804404


In [12]:
rs = ShuffleSplit(n_splits=3, test_size=.25, random_state=0)
for train_index, test_index in rs.split(raw_X):
    X = raw_X[train_index]
    y = raw_y[train_index]
    model = LogisticRegression()
    model = model.fit(X, y)
    print(model.score(X, y))

0.762838468721
0.771241830065
0.77964519141


# PyTorch

In [78]:
from itertools import count

import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn as nn

In [113]:
pos = np.zeros((num_pos, 2))
pos[:, 0] = 1
neg = np.zeros((num_neg, 2))
neg[:, 1] = 1
raw_y_torch = np.vstack((pos, neg))

torch_X, torch_y = shuffle(raw_X, raw_y_torch, random_state=0)
torch_X = torch.from_numpy(torch_X).float()
torch_y = torch.from_numpy(torch_y).float()

Xv = Variable(torch_X)
Yv = Variable(torch_y)

In [114]:
feature_dim = X.shape[1]
print(feature_dim)

51


In [115]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(feature_dim, 120)
        self.fc2 = nn.Linear(120, 64)
        self.fc3 = nn.Linear(64, 2)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [116]:
def get_batch_iterator(X, y, batch_size=32):
    i = 0
    num_elements = len(X)
    while True:
        yield Variable(X[i:i+batch_size]), Variable(y[i:i+batch_size])
        i = (i + batch_size) % num_elements

In [117]:
W_target = torch.randn(feature_dim, 1) * 5
net = Net()
batch_iterator = get_batch_iterator(torch_X, torch_y)
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for _ in range(10000):
    # Get data
    batch_x, batch_y = next(batch_iterator)

    # Reset gradients
    optimizer.zero_grad()

    # Forward pass
    output = net(batch_x)
    loss = criterion(output, batch_y)

    # Backward pass
    loss.backward()

    # Apply gradients
    optimizer.step()
#     for param in net.parameters():
#         param.data.add_(-0.1 * param.grad.data)

In [118]:
loss = criterion(net(Xv), Yv).data.numpy()
print(np.mean(loss))

13.6414


In [120]:
error = ((net(Xv) - Yv)**2).data.numpy()
print(np.mean(error))

0.493697
