# Load data

In [1]:
import numpy as np

In [2]:
base_dir = '/home/vitchyr/git/rllab-rail/railrl/data/replay_buffer'

all_actions = np.loadtxt(base_dir + "/actions.csv", delimiter=',')
all_obs = np.loadtxt(base_dir + "/obs.csv", delimiter=',')
all_rewards = np.loadtxt(base_dir + "/rewards.csv", delimiter=',')
all_terminals = np.loadtxt(base_dir + "/terminals.csv", delimiter=',')

In [3]:
nonzero = [i for i, e in enumerate(all_terminals) if e != 0]
last_full_episode_idx = nonzero[-1] + 1

In [188]:
terminals = all_terminals[:last_full_episode_idx]
obs = all_obs[:last_full_episode_idx]
next_obs = all_obs[1:last_full_episode_idx+1]
actions = all_actions[:last_full_episode_idx]
rewards = all_rewards[:last_full_episode_idx]

In [5]:
positive_idxs = np.array([i for i, reward in enumerate(rewards) if reward == 1.0])
negative_idxs = np.array([i for i, reward in enumerate(rewards) if reward == -1.0])

In [360]:
Xpos = np.hstack((
    actions[positive_idxs],
    obs[positive_idxs],
    next_obs[positive_idxs]
))
Xneg = np.hstack((
    actions[negative_idxs],
    obs[negative_idxs],
    next_obs[negative_idxs]
))
num_pos = Xpos.shape[0]
num_neg = Xneg.shape[0]
num_total = num_pos + num_neg


raw_X_posneg = np.vstack((Xpos, Xneg))
raw_y_posneg = np.hstack((np.ones(num_pos), np.zeros(num_neg)))

In [361]:
X_posneg, y_posneg = shuffle(raw_X_posneg, raw_y_posneg, random_state=0)

# TensorFlow model to train

# Shuffle and build data set

In [152]:
from sklearn.utils import shuffle
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [None]:
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())

In [12]:
rs = ShuffleSplit(n_splits=3, test_size=.25, random_state=0)
for train_index, test_index in rs.split(raw_X):
    X = raw_X[train_index]
    y = raw_y[train_index]
    model = LogisticRegression()
    model = model.fit(X, y)
    print(model.score(X, y))

0.762838468721
0.771241830065
0.77964519141


# PyTorch

In [167]:
from itertools import count

import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

In [226]:
def get_torch_iterator(X, y, batch_size=32):
    i = 0
    num_elements = len(X)
    while True:
        yield Variable(X[i:i+batch_size]), Variable(y[i:i+batch_size])
        i = (i + batch_size) % num_elements

##  Regression

In [379]:
X_torch = torch.from_numpy(X_posneg).float()
y_torch = torch.from_numpy(y_posneg).float()

Xv = Variable(regression_X)
Yv = Variable(regression_y)

In [380]:
class RegressionNet(nn.Module):
    def __init__(self, feature_dim, hidden_sizes):
        super().__init__()
        # an affine operation: y = Wx + b
        self.fcs = []
        last_size = feature_dim
        for size in hidden_sizes:
            self.fcs.append(nn.Linear(last_size, size))
            last_size = size
        self.last_fc = nn.Linear(last_size, 1)

    def forward(self, x):
        x = x.view(-1, self.num_flat_features(x))
        for fc in self.fcs:
            x = F.relu(fc(x))
        x = self.last_fc(x)
        x = F.sigmoid(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [381]:
feature_dim = X.shape[1]
hidden_sizes = [100, 64, 32]
regression_net = RegressionNet(feature_dim, hidden_sizes)
batch_iterator = get_torch_iterator(X_torch, y_torch)
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for _ in range(10000):
    # Get data
    batch_x, batch_y = next(batch_iterator)

    # Reset gradients
    optimizer.zero_grad()

    # Forward pass
    output = regression_net(batch_x)
    loss = criterion(output, batch_y)
    
    # Backward pass
    loss.backward()

    # Apply gradients
    optimizer.step()

In [373]:
yhats = regression_net(Xv)
loss = criterion(yhats, Yv).data.numpy()[0]

yhats_np = yhats.data.numpy().flatten()
y_np = Yv.data.numpy()

def label(y):
    return np.round(y)

print("MSE Loss", loss)
print("Accuracy", np.mean(label(y_np) == label(yhats_np)))

[ 0.49812493  0.49970654  0.48895907 ...,  0.4981181   0.50407118
  0.49176615]
[ 1.  0.  1. ...,  0.  1.  1.]
MSE Loss 0.694374
Accuracy 0.489495798319


## One-hot

In [397]:
class OneHotNet(nn.Module):

    def __init__(self, feature_dim, hidden_sizes, num_classes):
        super().__init__()
        # an affine operation: y = Wx + b
        self.fcs = []
        last_size = feature_dim
        for size in hidden_sizes:
            self.fcs.append(nn.Linear(last_size, size))
            last_size = size
        self.last_fc = nn.Linear(last_size, num_classes)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = x.view(-1, self.num_flat_features(x))
        for fc in self.fcs:
            x = F.relu(fc(x))
        x = self.last_fc(x)
        x = self.softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [398]:
def to_onehot_n(inds, dim):
    ret = np.zeros((len(inds), dim))
    ret[np.arange(len(inds)), inds] = 1
    return ret
all_y_onehot_3 = to_onehot_n((rewards+1).astype(int), 3)

## two-way one-hot vector for rward of +/- 1

In [399]:
y2_torch = y_torch.long()

In [400]:
feature_dim = X.shape[1]
num_classes = 2
hidden_sizes = [100, 3]
net2 = OneHotNet(feature_dim, hidden_sizes, num_classes)
batch_iterator2 = get_torch_iterator(X_torch, y2_torch)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for _ in range(1000):
    # Get data
    batch_x, batch_y = next(batch_iterator2)

    # Reset gradients
    optimizer.zero_grad()

    # Forward pass
    output = net2(batch_x)
    loss = criterion(output, batch_y)
    
    # Backward pass
    loss.backward()

    # Apply gradients
    optimizer.step()

In [404]:
yhat_torch = net2(Xv)
yhat_numpy = yhat_torch.data.numpy()
yhat = np.argmax(yhat_numpy, axis=1)
y_numpy = Yv.data.numpy()

print(yhat_numpy.shape)
print(yhat.shape)
print(y_numpy.shape)


loss = criterion(yhat_torch, Yv.long())
print("Accuracy", np.mean(y == yhat))
print("Cross Entropy", loss.data.numpy()[0])
print("MSE Onehots", np.mean((yhat_soft-all_y_onehot)**2))

(1428, 2)
(1428,)
(1428,)
Accuracy 0.0
Cross Entropy 0.694975
MSE Onehots 0.096712527309




## Three-way one-hot vector for reward of +1, 0, or -1

In [374]:
def to_onehot_n(inds, dim):
    ret = np.zeros((len(inds), dim))
    ret[np.arange(len(inds)), inds] = 1
    return ret
all_y_onehot = to_onehot_n((rewards+1).astype(int), 3)

In [375]:
all_X = np.hstack((
    actions,
    obs,
    next_obs
))
all_X_torch = torch.from_numpy(all_X).float()
all_y_torch = torch.from_numpy(rewards + 1).long()
all_Xv = Variable(all_X_torch)
all_Yv = Variable(all_y_torch)

In [376]:
feature_dim = X.shape[1]
num_classes = 3
hidden_sizes = [100, 3]
net = OneHotNet(feature_dim, hidden_sizes, num_classes)
batch_iterator = get_torch_iterator(all_X_torch, all_y_torch)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for _ in range(1000):
    # Get data
    batch_x, batch_y = next(batch_iterator)

    # Reset gradients
    optimizer.zero_grad()

    # Forward pass
    output = net(batch_x)
    loss = criterion(output, batch_y)
    
    # Backward pass
    loss.backward()

    # Apply gradients
    optimizer.step()

# Analyze model

In [377]:
loss = criterion(net(all_Xv), all_Yv).data.numpy()
yhat_soft = net(all_Xv).data.numpy()
y = all_Yv.data.numpy().astype(int)
yhat = np.argmax(yhat_soft, axis=1)
print("Accuracy", np.mean(y == yhat))
print("Cross Entropy", np.mean(loss))
print("MSE Onehots", np.mean((yhat_soft-all_y_onehot)**2))

Accuracy 0.857128564282
Cross Entropy 0.79907
MSE Onehots 0.096712527309


In [378]:
from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y, yhat)
print(cnf_matrix)

[[   0  723    0]
 [   0 8567    0]
 [   0  705    0]]
