In [3]:
#from train_encoder_script.py

from lib.restartable_pendulum import RestartablePendulumEnv
from lib.state_rep import train_encoder
import numpy as np
from matplotlib import pyplot as plt
import itertools
import sys

def main():
    """Train one pendulum state-representation encoder and save its artifacts.

    A job index selects one (architecture, trajectory length) pair from the
    Cartesian product of ``archs`` and ``traj_lens``. The index is taken from
    a 1-based ``--job=N`` command-line argument; when no such argument is
    present (e.g. when called from a notebook kernel) the first combination
    is used.

    Side effects: writes ``projectors.npz``, ``weights.npz``, ``biases.npz``,
    ``train_params.txt`` and ``losses.png``. ``save_dir`` is used as a
    filename *prefix* (the names are appended without a separator).

    Raises:
        ValueError: if the ``--job=`` value is not an integer.
        IndexError: if the job index falls outside the parameter grid.
    """
    import os  # local import: only needed to prepare the output directory

    # Default first, then let the command line override it. Previously the
    # default was assigned *after* the loop, which discarded any --job= value
    # (and without that assignment the name was unbound when no flag was given).
    job_iter = 0
    for arg in sys.argv:
        if arg.startswith('--job='):
            # --job= is 1-based on the command line; convert to a 0-based index
            job_iter = int(arg.split('--job=')[1]) - 1

    # specify environment information
    env = RestartablePendulumEnv()
    state_dim = 3
    act_dim = 1

    # specify training details to loop over
    archs = [[64], [64,64], [64,64,64], [128], [128, 128], [128,128,128], [300], [300,300]]
    traj_lens = [5,10,20]
    param_lists = [archs, traj_lens]
    traj_type = "drive"

    # pick this job's (widths, traj_len) combination out of the full grid
    i = job_iter
    combo = list(itertools.product(*param_lists))[i]

    parameters = {
        "n_episodes" : 3*20000,
        "n_passes" : 1,
        "batch_size" : 100,
        "learning_rate" : 1e-3,
        "widths" : combo[0],
        "traj_len" : combo[1]
    }

    widths = parameters["widths"]
    traj_len = parameters["traj_len"]
    save_dir = "./experiments/state_rep_params/pendulum/{}".format(i+32)
    n_episodes = parameters["n_episodes"]
    n_passes = parameters["n_passes"]
    batch_size = parameters["batch_size"]
    learning_rate = parameters["learning_rate"]

    # Make sure the directory part of the prefix exists *before* the long
    # training run, so the final saves cannot fail on a missing directory.
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    init_projectors = None
    init_weights = None
    init_biases = None

    # generate the seeds for the training trajectories:
    # first component uniform in [-pi, pi), second uniform in [-8, 8)
    start_states = [np.array([(np.random.rand(1)[0]*2 - 1)*np.pi, (np.random.rand(1)[0]*2 - 1)*8])
                    for _ in range(n_episodes)]
    # one action per episode, uniform in [-2, 2)
    start_actions = [np.random.rand(1)*4-2 for _ in range(n_episodes)]

    projectors,weights,biases,losses = train_encoder(env, start_states, start_actions, traj_len, n_passes,
                                                     state_dim, act_dim, widths,
                                                     traj_type=traj_type,
                                                     learning_rate=learning_rate,
                                                     init_projectors=init_projectors,
                                                     init_weights=init_weights,
                                                     init_biases=init_biases,
                                                     batch_size = batch_size,
                                                     save_dir = save_dir,
                                                     show_progress=False,
                                                     track_loss_every = int(n_episodes/(batch_size*200)))

    # save the representation weights
    np.savez(save_dir + "projectors.npz",*projectors)
    np.savez(save_dir + "weights.npz",*weights)
    np.savez(save_dir + "biases.npz",*biases)

    # save the training params, one "name value" pair per line
    with open(save_dir + "train_params.txt","w") as f:
        for key, value in parameters.items():
            f.write("{} {}\n".format(key, value))

    # save the loss curve and clear the figure so later plots start clean
    plt.plot(losses)
    plt.savefig(save_dir + "losses.png")
    plt.clf()
        

In [2]:
# Run the training job defined by main().
# NOTE(review): the UnboundLocalError in this cell's saved output apparently
# comes from an earlier version of main() that did not yet assign job_iter
# when no --job= argument was present.
main()



UnboundLocalError: local variable 'job_iter' referenced before assignment

In [7]:
from lib.restartable_pendulum import RestartablePendulumEnv
from lib.state_rep_torch import train_encoder
import gym
import numpy as np
from matplotlib import pyplot as plt
import itertools
import sys

def main2():
    """Train one encoder with the torch implementation and save its artifacts.

    A job slot ``i`` indexes into ``jobs``, which in turn selects one
    (architecture, trajectory length, learning rate) combination from the
    parameter grid. ``i`` is taken from a 1-based ``--job=N`` command-line
    argument and defaults to the first slot when no such argument is present
    (e.g. when called from a notebook kernel).

    Side effects: writes ``projectors.npz``, ``weights.npz``, ``biases.npz``,
    ``train_params.txt`` and ``losses.png``. ``save_dir`` is used as a
    filename *prefix* (the names are appended without a separator).

    Raises:
        ValueError: if the ``--job=`` value is not an integer.
        IndexError: if the job slot falls outside ``jobs`` or the grid.
    """
    import os  # local import: only needed to prepare the output directory

    # Default first, then let the command line override it. Previously the
    # default was assigned *after* the loop, which discarded any --job= value.
    i = 0
    for arg in sys.argv:
        if arg.startswith('--job='):
            # --job= is 1-based on the command line; convert to a 0-based index
            i = int(arg.split('--job=')[1]) - 1

    # specify environment information
    env = RestartablePendulumEnv()
    state_dim = 3
    act_dim = 1

    # specify training details to loop over
    # `jobs` lists the grid positions that are actually (re)run
    jobs = [2, 5, 6, 8, 9, 10, 13, 20, 24, 28, 32]
    # every architecture starts with the input width (state_dim)
    archs = [[state_dim]+arch for arch in [[128],
                                           [256],
                                           [512],
                                           [1024],
                                           [128,128],
                                           [256,256],
                                           [512,512],
                                           [512,256],
                                           [512,128]
                                          ]]
    traj_lens = [20]
    lrs = [.0001, .0005, .001, .005]
    param_lists = [archs, traj_lens, lrs]

    combo = list(itertools.product(*param_lists))[jobs[i]]

    parameters = {
        "n_episodes" :30000,
        "batch_size" : 50,
        "learning_rate" : combo[2],
        "widths" : combo[0],
        "traj_len" : combo[1]
    }

    widths = parameters["widths"]
    traj_len = parameters["traj_len"]
    save_dir = "./experiments/extra_train_exps/{}".format(i)
    n_episodes = parameters["n_episodes"]
    batch_size = parameters["batch_size"]
    learning_rate = parameters["learning_rate"]

    # Make sure the directory part of the prefix exists *before* training,
    # so the final saves cannot fail on a missing directory.
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)

    # NOTE(review): the notebook's saved output for this call is
    # "TypeError: train_encoder() got multiple values for argument 'lr'".
    # Other cells call lib.state_rep_torch.train_encoder as
    # train_encoder(prednet, traj_sampler, n_episodes, ...), so this
    # positional argument list appears to target an older signature.
    # Left unchanged here -- TODO port to the current API.
    params, losses = train_encoder(env, traj_len, state_dim, act_dim, widths, n_episodes,
                                   lr=learning_rate,
                                   batch_size = batch_size,
                                   show_progress=False,
                                   track_loss_every = 10,
                                   drift=True
                                  )

    # `params` is sliced as an interleaved list: weight, bias, weight, bias, ...
    # for the len(widths)-1 layers, followed by traj_len+1 projectors.
    n_layer_params = 2*(len(widths)-1)
    weights = list(params[:n_layer_params:2])
    biases = [b.flatten() for b in params[1:n_layer_params:2]]
    projectors = params[n_layer_params:n_layer_params+traj_len+1]

    # save the representation weights
    np.savez(save_dir + "projectors.npz",*projectors)
    np.savez(save_dir + "weights.npz",*weights)
    np.savez(save_dir + "biases.npz",*biases)

    # save the training params, one "name value" pair per line
    with open(save_dir + "train_params.txt","w") as f:
        for key, value in parameters.items():
            f.write("{} {}\n".format(key, value))

    # save the loss curve and clear the figure so later plots start clean
    plt.plot(losses)
    plt.savefig(save_dir + "losses.png")
    plt.clf()

In [8]:
# Run the torch-based training job defined by main2().
# NOTE(review): the saved output shows this call failing with
# "TypeError: train_encoder() got multiple values for argument 'lr'",
# raised by the train_encoder call inside main2.
main2()



TypeError: train_encoder() got multiple values for argument 'lr'

In [None]:
import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew

# Train a convolutional encoder on pixel observations of the pendulum, save
# the trained model, then visualize the learned representation.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=True)


# nonlinearity handed to the encoder via `sigma`; ELU is the disabled alternative
#nonlin = torch.nn.ELU()
nonlin = torch.nn.functional.relu
layers = [50, 10, 5] # architecture of encoder after the 2 conv layers
save_dir = "./" # used as a path prefix: file names are appended directly
n_episodes = 100000 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/4) # save the model every so often

# encoder input shape is the observation shape with its first axis dropped
# (presumably the stacked-frame axis from `repeats` -- TODO confirm)
encnet = srt.ConvEncoderNet(layers,env.observation_space.shape[1:],sigma=nonlin)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

# NOTE(review): positional arguments are (encoder, layers[-1], 1, 2); presumably
# representation dim, action dim and number of pieces -- confirm against
# srt.PiecewiseForwardNet.
#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,layers[-1],1,2)
deterministic_args = None 

# sampler built from the environment plus the pendulum action/state batch
# samplers; kept on the CPU with deterministic sampling switched off
traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args)

# train; the call returns a (net, losses) pair
net, losses = srt.train_encoder(prednet,traj_sampler,n_episodes,
                                batch_size=batch_size,
                                track_loss_every=int(n_episodes/100),
                                lr=learning_rate,
                                save_every=save_every,
                                save_path=save_dir)

# persist the whole trained model object as "./net"
torch.save(net,save_dir+"net")



# what follows is code to visualize the representations
d = 5 # must match the final entry in layers
n_samps = 500
# NOTE: this rebinds `env` to the wrapper, so re-running the cell wraps the
# wrapper again. Presumably reset() then returns the encoded observation
# projected by the identity matrix -- TODO confirm in encoder_wrappers.
env = ew.TorchEncoderWrapper(env,net.encoder,np.eye(d))
X = np.empty((n_samps,d))
for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
    # restart at state [ang, 0] and store what reset() returns as row i
    X[i,:] = env.reset(state=[ang,0])
utils.visualize_trajectory(X)


  allow_unreachable=True)  # allow_unreachable flag


Epoch Completion: 88.000%, Loss: 0.066

In [1]:

import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew

# Set up the pixel-pendulum environment, networks and sampler, then load a
# previously saved model from disk instead of training a new one.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=True)


# nonlinearity handed to the encoder via `sigma`; ELU is the disabled alternative
#nonlin = torch.nn.ELU()
nonlin = torch.nn.functional.relu
layers = [50, 10, 5] # architecture of encoder after the 2 conv layers
save_dir = "./"
n_episodes = 100000 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/4) # save the model every so often

# encoder input shape is the observation shape with its first axis dropped
encnet = srt.ConvEncoderNet(layers,env.observation_space.shape[1:],sigma=nonlin)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,layers[-1],1,2)
deterministic_args = None 

# sampler is constructed here but not used by the load below
traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args)

# Load a saved model object from the file "_3.0net" in the working directory.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
net3 = torch.load("_3.0net")

In [2]:
# Visualize the representation produced by the loaded model `net3`.
d = 5 # must match the final entry in layers
n_samps = 500
# NOTE: this rebinds `env` to the wrapper, so re-running the cell wraps the
# wrapper again.
env = ew.TorchEncoderWrapper(env,net3.encoder,np.eye(d))
X = np.empty((n_samps,d))
for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
    # restart at state [ang, 0] and store what reset() returns as row i
    X[i,:] = env.reset(state=[ang,0])
utils.visualize_trajectory(X)

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var


In [3]:
# Display the (n_samps, d) array filled by the visualization loop.
# NOTE(review): every visible row of the saved output is identical, i.e. the
# stored values did not vary with the reset angle in that run.
X

array([[ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10],
       [ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10],
       [ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10],
       ...,
       [ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10],
       [ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10],
       [ 1.69577461e-07, -2.51884460e-02,  4.55234947e-11,
         9.04095410e-10,  9.53646606e-10]])

In [4]:
import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew

# Short training run (1000 batches) of the convolutional encoder on pixel
# observations, followed by saving and visualizing the representation.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=True)


# nonlinearity handed to the encoder via `sigma`; ELU is the disabled alternative
#nonlin = torch.nn.ELU()
nonlin = torch.nn.functional.relu
layers = [50, 10, 5] # architecture of encoder after the 2 conv layers
save_dir = "./" # used as a path prefix: file names are appended directly
n_episodes = 1000 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/4) # save the model every so often

# encoder input shape is the observation shape with its first axis dropped
encnet = srt.ConvEncoderNet(layers,env.observation_space.shape[1:],sigma=nonlin)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,layers[-1],1,2)
deterministic_args = None 

# sampler built from the environment plus the pendulum action/state batch
# samplers; kept on the CPU with deterministic sampling switched off
traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args)

# train; the call returns a (net, losses) pair
net, losses = srt.train_encoder(prednet,traj_sampler,n_episodes,
                                batch_size=batch_size,
                                track_loss_every=int(n_episodes/100),
                                lr=learning_rate,
                                save_every=save_every,
                                save_path=save_dir)

# persist the whole trained model object as "./net"
torch.save(net,save_dir+"net")



# what follows is code to visualize the representations
d = 5 # must match the final entry in layers
n_samps = 500
# NOTE: this rebinds `env` to the wrapper, so re-running the cell wraps the
# wrapper again.
env = ew.TorchEncoderWrapper(env,net.encoder,np.eye(d))
X = np.empty((n_samps,d))
for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
    # restart at state [ang, 0] and store what reset() returns as row i
    X[i,:] = env.reset(state=[ang,0])
utils.visualize_trajectory(X)

Epoch Completion: 100.000%, Loss: 0.006

In [5]:
# Display the (n_samps, d) array filled by the visualization loop.
# NOTE(review): the visible rows of the saved output take only two distinct
# values, so the stored representation barely varies with the reset angle.
X

array([[-0.0010578 , -0.00092101, -0.00078934,  0.00018673,  0.00184287],
       [ 0.00081441,  0.00088192, -0.00212788,  0.0014689 , -0.00119241],
       [ 0.00081441,  0.00088192, -0.00212788,  0.0014689 , -0.00119241],
       ...,
       [-0.0010578 , -0.00092101, -0.00078934,  0.00018673,  0.00184287],
       [-0.0010578 , -0.00092101, -0.00078934,  0.00018673,  0.00184287],
       [-0.0010578 , -0.00092101, -0.00078934,  0.00018673,  0.00184287]])

# Train representation for dynamics and reward estimation

In [6]:
import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew

# Train the pixel encoder jointly with a reward network: the forward model is
# built with fit_reward=True and the sampler is asked to output rewards.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=True)


# nonlinearity handed to the encoder via `sigma`; ELU is the disabled alternative
#nonlin = torch.nn.ELU()
nonlin = torch.nn.functional.relu
layers = [50, 10, 5] # architecture of encoder after the 2 conv layers
save_dir = "./" # used as a path prefix: file names are appended directly
n_episodes = 500 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/2) # save the model every so often

# encoder input shape is the observation shape with its first axis dropped
encnet = srt.ConvEncoderNet(layers,env.observation_space.shape[1:],sigma=nonlin)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

# reward network: input width is 2*state dim+action dim (two 5-d representations
# plus a 1-d action), output width is 1
rnet = srt.EncoderNet([2*5+1, 50, 10, 1])

# NOTE(review): the meaning of mu and alpha is defined in
# srt.PiecewiseForwardNet and is not visible here -- presumably loss weights.
#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,layers[-1],1,2,fit_reward=True,mu=1, r_encoder = rnet,alpha=1)
deterministic_args = None 



# same sampler as the dynamics-only setup, but additionally emitting rewards
traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args,
                                          output_rewards=True)

# train; the call returns a (net, losses) pair
net, losses = srt.train_encoder(prednet,traj_sampler,n_episodes,
                                batch_size=batch_size,
                                track_loss_every=int(n_episodes/100),
                                lr=learning_rate,
                                save_every=save_every,
                                save_path=save_dir)

# persist the whole trained model object; note the resulting name is "./.net"
torch.save(net,save_dir+".net")



# what follows is code to visualize the representations (disabled in this cell;
# the first character of each line had been overwritten by the '#', restored here)
# d = 5 # must match the final entry in layers
# n_samps = 500
# env = ew.TorchEncoderWrapper(env,net.encoder,np.eye(d))
# X = np.empty((n_samps,d))
# for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
#     X[i,:] = env.reset(state=[ang,0])
# utils.visualize_trajectory(X)


Epoch Completion: 100.000%, Loss: 5.618

In [2]:
# Broadcasting check: a shape-(2,) array minus a shape-(2,1) array yields a
# (2,2) result rather than an element-wise difference.
np.asarray([1,1])-np.asarray([[2],[2]])

array([[-1, -1],
       [-1, -1]])

In [7]:
# what follows is code to visualize the representations
# This variant bypasses the TorchEncoderWrapper and calls the encoder directly
# on each observation returned by env.reset().
d = 5 # must match the final entry in layers
n_samps = 500
#env = ew.TorchEncoderWrapper(env,net.encoder,np.eye(d))
X = np.empty((n_samps,d))
for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
    #print(env.reset(state=[ang,0]))
    obs=env.reset(state=[ang,0])
    # add a leading batch axis, convert to a float tensor and encode without
    # tracking gradients
    with torch.no_grad():
        X[i,:] = net.encoder.forward(torch.from_numpy(np.expand_dims(obs,0)).float())
utils.visualize_trajectory(X)

In [8]:
# Display the (n_samps, d) array of encoder outputs.
# NOTE(review): the visible rows of the saved output take only two distinct
# values, so the encoding barely varies with the reset angle.
X

array([[-0.28855646,  0.03085409, -0.13583991,  0.38497052, -0.18825406],
       [-0.2868658 ,  0.02799525, -0.13766721,  0.38798726, -0.19210458],
       [-0.2868658 ,  0.02799525, -0.13766721,  0.38798726, -0.19210458],
       ...,
       [-0.28855646,  0.03085409, -0.13583991,  0.38497052, -0.18825406],
       [-0.28855646,  0.03085409, -0.13583991,  0.38497052, -0.18825406],
       [-0.28855646,  0.03085409, -0.13583991,  0.38497052, -0.18825406]])

In [17]:
# Show the encoder's bound forward method; its repr includes the layer structure.
net.encoder.forward

<bound method ConvEncoderNet.forward of ConvEncoderNet(
  (conv1): Conv2d(1, 16, kernel_size=(8, 8), stride=(4, 4))
  (conv2): Conv2d(16, 16, kernel_size=(4, 4), stride=(2, 2))
  (layers): ModuleList(
    (0): Linear(in_features=2112, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=5, bias=True)
  )
)>

In [2]:
#what are weights

import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew

# Build (without training) the pixel encoder, reward network and forward model
# so that their freshly constructed weights can be inspected.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=True)


# nonlinearity handed to the encoder via `sigma`; ELU is the disabled alternative
#nonlin = torch.nn.ELU()
nonlin = torch.nn.functional.relu
layers = [50, 10, 5] # architecture of encoder after the 2 conv layers
save_dir = "./"
n_episodes = 500 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/2) # save the model every so often

# encoder input shape is the observation shape with its first axis dropped
encnet = srt.ConvEncoderNet(layers,env.observation_space.shape[1:],sigma=nonlin)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

# reward network: input width is 2*state dim+action dim, output width is 1
rnet = srt.EncoderNet([2*5+1, 50, 10, 1])

#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,layers[-1],1,2,fit_reward=True,mu=1, r_encoder = rnet,alpha=1)

In [15]:
# Display the encoder's module structure.
encnet

ConvEncoderNet(
  (conv1): Conv2d(1, 16, kernel_size=(8, 8), stride=(4, 4))
  (conv2): Conv2d(16, 16, kernel_size=(4, 4), stride=(2, 2))
  (layers): ModuleList(
    (0): Linear(in_features=2112, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=5, bias=True)
  )
)

In [14]:
# Inspect the weight matrix of the encoder's last linear layer (index 2 in
# encnet.layers, 10 inputs -> 5 outputs according to the module repr).
encnet.layers[2].weight

Parameter containing:
tensor([[-0.1487, -0.1759,  0.1770, -0.2456,  0.3060,  0.0667, -0.1194,  0.0529,
         -0.1871, -0.1306],
        [ 0.1675,  0.2610,  0.0371, -0.1310,  0.0397, -0.1577,  0.0479, -0.1478,
         -0.1023,  0.0937],
        [ 0.0280,  0.0736, -0.2789,  0.1069,  0.1072,  0.0639,  0.2972, -0.0238,
          0.0280,  0.2696],
        [ 0.0513, -0.3018,  0.2311,  0.0414,  0.2740, -0.0295, -0.2415, -0.2992,
         -0.2916,  0.0901],
        [ 0.1452,  0.2717,  0.2936, -0.2310, -0.0724, -0.2903,  0.2604,  0.1252,
          0.1103, -0.2748]], requires_grad=True)

In [11]:
#simple identity with actual rep

import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew
import torch.nn as nn

# Sanity experiment on the non-pixel pendulum: the "encoder" is a single 9x9
# linear layer on the raw observation, trained together with a reward network.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=False)

#nonlin = torch.nn.ELU()
save_dir = "./" # used as a path prefix: file names are appended directly
n_episodes = 1000 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/2) # save the model every so often

# linear map from 9 inputs to 9 outputs; 9 is presumably n_repeats * 3 state
# components -- TODO confirm against the environment's observation size
encnet = nn.Linear(9,9)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

# 2*state dim+action dim
#rnet = srt.EncoderNet([2*9+1, 50, 10, 1])
rnet = srt.EncoderNet([2*9+1, 50, 10, 1])

# NOTE(review): alpha=0 here versus alpha=1 in the pixel experiment; its
# meaning is defined in srt.PiecewiseForwardNet and is not visible here.
#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,9,1,2,fit_reward=True,mu=1, r_encoder = rnet,alpha=0)

deterministic_args = None 

traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args,
                                          output_rewards=True)
# Draw one batch of 25 directly from the sampler (private method) and stack
# [X0 | X1 | U] column-wise; r_inps is only consumed by the disabled print below.
X0, X1, U, R = traj_sampler._forward_batch(25)
r_inps = np.concatenate([X0,X1,U], axis=1)
#with torch.no_grad():
#    print(rnet.forward(torch.from_numpy(r_inps).float()))
# train; the call returns a (net, losses) pair
net, losses = srt.train_encoder(prednet,traj_sampler,n_episodes,
                                batch_size=batch_size,
                                track_loss_every=int(n_episodes/100),
                                lr=learning_rate,
                                save_every=save_every,
                                save_path=save_dir)

# persist the whole trained model object; note the resulting name is "./.net"
torch.save(net,save_dir+".net")


Epoch Completion: 2.000%, Loss: 45.910



Epoch Completion: 100.000%, Loss: 0.455

In [21]:
%matplotlib

Using matplotlib backend: TkAgg


In [25]:
# what follows is code to visualize the representations
# Debugging variant: prints every raw observation and every encoded row
# instead of plotting (the visualize_trajectory call is disabled).
d = 9 # must match the final entry in layers
n_samps = 500
#env = ew.TorchEncoderWrapper(env,net.encoder,np.eye(d))
X = np.empty((n_samps,d))
for i,ang in enumerate(np.linspace(0,2*np.pi,n_samps)): # go through the angles from 0 to 2pi
    #print(env.reset(state=[ang,0]))
    obs=env.reset(state=[ang,0])
    print(obs)
    # add a leading batch axis, convert to a float tensor and encode without
    # tracking gradients
    with torch.no_grad():
        X[i,:] = net.encoder.forward(torch.from_numpy(np.expand_dims(obs,0)).float())
#utils.visualize_trajectory(X)
# NOTE: this prints all n_samps rows, which floods the cell output
for i in range(n_samps):
    print(X[i])

[ 1.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00
 -4.59242550e-18 -9.18485099e-17  1.00000000e+00 -1.37772765e-17
 -1.83697020e-16]
[0.99992073 0.01259122 0.         0.99991467 0.01306335 0.00944342
 0.99990164 0.01402531 0.01924093]
[0.99968292 0.02518045 0.         0.9996587  0.0261244  0.01888533
 0.99960659 0.02804763 0.03847864]
[0.99928662 0.03776568 0.         0.99923214 0.03918084 0.02832426
 0.99911492 0.04206395 0.05770989]
[0.99873189 0.05034492 0.         0.99863506 0.05223037 0.03775869
 0.99842677 0.0560713  0.07693147]
[0.99801881 0.06291619 0.         0.99786759 0.06527069 0.04718714
 0.99754231 0.07006668 0.09614016]
[0.99714751 0.07547747 0.         0.99692988 0.0782995  0.05660811
 0.99646178 0.0840471  0.11533273]
[0.99611811 0.0880268  0.         0.9958221  0.0913145  0.0660201
 0.99518547 0.09800959 0.13450597]
[0.99493078 0.10056216 0.         0.99454448 0.1043134  0.07542162
 0.99371371 0.11195117 0.15365667]
[0.99358571 0.11308158 0.         0.9

In [20]:
import scipy.linalg as la

# Inspect the encoder's weight matrix as a NumPy array, detached from autograd.
net.encoder.weight.detach().numpy()

array([[-0.22706883, -0.3488301 , -0.24115439,  0.07449825, -0.19601993,
        -0.00968671,  0.4871065 ,  0.42864478, -0.04952257],
       [ 0.28043598, -0.3244927 ,  0.02371694,  0.21653175,  0.03836328,
        -0.24385722,  0.10451561, -0.3317122 , -0.0010413 ],
       [ 0.23875032,  0.41250297,  0.23115253,  0.08958464,  0.27303556,
         0.01212166, -0.20382094, -0.2932708 ,  0.09478893],
       [ 0.5737164 ,  0.13179281,  0.15421203,  0.41630912,  0.3236075 ,
        -0.11297016,  0.36789313,  0.3751704 , -0.12169885],
       [-0.54690605, -0.4148158 , -0.31900495, -0.30257475,  0.04871792,
        -0.24414149, -0.17110346,  0.37530842,  0.13548648],
       [-0.4631175 , -0.18072045, -0.28645167, -0.3850715 , -0.18157238,
         0.11084921, -0.25512013,  0.20903416,  0.0929084 ],
       [-0.44990942,  0.34595102, -0.14906578, -0.23989269,  0.08469803,
         0.18894717, -0.01423795,  0.03915308,  0.1233978 ],
       [-0.2414595 , -0.10688411,  0.2902294 , -0.06681505, -0

In [2]:
#simple identity with actual rep

import sys
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import torch
from lib import utils
from lib import encoder_wrappers as ew
import torch.nn as nn

# Set up the non-pixel pendulum with a 9x9 linear encoder and a smaller
# (one hidden layer) reward network; nothing is trained in this cell.

# specify environment information
n_repeats = 3 # step the environment this many times for each action, concatenate the pixel observations
env = RestartablePendulumEnv(repeats=n_repeats,pixels=False)

#nonlin = torch.nn.ELU()
save_dir = "./"
n_episodes = 500 # total batches to draw
batch_size = 25
learning_rate = .001
save_every = int(n_episodes/2) # save the model every so often

# linear map from 9 inputs to 9 outputs
encnet = nn.Linear(9,9)

# use the following commented out lines for PredictorNet (I changed deterministic sampling though...
#prednet = srt.PredictorNet(encnet,T,layers[-1],1)
#deterministic_args = (samples[i], batch_size, 35, method, n_repeats,T) 

# 2*state dim+action dim
# (the active line drops the 10-unit hidden layer used by the disabled one)
#rnet = srt.EncoderNet([2*9+1, 50, 10, 1])
rnet = srt.EncoderNet([2*9+1, 50, 1])

#prednet = srt.ForwardNet(encnet,layers[-1],1)
prednet = srt.PiecewiseForwardNet(encnet,9,1,2,fit_reward=True,mu=1, r_encoder = rnet,alpha=0)

deterministic_args = None 

# sampler that additionally emits rewards
traj_sampler = srt.SimpleTrajectorySampler(env,
                                     srt.sample_pendulum_action_batch,
                                     srt.sample_pendulum_state_batch_old,
                                     device=torch.device("cpu"),
                                     deterministic=False,
                                     deterministic_args=deterministic_args,
                                          output_rewards=True)



In [5]:
# Draw one batch of 25 directly from the sampler. Presumably X0/X1 are the
# observations before/after the step, U the actions and R the rewards --
# TODO confirm. NOTE: _forward_batch is a private method of the sampler.
X0, X1, U, R = traj_sampler._forward_batch(25)

In [10]:
# Display the reward network's module structure.
rnet

EncoderNet(
  (layers): ModuleList(
    (0): Linear(in_features=19, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=1, bias=True)
  )
)

In [11]:
# Stack [X0 | X1 | U] column-wise to form the reward network's input; this
# matches rnet's 19 input features if X0/X1 have 9 columns each and U has 1.
r_inps = np.concatenate([X0,X1,U], axis=1)

In [14]:
# Evaluate the reward network on the sampled batch without tracking gradients.
# NOTE(review): the saved output contains exact zeros and no negative values,
# which suggests EncoderNet applies a ReLU after its final layer -- confirm,
# since that would prevent the network from predicting negative rewards.
with torch.no_grad():
    print(rnet.forward(torch.from_numpy(r_inps).float()))

tensor([[0.4506],
        [0.0000],
        [0.0000],
        [0.0000],
        [1.0077],
        [0.0000],
        [0.3479],
        [0.0000],
        [0.4808],
        [0.0000],
        [0.2500],
        [0.3049],
        [0.3724],
        [0.0000],
        [0.8552],
        [0.7754],
        [0.0000],
        [0.0000],
        [0.0000],
        [1.3394],
        [0.0000],
        [0.0118],
        [0.0000],
        [0.9776],
        [1.3443]])


In [16]:
# Sanity check: slicing with [:-1] drops the last element.
[1,2,3][:-1]

[1, 2]