In [1]:
import gymnasium as gym
import numpy as np
import torch
from psutil import cpu_count
from pydrake.all import StartMeshcat
import Franka_gym_pos
from Franka_gym_pos import *
from manipulation.utils import FindDataResource, RenderDiagram, running_as_notebook

# Number of worker processes: half of the CPUs reported by psutil when running
# as a notebook, otherwise a fixed 2. Clamped to at least 1 so a single-core
# machine (or an undetermined CPU count, reported as None) never yields 0.
num_cpu = max(1, (cpu_count() or 2) // 2) if running_as_notebook else 2

In [None]:
# Create the Meshcat instance that the cells below pass to gym.make(...) and
# use for recording/publishing animations.
meshcat = StartMeshcat()

## Teleoperating the Franka arm
To move the robot manually and visualize the result,
enter an integer between 0 and 5 at each prompt:

0: positive translation along the x-axis / 1: negative translation along the x-axis <br>

2: positive translation along the y-axis / 3: negative translation along the y-axis <br>

4: positive translation along the z-axis / 5: negative translation along the z-axis

In [None]:

# Manual teleoperation: prompt for one discrete action per step and render
# the result in Meshcat.
env = gym.make("Franka-v1", meshcat=meshcat)
obs, _ = env.reset()
env.render()

for i in range(50):
    # Keep prompting until a valid action index (0..5, as listed in the
    # instructions above) is entered, instead of crashing on a typo or
    # forwarding an out-of-range value to the environment.
    while True:
        raw = input("Choose an action ")
        try:
            choice = int(raw)
        except ValueError:
            print("Invalid input: enter an integer between 0 and 5")
            continue
        if 0 <= choice <= 5:
            break
        print("Invalid input: enter an integer between 0 and 5")

    action = [choice]
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    print(terminated)

    # A finished episode must be reset before the next step.
    if terminated or truncated:
        obs, _ = env.reset()
        env.render()
    

### Simulation with random actions

Sample a random action at each time step to make the robot move randomly:

In [None]:

# Random rollout: sample an action at every step, render it, and record the
# whole run in Meshcat so it can be replayed afterwards.
env = gym.make("Franka-v1", meshcat=meshcat)
obs, _ = env.reset()
meshcat.StartRecording()

# Short run when not executed as a notebook.
n_steps = 200 if running_as_notebook else 5
for i in range(n_steps):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    # NOTE(review): 100 is presumably the success reward defined in
    # Franka_gym_pos; confirm against the environment.
    if reward == 100:
        print(reward)
    env.render()
    # Start a new episode whenever the current one ends, whether by
    # termination or by truncation (e.g. a time limit).
    if terminated or truncated:
        obs, _ = env.reset()

meshcat.PublishRecording()

Note: The Meshcat visualizer allows you to replay the simulation after recording it. To do this, click on Open Control → Animations → Default.

In [None]:
# Display the block diagram of the system behind the environment.
# gym.make() returns the environment wrapped in Gymnasium wrappers, and
# wrappers do not reliably forward custom attributes, so read `simulator`
# from the unwrapped base environment.
simulator = env.unwrapped.simulator
RenderDiagram(simulator.get_system(), max_depth=1)


## REINFORCE algorithm

In [4]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

# Hyperparameters
learning_rate = 0.0002  # step size of the Adam optimizer owned by Policy
gamma         = 0.98    # discount factor used when accumulating returns


class Policy(nn.Module):
    """Two-layer softmax policy trained with REINFORCE.

    The network maps a 14-dimensional input to a probability vector over 6
    outputs. Transitions are buffered with `put_data` during an episode and
    consumed by `train_net`, which performs one optimizer step per call.
    """

    def __init__(self):
        super(Policy, self).__init__()
        # Episode buffer of (reward, probability of the chosen action) pairs.
        self.data = []

        self.fc1 = nn.Linear(14, 128)
        self.fc2 = nn.Linear(128, 6)
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

    def forward(self, x):
        """Return action probabilities for `x`.

        Args:
            x: float tensor whose last dimension has size 14; either a single
                observation of shape (14,) or a batch of shape (..., 14).

        Returns:
            Tensor of shape (..., 6) whose last dimension sums to 1.
        """
        x = F.relu(self.fc1(x))
        # Normalize over the action dimension. For a 1-D input this is the
        # same as dim=0; for batched input dim=0 would wrongly normalize
        # across the batch.
        x = F.softmax(self.fc2(x), dim=-1)
        return x

    def put_data(self, item):
        """Append one `(reward, prob_of_chosen_action)` pair to the buffer."""
        self.data.append(item)

    def train_net(self):
        """Run one REINFORCE update from the buffered episode and clear it.

        Walks the buffer backwards to accumulate the discounted return R for
        each step, sums the per-step losses `-log(prob) * R`, and applies a
        single optimizer step. Does nothing when the buffer is empty.
        """
        if not self.data:
            return

        self.optimizer.zero_grad()
        R = 0
        loss = 0
        for r, prob in reversed(self.data):
            R = r + gamma * R
            loss = loss - torch.log(prob) * R
        # One backward pass over the summed loss yields the same accumulated
        # gradient as a backward pass per step, and also works when several
        # stored probabilities share one computation graph.
        loss.backward()
        self.optimizer.step()
        self.data = []


## Training with the REINFORCE algorithm

In [None]:
# Train the policy with REINFORCE: roll out one episode, buffer the
# (reward, probability of the chosen action) pairs, then update the network.
env = gym.make("Franka-v1", meshcat=meshcat)
pi = Policy()
score = 0.0
print_interval = 20

for n_epi in range(100):
    s, _ = env.reset()

    # Cap every episode at 100 steps.
    for step in range(100):
        prob = pi(torch.from_numpy(s).float())
        m = Categorical(prob)
        a = m.sample()
        action = np.array([a.item()])
        s_prime, r, done, truncated, info = env.step(action)
        pi.put_data((r, prob[a]))
        s = s_prime
        score += r
        # Stop as soon as the episode ends; a finished environment must be
        # reset before it is stepped again.
        if done or truncated:
            break

    pi.train_net()

    # Report after every `print_interval` completed episodes so that each
    # average covers exactly `print_interval` episodes.
    if (n_epi + 1) % print_interval == 0:
        print("# of episode :{}, avg score : {}".format(n_epi + 1, score/print_interval))
        score = 0.0

print("TRAINING FINISHED")

In [None]:
# Roll out the trained policy and render it in Meshcat.
env = gym.make("Franka-v1", meshcat=meshcat)
render_episodes = 5
for episode in range(render_episodes):
    s, _ = env.reset()
    # Cap every episode at 100 steps.
    for step in range(100):
        # No gradients are needed for evaluation.
        with torch.no_grad():
            prob = pi(torch.from_numpy(s).float())
        m = Categorical(prob)
        # Actions are sampled from the policy distribution (not taken
        # greedily), exactly as during training.
        a = m.sample()
        action = np.array([a.item()])
        s, r, done, truncated, _ = env.step(action)
        env.render()
        # Move on to the next episode once this one has ended.
        if done or truncated:
            break