In [1]:
%matplotlib notebook
from mpl_toolkits import mplot3d


import numpy as np
import matplotlib.pyplot as plt
import pybullet as p
import pybullet_data 
import time
import math
import random
import torch
import torch.nn as nn
from torch.autograd import Variable


In [2]:
physicsClient = p.connect(p.GUI)

In [3]:
p.setGravity(0,0,-10) 
p.resetSimulation() 
p.setAdditionalSearchPath(pybullet_data.getDataPath()) 
planeId = p.loadURDF("plane.urdf") 
robotId = p.loadURDF("iiwa7.urdf",flags=9, useFixedBase=1)

robotStartPos = [0,0,0]
robotStartOrientation = p.getQuaternionFromEuler([0,0,0])

p.resetBasePositionAndOrientation(robotId,robotStartPos,robotStartOrientation)

p.setJointMotorControlArray(robotId,range(7),p.VELOCITY_CONTROL,forces=np.zeros(7))

In [4]:
def simulate_system(x, u):
    x_next=[]
    for i in range(7):
        p.resetJointState(robotId,i,x[i],targetVelocity = x[i+7])
    
    p.setJointMotorControlArray(robotId,range(7), controlMode=p.TORQUE_CONTROL,forces=u)
    p.stepSimulation()
    for i in range(7):
        x_next.append(p.getJointStates(robotId,range(7))[i][0])
    for i in range(7):
        x_next.append(p.getJointStates(robotId,range(7))[i][1])
    x_next = np.array(x_next)
    return x_next

In [5]:
N=100000
x = np.zeros([14,N])
u= np.zeros([7, N])
x_new=np.zeros([14,N])

In [6]:
for i in range(N):
    for j in range(7):
        a=random.randint(-180,180)#angule range(-pi,pi)
        b=random.uniform(-10, 10)#velocity range
        c=random.randint(-30,30)#torque range (-200,200)
        x[j,i]=math.radians(a/math.pi)
        x[j+7,i]=b
        u[j,i]=c
print(x)
print(u)

[[ 0.45       -0.49444444  0.21111111 ...  0.46111111 -0.61111111
   0.34444444]
 [-0.15555556 -0.06111111 -0.5        ...  0.12222222  0.02222222
   0.99444444]
 [ 0.23333333 -0.83888889 -0.02222222 ... -0.71666667  0.25555556
   0.34444444]
 ...
 [ 8.65308432  8.61860586  6.91661684 ...  4.01987702  4.51873316
   5.78356037]
 [-9.51039822  0.50120437 -3.58758048 ...  8.78849359 -1.46188412
   0.90366242]
 [-2.28748788  3.25370452 -9.91069085 ... -8.55395509  5.01763095
  -8.66222793]]
[[ 19.  -9. -24. ...  11.   2. -23.]
 [ -2.  30.  10. ...  20.  -8. -30.]
 [-25. -21.  19. ...  -8. -13. -11.]
 ...
 [ 10.   5.  19. ...  24.  14. -20.]
 [-24. -21.  13. ...  -4.  -8.  16.]
 [-19. -16.   6. ... -18.   4.   2.]]


In [7]:
for i in range(N):
    x_new[:,i]=simulate_system(x[:,i], u[:,i])
print("new state is:", x_new)

new state is: [[  0.45962305  -0.49473278   0.19530591 ...   0.4670241   -0.63409568
    0.37160071]
 [ -0.16644305  -0.0296752   -0.46941474 ...   0.14539398   0.06213868
    1.02897411]
 [  0.22187766  -0.84754735  -0.02833872 ...  -0.72523661   0.2327216
    0.33030051]
 ...
 [ 13.23015925  10.60395147   8.25113392 ...  11.60600248   6.45366773
    3.74281194]
 [-11.9579493   -3.91249901  -0.44855174 ...   9.6348448    2.65766844
    5.92760129]
 [-17.1677402   -8.90397717  -4.28718648 ... -24.25088849   3.80714124
   -3.59294696]]


In [8]:
x=x.T
u = u.T
x_train = np.append(x,u,axis=1)
print(x_train)

[[ 4.50000000e-01 -1.55555556e-01  2.33333333e-01 ...  1.00000000e+01
  -2.40000000e+01 -1.90000000e+01]
 [-4.94444444e-01 -6.11111111e-02 -8.38888889e-01 ...  5.00000000e+00
  -2.10000000e+01 -1.60000000e+01]
 [ 2.11111111e-01 -5.00000000e-01 -2.22222222e-02 ...  1.90000000e+01
   1.30000000e+01  6.00000000e+00]
 ...
 [ 4.61111111e-01  1.22222222e-01 -7.16666667e-01 ...  2.40000000e+01
  -4.00000000e+00 -1.80000000e+01]
 [-6.11111111e-01  2.22222222e-02  2.55555556e-01 ...  1.40000000e+01
  -8.00000000e+00  4.00000000e+00]
 [ 3.44444444e-01  9.94444444e-01  3.44444444e-01 ... -2.00000000e+01
   1.60000000e+01  2.00000000e+00]]


In [9]:
y_train = x_new.T

In [10]:
print(y_train)

[[  0.45962305  -0.16644305   0.22187766 ...  13.23015925 -11.9579493
  -17.1677402 ]
 [ -0.49473278  -0.0296752   -0.84754735 ...  10.60395147  -3.91249901
   -8.90397717]
 [  0.19530591  -0.46941474  -0.02833872 ...   8.25113392  -0.44855174
   -4.28718648]
 ...
 [  0.4670241    0.14539398  -0.72523661 ...  11.60600248   9.6348448
  -24.25088849]
 [ -0.63409568   0.06213868   0.2327216  ...   6.45366773   2.65766844
    3.80714124]
 [  0.37160071   1.02897411   0.33030051 ...   3.74281194   5.92760129
   -3.59294696]]


In [11]:
#input_size = 1
input_size = 21
output_size = 14
num_epochs = 300
learning_rate = 0.002

x_train = x_train.astype(np.float32)
y_train=y_train.astype(np.float32)


class LinearRegression(nn.Module):
    def __init__(self,input_size,output_size):
        super(LinearRegression,self).__init__()
        self.linear = nn.Linear(input_size,output_size)

    def forward(self,x):
        out = self.linear(x) #Forward propogation 
        return out

model = LinearRegression(input_size,output_size)

#Lost and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

#train the Model
for epoch in range(num_epochs):
    #convert numpy array to torch Variable
    inputs = Variable(torch.from_numpy(x_train)) #convert numpy array to torch tensor
    #inputs = Variable(torch.Tensor(x_train))    
    targets = Variable(torch.from_numpy(y_train)) #convert numpy array to torch tensor

    #forward+ backward + optimize
    optimizer.zero_grad() #gradient
    outputs = model(inputs) #output
    loss = criterion(outputs,targets) #loss function
    loss.backward() #backward propogation
    optimizer.step() #1-step optimization(gradeint descent)



In [12]:
w = model.linear.weight.data.numpy()
b = model.linear.bias.data.numpy()
print('w:{},b:{}'.format(w,b))
print(loss)

w:[[-1.76890090e-01  1.69474974e-01 -9.98860523e-02 -1.72635615e-01
   4.95251045e-02  7.30510354e-02 -1.82116255e-01  5.04984194e-03
   1.18030896e-02  3.07773356e-03 -2.29798141e-03  6.69663213e-03
   1.01095829e-02  4.78207087e-03  1.33390145e-04 -2.89943800e-05
  -2.40938694e-04 -7.12421752e-05  5.51823214e-05  7.81420385e-06
   1.34631482e-04]
 [-6.79811463e-02  1.09535061e-01 -9.78549421e-02  7.97934085e-02
  -4.91888933e-02 -1.71327159e-01 -1.66871399e-02  6.82089478e-03
   1.34819029e-02 -2.37485906e-03  8.51210766e-03 -1.08976522e-02
   1.23825008e-02 -2.17936444e-03  7.46643054e-05 -1.95030898e-05
   1.16133517e-04  1.68186772e-04 -1.67625494e-05 -1.85961435e-05
  -8.04311185e-06]
 [ 1.71353165e-02  3.71231921e-02 -1.61501274e-01  1.86415076e-01
   8.28160495e-02 -1.95223302e-01 -2.03325488e-02 -1.15125412e-02
   3.09876259e-03  1.39392195e-02  1.17917238e-02 -1.02989911e-03
   3.04985070e-03  2.41449918e-04 -8.30022545e-05 -1.14812228e-05
   2.48710596e-04 -1.88530685e-04 -2

In [13]:
y_new = w@x_train[1,:].T+b
print(y_new)
print(y_train[1,:])

[  0.23178354   0.1148214    0.0879299   -0.29898876   0.17215067
  -0.15699564  -0.35198718  -0.5304712    7.1452003   -1.0895047
  -0.67603993  11.087455    -2.3385298  -10.185847  ]
[-0.49473277 -0.0296752  -0.84754735  0.38183632  0.533072    0.7836979
  0.92401123 -0.06920084  7.544618   -2.0780299  -1.6926172  10.603951
 -3.912499   -8.903977  ]


In [21]:
for i in range(7):
        p.resetJointState(robotId,i,y_new[i],targetVelocity = y_new[i+7])

In [22]:
for i in range(7):
        p.resetJointState(robotId,i,y_train[1,i],targetVelocity = y_train[1,i+7])

In [5]:
x_test = np.array([4.33333333e-01,  3.72222222e-01,  1.00000000e+00,  6.33333333e-01,
   7.77777778e-01,  5.50000000e-01,  8.77777778e-01,  5.61716944e+00,
   4.94226108e-01,  7.71834766e+00,  3.36154878e+00, -7.54252866e+00,
  -4.02101231e+00, -3.41661910e+00,  3.00000000e+00, -1.80000000e+01,
   2.80000000e+01,  7.00000000e+00, -2.60000000e+01,  1.00000000e+01,
   1.40000000e+01])
u_test = x_test[14::]
print(u_test)
yy = simulate_system(x_test, u_test)
print(yy)

[  3. -18.  28.   7. -26.  10.  14.]
[  0.45662485   0.37251037   1.03471971   0.64772252   0.72732039
   0.53907829   0.92093734   5.5899647    0.06915529   8.33272973
   3.45340603 -12.10977284  -2.62121083  10.35829541]


In [19]:
for i in range(7):
        p.resetJointState(robotId,i,yy[i],targetVelocity = yy[i+7])

In [20]:
for i in range(7):
        p.resetJointState(robotId,i,y_test[i],targetVelocity = y_test[i+7])