In [1]:
# Dependencies
import os
import time
import copy
import pickle
import pathlib
import numpy as np
from dataclasses import dataclass
import torch
from ruamel.yaml import YAML, dump, RoundTripDumper
import raisimGymTorch.algo.ppo.module as ppo_module

In [2]:
# This notebook generates a csv folder beside the .pt file.
# All csv files for miniDNN are stored in this folder.
#
# NOTE: HOME_PATH is machine-specific; edit it to point at your own data root.
# Paths are assembled with os.path.join so that the trailing slash on HOME_PATH
# does not produce doubled separators in the derived paths.
HOME_PATH = '/home/willw/raisim/rsmGymA1/data/'
DATA_PATH = os.path.join(HOME_PATH, "a1_locomotion")
EXP_DATE = "220628-001611"
ITER_NUM = "1450"
BASE_CHECKPOINT_PATH = os.path.join(DATA_PATH, EXP_DATE, ITER_NUM)
logFiles = os.listdir(BASE_CHECKPOINT_PATH)
print(logFiles)

outDir = os.path.join(BASE_CHECKPOINT_PATH, 'csv')
print(outDir)
# exist_ok=True makes this idempotent without a separate existence check.
os.makedirs(outDir, exist_ok=True)

['test_110722_200522.mp4', 'csv', 'test_log_110722_200522.csv', 'test_log_290622_115648.csv', 'test_290622_104052.mp4', 'mean.csv', 'full.pt', 'action.npy', 'eval_log.csv', 'var.csv', 'test_290622_115648.mp4', 'observation.npy', 'train_policy.mp4']
/home/willw/raisim/rsmGymA1/data//a1_locomotion/220628-001611/1450//csv/


In [3]:
# Rebuild the actor MLP from the experiment's cfg.yaml and load its trained weights.
# ob_dim / act_dim must agree with the checkpoint's first and last layer sizes.
ob_dim = 38
act_dim = 12

# Use a context manager so the config file handle is closed after parsing.
with open(f"{DATA_PATH}/{EXP_DATE}/cfg.yaml", 'r') as cfg_file:
    cfg = YAML().load(cfg_file)

actor = ppo_module.MLP(cfg['architecture']['policy_net'], torch.nn.LeakyReLU, ob_dim, act_dim)

# map_location keeps this cell usable on CPU-only machines even when the
# checkpoint was saved from a GPU, matching how the later cells load it.
# NOTE(review): torch.load unpickles the file; only open checkpoints you trust.
checkpoint = torch.load(f"{BASE_CHECKPOINT_PATH}/full.pt", map_location=torch.device("cpu"))
actor.load_state_dict(checkpoint['actor_architecture_state_dict'])

<All keys matched successfully>

In [4]:
# Inspect the .pt file: load it onto the CPU and list the shape of every
# entry that exposes one, grouped by the top-level checkpoint key.
actor_dict = torch.load(f'{BASE_CHECKPOINT_PATH}/full.pt', map_location=torch.device("cpu"))
for section, entries in actor_dict.items():
    for entry_name, value in entries.items():
        if not hasattr(value, 'shape'):
            continue  # nothing to report for entries without a shape
        print(section, ": ", entry_name, ":", value.shape)

actor_architecture_state_dict :  architecture.0.weight : torch.Size([128, 38])
actor_architecture_state_dict :  architecture.0.bias : torch.Size([128])
actor_architecture_state_dict :  architecture.2.weight : torch.Size([128, 128])
actor_architecture_state_dict :  architecture.2.bias : torch.Size([128])
actor_architecture_state_dict :  architecture.4.weight : torch.Size([12, 128])
actor_architecture_state_dict :  architecture.4.bias : torch.Size([12])
actor_distribution_state_dict :  std : torch.Size([12])
critic_architecture_state_dict :  architecture.0.weight : torch.Size([128, 38])
critic_architecture_state_dict :  architecture.0.bias : torch.Size([128])
critic_architecture_state_dict :  architecture.2.weight : torch.Size([128, 128])
critic_architecture_state_dict :  architecture.2.bias : torch.Size([128])
critic_architecture_state_dict :  architecture.4.weight : torch.Size([1, 128])
critic_architecture_state_dict :  architecture.4.bias : torch.Size([1])


In [5]:
# Export every tensor of the actor network to its own CSV file in outDir,
# named after its state-dict key (e.g. "architecture.0.weight.csv").
actor_dict = torch.load(f'{BASE_CHECKPOINT_PATH}/full.pt', map_location=torch.device("cpu"))['actor_architecture_state_dict']
for ky, im in actor_dict.items():
    # Convert explicitly to a NumPy array instead of relying on implicit
    # tensor-to-array coercion inside np.savetxt.
    weights = im.detach().cpu().numpy()
    # Eight decimals (the same format used for obMean/obStd) instead of four:
    # rounding the weights to 1e-4 discards float32 precision and shifts the
    # outputs of whatever re-implements the network from these files.
    np.savetxt(f"{outDir}/{ky}.csv", weights, fmt='%.8f', delimiter=',')
    print(ky, ":", im.shape)

architecture.0.weight : torch.Size([128, 38])
architecture.0.bias : torch.Size([128])
architecture.2.weight : torch.Size([128, 128])
architecture.2.bias : torch.Size([128])
architecture.4.weight : torch.Size([12, 128])
architecture.4.bias : torch.Size([12])


In [6]:
# Observation statistics stored next to the checkpoint. The 1e-8 term keeps
# the standard deviation strictly positive for zero-variance entries.
obMean = np.loadtxt(f'{BASE_CHECKPOINT_PATH}/mean.csv', delimiter=',')
obVar = np.loadtxt(f'{BASE_CHECKPOINT_PATH}/var.csv', delimiter=',')
obStd = np.sqrt(1e-8 + obVar)

# Constant lists of length 12 (the same length as act_dim).
jointPgain = [50 for _ in range(12)]
jointDgain = [0.2 for _ in range(12)]
# The same six-value pattern repeated twice.
actionMean = [-0.1, 0.75, -1.6, 0.1, 0.75, -1.6] * 2
actionStd = [0.3 for _ in range(12)]
actionMrg = [10 for _ in range(12)]

In [23]:
# Remove any previously written observation-statistics files, then write each
# constant to its own CSV in outDir with its own number format.
for stale_stem in ('obStd', 'obMean'):
    stale_path = f'{outDir}/{stale_stem}.csv'
    if os.path.exists(stale_path):
        os.remove(stale_path)

csv_exports = (
    ('obMean', obMean, '%.8f'),
    ('obStd', obStd, '%.8f'),
    ('jointPgain', jointPgain, '%.1f'),
    ('jointDgain', jointDgain, '%.2f'),
    ('acMean', actionMean, '%.3f'),
    ('acStd', actionStd, '%.3f'),
    ('acMargin', actionMrg, '%.5f'),
)
for csv_stem, csv_values, number_format in csv_exports:
    np.savetxt(f'{outDir}/{csv_stem}.csv', csv_values, fmt=number_format, delimiter=',')

In [None]:
# Verify that the loaded network behaves the same as in training: feed each
# stored observation through the actor and print the prediction next to the
# stored action for a side-by-side visual comparison.
# Only index 0 along the second axis of each stored array is used.
obSet = np.load(f'{BASE_CHECKPOINT_PATH}/observation.npy')[:, 0, :]
acSet = np.load(f'{BASE_CHECKPOINT_PATH}/action.npy')[:, 0, :]
for stored_ob, stored_ac in zip(obSet, acSet):
    ob_tensor = torch.from_numpy(stored_ob)  # from_numpy already yields a CPU tensor
    action_ll = actor.architecture(ob_tensor)
    print("pyt:", action_ll.detach().numpy())
    print("set:", stored_ac)

In [59]:
# Write the observation and action sets to CSV with four decimals each.
for set_stem, set_values in (("obSet", obSet), ("acSet", acSet)):
    np.savetxt(f"{outDir}/{set_stem}.csv", set_values, fmt='%.4f', delimiter=',')

In [7]:
# Single-sample check: normalise one raw observation, run it through the
# actor, rescale the output with actionStd/actionMean and compare it with the
# reference action `ac` (labelled "Gzb" in the printout).
ob = np.array([
    0.282235, 0.0609965, -0.00398822, 0.99813, -0.0763699, 0.744449, -1.65893,
    0.0754569, 0.749086, -1.65433, -0.0330489, 0.67921, -1.74906, 0.0336214,
    0.67846, -1.74376, 0.0182143, -0.000910908, 0.0249357, 0.00400675,
    -0.0124857, 0.00130095, 0, 0, 0, 0.00601475, -0.00498411, 0.173749,
    -0.00740538, -0.00289552, 0.169396, 0.00322394, 0.0318879, 0.123273,
    -0.0019017, 0.0352441, 0.116703, 0.03,
])
ac = np.array([
    -0.242944, 0.969328, -1.77864, -0.105204, 0.707987, -1.59278,
    0.128312, 0.893308, -1.10543, 0.243095, 0.75673, -1.43475,
])
print(ob.shape)

# Standardise with the stored statistics and cast to float32 for the network.
ob = ((ob - obMean) / obStd).astype(np.float32)
action_ll = actor.architecture(torch.from_numpy(ob))

# De-normalise once and reuse the result for both the value and the difference.
rescaled_action = action_ll.detach().numpy() * actionStd + actionMean
for label, values in (("pyt:", rescaled_action), ("Gzb:", ac), ("Dif:", rescaled_action - ac)):
    print(label, values)

(38,)
pyt: [-0.24298366  0.96925299 -1.77873883 -0.10515217  0.70804655 -1.59281431
  0.12827864  0.8932572  -1.10546324  0.24303446  0.75673213 -1.4346742 ]
Gzb: [-0.242944  0.969328 -1.77864  -0.105204  0.707987 -1.59278   0.128312
  0.893308 -1.10543   0.243095  0.75673  -1.43475 ]
Dif: [-3.96630821e-05 -7.50083237e-05 -9.88324738e-05  5.18281498e-05
  5.95495586e-05 -3.43100813e-05 -3.33571072e-05 -5.07963018e-05
 -3.32425308e-05 -6.05358791e-05  2.12669790e-06  7.58025646e-05]


In [14]:
# Scratch calculation: Euclidean norm of the 2-vector (0.707943, 0.370952).
# NOTE(review): the origin of these two values is not shown in this notebook —
# confirm what they represent, or remove this cell.
np.sqrt(0.707943**2 + 0.370952**2 )

0.7992425649031712