In [1]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import sys

sys.path.append("..")
from demo_2_awac import och_2_awac

DATA_DIR = '/usr/local/google/home/bkinman/proj/rpl_reset_free/20201005_slider_play_reprocessed'

def create_awac_dict_from_demo_pkls(data_dir):
    full_awac_dict = None
    glob_str = os.path.join(data_dir, 'recording?.pkl')
    pkl_files = [f for f in glob.glob(glob_str)]
    for f in pkl_files:
        data_path = os.path.join(data_dir, f)
        data = pickle.load(open(data_path,'rb'))
        awac_formatted_list = och_2_awac(data)
        for entry_dict in awac_formatted_list:
            if not full_awac_dict:
                full_awac_dict = entry_dict 
            else:
                for k, v in entry_dict.items():
                    full_awac_dict[k] = np.concatenate((full_awac_dict[k], v), axis=0)
    return full_awac_dict

def relabel_bc(trajs: dict, window_size = 100):
    goal_size = 25
    paths = []
    num_idxs = trajs['observations'].shape[0]
    for idx_start in range(num_idxs - window_size - 1):
        path = {}
        # Windowed observations
        ob = trajs['observations'][idx_start:idx_start + window_size].copy()
        next_ob = trajs['observations'][idx_start + 1:idx_start + window_size + 1].copy()
        # Last observations
        goals = np.repeat([trajs['observations'][idx_start + window_size - 1]], len(ob), axis=0)
        ob[:, goal_size:] = goals[:, :goal_size]
        next_ob[:, goal_size:] = goals[:, :goal_size]
        path['observations'] = ob.copy()
        path['full_observations'] = ob.copy()
        path['next_observations'] = next_ob.copy()
        path['full_next_observations'] = next_ob.copy()
        path['actions'] = trajs['actions'][idx_start:idx_start + window_size].copy()
        reward = np.zeros((len(ob), 1))
        reward[-1] = 1.0
        path['rewards'] = reward
        terminals = np.zeros((len(ob),), dtype=np.bool)
        terminals[-1] = True
        path['terminals'] = terminals
        path['env_infos'] = [{}]*len(ob)
        path['agent_infos'] = [{}] * len(ob)
        paths.append(path)
    return paths

def relabel_bc_strided(trajs: dict, window_size = 100, stride=10):
    """ Strided relabeling procedure produces less data."""
    goal_size = 25
    paths = []
    num_idxs = trajs['observations'].shape[0]
    for idx_start in range(num_idxs - window_size - 1):
        path = {}
        # Windowed observations
        ob = trajs['observations'][idx_start:idx_start + window_size][::stride].copy()
        next_ob = trajs['observations'][idx_start + 1:idx_start + window_size + 1][::stride].copy()
        # Last observations
        goals = np.repeat([trajs['observations'][idx_start + window_size - 1]], len(ob), axis=0)
        ob[:, goal_size:] = goals[:, :goal_size]
        next_ob[:, goal_size:] = goals[:, :goal_size]
        path['observations'] = ob.copy()
        path['full_observations'] = ob.copy()
        path['next_observations'] = next_ob.copy()
        path['full_next_observations'] = next_ob.copy()
        path['actions'] = trajs['actions'][idx_start:idx_start + window_size][::stride].copy()
        reward = np.zeros((len(ob), 1))
        reward[-1] = 1.0
        path['rewards'] = reward
        terminals = np.zeros((len(ob),), dtype=np.bool)
        terminals[-1] = True
        path['terminals'] = terminals
        path['env_infos'] = [{}]*len(ob)
        path['agent_infos'] = [{}] * len(ob)
        paths.append(path)
    return paths

def compute_window_size(obs, thresh_low=4.45, thresh_high=10e6, debug_plot = False):
    """ Computes the window size, which is the essentially the average duration of each episode.
    """
    thresh = ((obs > thresh_low) & (obs < thresh_high))*1.0
    grad = np.gradient(thresh)
    last_rise = 0
    deltas_ts = []
    for ts in range(len(grad)):
        if grad[ts] > 0:
            last_rise = ts
        if grad[ts] < 0:
            deltas_ts.append((last_rise, ts))
    mid_ts = np.array([((b-a)/2+a) for a,b in deltas_ts]).astype(np.int32)
    if debug_plot:
        mid_pnts = np.zeros(len(obs))
        mid_pnts[mid_ts] = 1
        plt.figure(figsize=(30, 5))
        plt.plot(thresh)
        plt.plot(mid_pnts)
        plt.plot(obs-np.amin(obs))
    mean_episode_len = np.mean(mid_ts[1:] - mid_ts[:-1])
    return int(mean_episode_len)

## Load demo data, convert to AWAC format, and relabel for Behavior Cloning

In [2]:
full_awac_dict = create_awac_dict_from_demo_pkls(DATA_DIR)
sliding_cabinet_obs = full_awac_dict['observations'][:,1]
window_size = compute_window_size(sliding_cabinet_obs)
bc_training_data = relabel_bc_strided(full_awac_dict, window_size)
lens = sum(len(a['observations']) for a in bc_training_data)
print(f'bc_num_pretrain_steps should be {(int(lens/1000)+1)*1500} steps')
print(f'q_num_pretrain2_steps should be {(int(lens/1000)+1)*3000} steps')
output_path = os.path.join(DATA_DIR, 'bc_train_strided.pkl')
pickle.dump(bc_training_data, open(output_path,'wb'))

bc_num_pretrain_steps should be 789000 steps
q_num_pretrain2_steps should be 1578000 steps


## Reprocess Demo Data
When the demo data was collected, the observation vector was the incorrect size, and contained incorrect values (should have been zero initialized).
The following routine opens the original demo vectors and corrects this. To prevent accidental overwriting of data, data will be dumped to a new directory alongside DATA_DIR.

In [23]:
def reprocess_demo_data(data_dir):
    output_dirname = os.path.basename(os.path.normpath(DATA_DIR))+'_reprocessed'
    output_dir = os.path.join(DATA_DIR, '..', output_dirname)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    glob_str = os.path.join(DATA_DIR, 'recording?.pkl')
    pkl_files = [f for f in glob.glob(glob_str)]
    for f in pkl_files:
        data = pickle.load(open(f, 'rb'))
        for episode in data:
            for step in episode:
                step['obs'] = np.concatenate((step['obs'][:25], np.zeros(25)), axis=0)
        pickle.dump(data, open(os.path.join(output_dir, os.path.basename(f)), 'wb'))
reprocess_demo_data(DATA_DIR)

In [3]:
window_size

209