## Data Preparation for one file ##

In [1]:
import numpy as np
import os 
import cv2
## tfds build --overwrite

collected_data_feedback_path = 'data/simulation_dataset_1.npy'

## load data 
data = np.load(collected_data_feedback_path, allow_pickle=True).item()
episode_length = 80

for key, value in data.items():
    # print (len(value["image"]))
    # print ("action", value["action"])
    if len(value['image']) < episode_length:
        ## pad the data with the 0 antion and last image
        last_image = value['image'][-1]
        ## save the last_image as a png 
        # print (last_image.shape)
        # cv2.imwrite(f"last_image_{key}.png", last_image)
        # input()
        last_action = np.zeros(7)
        for i in range(episode_length - len(value["image"])):
            value['image'].append(last_image)
            value['action'].append(last_action)
    value['image'] = [cv2.cvtColor(np.array(image).astype(np.uint8), cv2.COLOR_BGR2RGB) for image in value['image']]
    value['action'] = [np.array(action).astype(np.float32) for action in value['action']]
    data[key] = value


training_save_path = f'data/training_data_{len(data)}'
os.mkdir(training_save_path) if not os.path.exists(training_save_path) else None 
val_save_path = f'data/validation_data_{len(data)}'
os.mkdir(val_save_path) if not os.path.exists(val_save_path) else None

for index in range(0, int(len(data))):
    episode = []
    for step in range(episode_length):
        episode.append({
            'image': data[f"data_{index}"]['image'][step],
            'action': data[f"data_{index}"]['action'][step],
            'language_instruction': 'lift cube',
        })
    # np.save(training_save_path + f'/episode_{index}', episode)
    # np.save(val_save_path + f'/episode_{index}', episode)
    if index < int(0.8 * len(data)):
        np.save(training_save_path + f'/episode_{index}', episode)
    else:
        np.save(val_save_path + f'/episode_{index-int(0.8 * len(data))}', episode)
  


## Combine Multiple Data into Training dataset ##

In [2]:
import numpy as np
import os 
import cv2
## tfds build --overwrite

data_feedback_path_1 = 'data/simulation_dataset_1200.npy'
data_feedback_path_2 = 'data/simulation_dataset_2000.npy'
data_feedback_path_3 = 'data/simulation_dataset_2200.npy'

## load data 
data_1 = np.load(data_feedback_path_1, allow_pickle=True).item()
data_2 = np.load(data_feedback_path_2, allow_pickle=True).item()
data_3 = np.load(data_feedback_path_3, allow_pickle=True).item()
episode_length = 80

## stack all data together
data = {}
for key, value in data_1.items():
    data[key] = value
data_length = len(data)
for key, value in data_2.items():
    number = key.split("_")[1]
    data[f"data_{int(number) + data_length}"] = value
data_length = len(data)
for key, value in data_3.items():
    number = key.split("_")[1]
    data[f"data_{int(number) + data_length}"] = value

print (len(data_1), len(data_2), len(data_3), len(data))

for key, value in data.items():
    # print (len(value["image"]))
    if len(value['image']) < episode_length:
        ## pad the data with the 0 antion and last image
        last_image = value['image'][-1]
        ## save the last_image as a png 
        # print (last_image.shape)
        # cv2.imwrite(f"last_image_{key}.png", last_image)
        # input()
        last_action = np.zeros(7)
        for i in range(episode_length - len(value["image"])):
            value['image'].append(last_image)
            value['action'].append(last_action)
    value['image'] = [cv2.cvtColor(np.array(image).astype(np.uint8), cv2.COLOR_BGR2RGB) for image in value['image']]
    value['action'] = [np.array(action).astype(np.float32) for action in value['action']]
    data[key] = value


training_save_path = f'data/training_data_{len(data)}'
os.mkdir(training_save_path) if not os.path.exists(training_save_path) else None 
val_save_path = f'data/validation_data_{len(data)}'
os.mkdir(val_save_path) if not os.path.exists(val_save_path) else None

for index, key in enumerate(data.keys()):
    episode = []
    for step in range(episode_length):
        episode.append({
            'image': data[key]['image'][step],
            'action': data[key]['action'][step],
            'language_instruction': 'lift cube',
        })
    if index < int(0.8 * len(data)):
        np.save(training_save_path + f'/episode_{index}', episode)
    else:
        np.save(val_save_path + f'/episode_{index-int(0.8 * len(data))}', episode)
  


1139 1928 2114 5181


: 

## DPO Data Generation ##

In [4]:
import numpy as np
import os 
import cv2
## tfds build --overwrite

collected_data_feedback_path = 'data/simulation_dataset_10.npy'

## load data 
data = np.load(collected_data_feedback_path, allow_pickle=True).item()
episode_length = 80

for key, value in data.items():
    # print (len(value["image"]))
    # print ("action", value["action"])
    if len(value['image']) < episode_length:
        ## pad the data with the 0 antion and last image
        last_image = value['image'][-1]
        last_action = np.array([0, 0, 0, 0, 0, 0, -1])
        for i in range(episode_length - len(value["image"])):
            value['image'].append(last_image)
            value['action'].append(last_action)
    value['image'] = [cv2.cvtColor(np.array(image).astype(np.uint8), cv2.COLOR_BGR2RGB) for image in value['image']]
    value['action'] = [np.array(action).astype(np.float32) for action in value['action']]
    data[key] = value


training_save_path = f'data/dpo_training_data_{len(data)}'
os.mkdir(training_save_path) if not os.path.exists(training_save_path) else None 
val_save_path = f'data/dpo_validation_data_{len(data)}'
os.mkdir(val_save_path) if not os.path.exists(val_save_path) else None

all_data = []
for index in range(0, int(len(data))):
    episode_data = []
    for step in range(episode_length):
        episode_data.append({
            'image': data[f"data_{index}"]['image'][step],
            'action': data[f"data_{index}"]['action'][step],
            'language_instruction': 'lift cube',
            'reward': data[f"data_{index}"]['rewards']
        })
    all_data.append(episode_data)
        
all_pairs = {}
all_pairs['prompt'] = []
all_pairs['chosen'] = []
all_pairs['rejected'] = []

print (len(all_data))

print (all_data[0][0]['image'].shape)

for index, sample_1 in enumerate(all_data):
    for sample_2 in all_data[index:]:
        assert sample_1[0]['image'].all() == sample_2[0]['image'].all()
        # take 0 language_instruction because they all the same, take 0 image because we focus on same initial state
        all_pairs['prompt'].append([sample_1[0]['language_instruction'], sample_1[0]['image']])
        all_pairs['chosen'].append(sample_1['action']) if sample_1['reward'] > sample_2['reward'] else all_pairs['chosen'].append(sample_2['action'])
        all_pairs['rejected'].append(sample_2['action']) if sample_1['reward'] > sample_2['reward'] else all_pairs['rejected'].append(sample_1['action'])


print (all_pairs)
# if index < int(0.8 * len(data)):
#     np.save(training_save_path + f'/episode_{index}', episode)
# else:
#     np.save(val_save_path + f'/episode_{index-int(0.8 * len(data))}', episode)
  


7
(256, 256, 3)


TypeError: list indices must be integers or slices, not str