# Process Steps Data for RL Preference Learning

This notebook loads a pickled file of RL episode data and samples validation segments and their rewards for preference learning.

## Import Required Libraries

In [None]:
import torch
import einops
from pref_rl.utils.pref import Sampler
import os

## Set Constants

In [None]:
# Per-step feature layout used when slicing the episode tensor: the first
# OBS_DIM features are the observation, the next ACT_DIM are the action.
OBS_DIM, ACT_DIM = 78, 12

## Set the Path to the Steps File

Update the path below to point to your `steps.pkl` file.

In [None]:
# Location of the pickled steps file to process
steps_file = '../data/validation_steps.pkl'

# Confirm the path is present before going any further; stop with a clear
# error otherwise
if os.path.exists(steps_file):
    print(f"File found: {steps_file}")
else:
    raise FileNotFoundError(f"The file {steps_file} does not exist")

## Load and Examine the Steps Data

In [None]:
# Load the steps data
import pickle

print(f"Loading steps from {steps_file}...")
# NOTE: unpickling can execute arbitrary code, so only load files that come
# from a trusted source.
with open(steps_file, 'rb') as f:
    # Load through the handle owned by the `with` block so the file is
    # closed as soon as the block exits.
    deque_eps = pickle.load(f)

# Combine the loaded episodes into a single tensor with a new leading
# dimension; torch.stack requires every episode tensor to have the same shape.
eps = torch.stack(list(deque_eps))
print(f"Loaded {len(eps)} episodes.")
print(f"Stacked episodes shape: {eps.shape}")

# Show the first and last episode shapes as a quick sanity check
print(f"Episode {0:2} shape: {eps[0].shape}")
print(f"Episode {len(eps)-1} shape: {eps[-1].shape}")

In [None]:
# Slice the stacked episode tensor along its last axis into its components
print("Splitting episodes into (obs, actions, rewards)...")

# The first OBS_DIM features are observations, the next ACT_DIM features are
# actions, and the final feature of every step is taken as the reward.
steps = {
    'obs': eps[:, :, :OBS_DIM],
    'actions': eps[:, :, OBS_DIM:OBS_DIM + ACT_DIM],
    'rewards': eps[:, :, -1],
}

# Keep the individual tensors available under their own names as well
obs, actions, rewards = steps['obs'], steps['actions'], steps['rewards']

print(f"Observations shape: {obs.shape}")
print(f"Actions shape: {actions.shape}")
print(f"Rewards shape: {rewards.shape}")

## Initialize the Sampler

Initialize the `Sampler` with the segment length and the observation and action dimensions defined above.

In [None]:
# Number of steps per sampled segment
segment_length = 50

# Echo the configuration that is about to be handed to the sampler
print(
    "Initializing Sampler with parameters:",
    f"  segment_length: {segment_length}",
    f"  obs_dim: {OBS_DIM}",
    f"  action_dim: {ACT_DIM}",
    sep="\n",
)

# NOTE(review): positional order (segment length, obs dim, action dim) is
# taken from the labels printed above — confirm against Sampler's signature.
sampler = Sampler(segment_length, OBS_DIM, ACT_DIM)

## Sample Segments

Use the `sample_segments` method to sample segments from the stacked episodes.

In [None]:
# How many segments to draw from the stacked episodes
num_segments = 5000

print(
    "Sampling segments...",
    f"  Number of segments: {num_segments}",
    "  Sampling method: uniform",
    sep="\n",
)

# NOTE(review): the trailing `None` and `True` arguments are passed
# positionally; their meaning is defined by Sampler.sample_segments, which is
# not visible in this notebook — confirm before changing them.
sa, r = sampler.sample_segments(eps, num_segments, 'uniform', None, True)

print(
    f"  Segments shape: {sa.shape}",
    f"  Rewards shape: {r.shape}",
    sep="\n",
)

## Save Segments and Rewards

In [None]:
# Destination for the sampled segments and their rewards
data_file = '../data/validation_data.pkl'

print(f"Saving segments and rewards to {data_file}...")

# The file is written with torch.save, so despite the .pkl suffix it should
# be read back with torch.load. Each tensor is made contiguous before saving.
torch.save([tensor.contiguous() for tensor in (sa, r)], data_file)

print("Processing completed successfully!")

## Optional: Verify Saved Files

In [None]:
# Report the on-disk size (in MB) of each output file, or flag it as missing
files_to_check = [data_file]
for saved_path in files_to_check:
    if not os.path.exists(saved_path):
        print(f"{saved_path} not found")
        continue
    # Convert bytes to mebibytes for display
    size_mb = os.path.getsize(saved_path) / 1024 ** 2
    print(f"{saved_path} - Size: {size_mb:.2f} MB")