# Process Steps Data for RL Preference Learning

This notebook processes a file of RL episode data and generates validation segments and rewards for preference learning.

## Import Required Libraries

In [None]:
import torch
import einops
from pref_rl.utils.pref import Sampler
import os

## Set the Path to the Steps File

Update the path below to point to your `steps.pkl` file.

In [None]:
# Location of the pickled steps file this notebook processes
steps_file = '../data/validation_steps.pkl'

# Fail fast with a clear error when the input file is missing
if os.path.exists(steps_file):
    print(f"File found: {steps_file}")
else:
    raise FileNotFoundError(f"The file {steps_file} does not exist")

## Load and Examine the Steps Data

In [None]:
# Read the steps container from disk.
# SECURITY NOTE: torch.load is pickle-based; depending on the PyTorch version
# and its defaults, loading a file can execute arbitrary code. Only load files
# from a trusted source.
print(f"Loading steps from {steps_file}...")
steps = torch.load(steps_file)

# Summarise the contents: the available keys, then the shape stored under each
print("Keys in steps:", steps.keys())
print("\nShapes:")
for key, value in steps.items():
    print(f"  {key}: {value.shape}")

## Initialize the Sampler

Initialize the `Sampler` with the appropriate dimensions from the data.

In [None]:
# Segment length is fixed here; the feature sizes are read from the trailing
# axis of the stored observation and action tensors.
segment_length = 50
obs_dim = steps['obs'].shape[-1]
action_dim = steps['actions'].shape[-1]

# Echo the parameters that the Sampler is about to be built with
print("Initializing Sampler with parameters:")
for label, setting in (
    ("segment_length", segment_length),
    ("obs_dim", obs_dim),
    ("action_dim", action_dim),
):
    print(f"  {label}: {setting}")

sampler = Sampler(segment_length, obs_dim, action_dim)

## Concatenate Observations, Actions, and Rewards

In [None]:
print("Concatenating obs, actions, and rewards...")

# Append a trailing singleton axis to the rewards so they can be concatenated
# with obs and actions along the last dimension.
rewards_reshaped = torch.unsqueeze(steps['rewards'], -1)
print(f"  Original rewards shape: {steps['rewards'].shape}")
print(f"  Reshaped rewards shape: {rewards_reshaped.shape}")

# Join obs, actions and rewards (in that order) on the feature axis
eps = torch.cat((steps['obs'], steps['actions'], rewards_reshaped), dim=-1)
print(f"\nCombined shape: {eps.shape}")

## Reshape Episodes

Use einops to reshape the episodes as shown in the original code.

We remove the last rollout buffer to make the number of saved rollout buffers divisible by 2, so that we can merge two rollout buffers, each of size 500, into a single episode of 1000 steps.

In [None]:
print("Reshaping episodes...")
print(f"  Original shape: {eps.shape}")

# Consecutive pairs along the leading axis are merged by the rearrange below
# (aa=2), so that axis must have an even length. Drop the trailing entry only
# when the count is odd: slicing unconditionally would turn an already-even
# count into an odd one and make the rearrange fail.
if eps.shape[0] % 2 == 1:
    eps_even = eps[:-1]
    print(f"  Taking all but the last element: {eps_even.shape}")
else:
    eps_even = eps
    print(f"  Leading dimension already even, keeping all elements: {eps_even.shape}")

# Pattern: the leading axis is split into (a, aa) with aa=2; the pair index aa
# is merged with axis b, and the group index a is merged with axis c.
# NOTE(review): presumably b is steps-per-buffer and c is the parallel-env axis,
# so each output row is one environment's merged episode — confirm against the
# layout of the saved rollout buffers.
reshaped_eps = einops.rearrange(eps_even, '(a aa) b c d -> (a c) (aa b) d', aa=2)
print(f"  Reshaped shape: {reshaped_eps.shape}")

## Save Reshaped Episodes (Intermediate Result)

In [None]:
# Persist the reshaped episodes as an intermediate artifact
reshaped_file = '../data/validation_episodes.pkl'
print("Saving reshaped episodes to {}...".format(reshaped_file))
torch.save(reshaped_eps, reshaped_file)
print("Saved successfully!")

## Sample Segments

Use the `sample_segments` method to sample segments from the reshaped episodes.

In [None]:
num_segments = 5000
print(
    "Sampling segments...",
    f"  Number of segments: {num_segments}",
    "  Sampling method: uniform",
    sep="\n",
)

# NOTE(review): the trailing positional arguments (None, True) are passed
# through unchanged; their meaning is defined by Sampler.sample_segments, which
# is not visible here — confirm against its signature.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# segments may differ between runs — confirm whether that is intended.
sa, r = sampler.sample_segments(reshaped_eps, num_segments, 'uniform', None, True)
print(
    f"  Segments shape: {sa.shape}",
    f"  Rewards shape: {r.shape}",
    sep="\n",
)

## Save Segments and Rewards

In [None]:
# Output locations for the sampled segments and their rewards
segments_file = '../data/validation_segments.pkl'
rewards_file = '../data/validation_rewards.pkl'

# Write each tensor in turn (segments first, then rewards); .contiguous() is
# applied before saving, as in the original flow.
for label, tensor, destination in (
    ("segments", sa, segments_file),
    ("rewards", r, rewards_file),
):
    print(f"Saving {label} to {destination}...")
    torch.save(tensor.contiguous(), destination)

print("Processing completed successfully!")

## Optional: Verify Saved Files

In [None]:
# Report the on-disk size (in MB) of every output file, or flag it as missing
files_to_check = [reshaped_file, segments_file, rewards_file]
for path in files_to_check:
    if not os.path.exists(path):
        print(f"{path} not found")
        continue
    size_mb = os.path.getsize(path) / (1024 ** 2)
    print(f"{path} - Size: {size_mb:.2f} MB")