In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import pickle
from glob import glob
from pathlib import Path
import os
from tqdm import tqdm
import pandas as pd
import zarr
import numpy as np

from src.models.vision import get_encoder
from src.data_processing.process_demos import encode_demo
from src.visualization.render_mp4 import create_mp4

In [5]:
base_dir = Path(os.environ["ROLLOUT_SAVE_DIR"])

rollout_dir = base_dir / "raw" / "sim_rollouts"

file_path = rollout_dir / "index.csv"

## Index the raw rollout data

Now done in a standalone script `src.data_processing.index_rollouts`

## Augment an existing Zarr array with new data from the index

Now done in a standalone script `src.data_processing.augment_dataset`

## Merge together the two index files

In [14]:
old_index = pd.read_csv("/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/index.csv")
old_index

Unnamed: 0,path,furniture,success
0,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
1,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
2,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
3,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
4,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
...,...,...,...
8809,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
8810,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
8811,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
8812,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False


In [16]:
new_index = pd.read_csv(file_path)

new_index

Unnamed: 0,path,furniture,success
0,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_7.pkl,one_leg,False
1,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_0.pkl,one_leg,False
2,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_1.pkl,one_leg,False
3,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_6.pkl,one_leg,False
4,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_5.pkl,one_leg,False
...,...,...,...
8814,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_4.pkl,one_leg,False
8815,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_3.pkl,one_leg,False
8816,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_2.pkl,one_leg,False
8817,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_5.pkl,one_leg,False


In [12]:
old_index.path = old_index.path.str.replace("/data/pulkitag/data/ankile/furniture-data/data", "/data/scratch-oc40/pulkitag/ankile/furniture-data")

old_index.path[0]

'/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_7.pkl'

In [15]:
# Merge the two index files
new_index = pd.concat([old_index, new_index], axis=0, ignore_index=True).reset_index()

new_index

Unnamed: 0,index,path,furniture,success
0,0,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
1,1,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
2,2,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
3,3,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
4,4,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
...,...,...,...,...
9320,9320,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
9321,9321,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
9322,9322,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
9323,9323,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False


In [16]:
# Save the new index back to the original file
new_index.to_csv(file_path)

In [17]:
new_index.success.mean()

0.07184986595174263

## Diagnose the new index file

In [12]:
# Change the maximum width of a pandas columnn
pd.set_option('max_colwidth', None)

In [21]:
# Now, after more indexing, see again how it looks
new_index = pd.read_csv(file_path)

new_index

Unnamed: 0,path,furniture,success
0,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_7.pkl,one_leg,False
1,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_0.pkl,one_leg,False
2,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_1.pkl,one_leg,False
3,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_6.pkl,one_leg,False
4,/data/pulkitag/data/ankile/furniture-data/data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_5.pkl,one_leg,False
...,...,...,...
8815,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_3.pkl,one_leg,False
8816,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_2.pkl,one_leg,False
8817,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_5.pkl,one_leg,False
8818,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_6.pkl,one_leg,False


In [22]:
# Update the paths of the old rollouts
new_index.path = new_index.path.str.replace("/data/pulkitag/data/ankile/furniture-data/data", "/data/scratch-oc40/pulkitag/ankile/furniture-data")

new_index

Unnamed: 0,path,furniture,success
0,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_7.pkl,one_leg,False
1,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_0.pkl,one_leg,False
2,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_1.pkl,one_leg,False
3,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_6.pkl,one_leg,False
4,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_5.pkl,one_leg,False
...,...,...,...
8815,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_3.pkl,one_leg,False
8816,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_2.pkl,one_leg,False
8817,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_5.pkl,one_leg,False
8818,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_6.pkl,one_leg,False


In [23]:
new_index.path.nunique()

8820

In [24]:
# Write the file back to the same file
print(f"Writing back to path {file_path}")
new_index.to_csv(file_path, index=False)

Writing back to path /data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/index.csv


In [26]:
# Read back to double check
pd.read_csv(file_path)

Unnamed: 0,path,furniture,success
0,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_7.pkl,one_leg,False
1,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_0.pkl,one_leg,False
2,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_1.pkl,one_leg,False
3,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_6.pkl,one_leg,False
4,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-02_18-33-01/rollout_5.pkl,one_leg,False
...,...,...,...
8820,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_0.pkl,one_leg,False
8821,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_23-38-45/rollout_7.pkl,one_leg,False
8822,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_20-14-31/rollout_0.pkl,one_leg,False
8823,/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/2024-01-09_20-14-31/rollout_1.pkl,one_leg,False


In [41]:
bad_start = [i for i, elem in enumerate(new_index["Unnamed: 0"]) if elem.startswith("/data")][0]

new_index.iloc[bad_start:, :]

Unnamed: 0.1,Unnamed: 0,path,furniture,success
12084,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12085,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12086,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12087,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12088,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
...,...,...,...,...
12167,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12168,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,True,
12169,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,
12170,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False,


In [42]:
new_index.iloc[bad_start:, 1:4] = new_index.iloc[bad_start:, 0:3].values

In [43]:
new_index.iloc[bad_start:, :]

Unnamed: 0.1,Unnamed: 0,path,furniture,success
12084,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12085,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12086,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12087,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12088,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
...,...,...,...,...
12167,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12168,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,True
12169,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12170,/data/scratch-oc40/pulkitag/ankile/furniture-d...,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False


In [44]:
new_index2 = new_index.drop(columns=["Unnamed: 0"])

In [45]:
new_index2

Unnamed: 0,path,furniture,success
0,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
1,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
2,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
3,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
4,/data/pulkitag/data/ankile/furniture-data/data...,one_leg,False
...,...,...,...
12167,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12168,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,True
12169,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12170,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False


In [51]:
new_index2.path = new_index2.path.str.replace("/data/pulkitag/data/ankile/furniture-data/data", "/data/scratch-oc40/pulkitag/ankile/furniture-data")

new_index2

Unnamed: 0,path,furniture,success
0,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
1,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
2,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
3,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
4,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
...,...,...,...
12167,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12168,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,True
12169,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False
12170,/data/scratch-oc40/pulkitag/ankile/furniture-d...,one_leg,False


In [52]:
new_index2.to_csv("/data/scratch-oc40/pulkitag/ankile/furniture-data/raw/sim_rollouts/index.csv", index=False)

In [58]:
new_index2.success.astype(bool).sum()

984