# Evaluate STAIR's Performance in Stage-alignment

In [None]:
import os
import numpy as np
import pandas as pd
import random
from IPython.display import display
import time
from collections import defaultdict

# Experiment configuration.
# Supported short task names: 'door-open', 'window-close', 'window-open'.
env_name = 'window-close'
# Long-form environment name built from the short task name.
long_env_name = f'metaworld_{env_name}-v2'
# Methods under comparison; each name is also a sub-directory of
# ./query/<env_name>/ from which that method's queries are loaded.
METHODS = ['pebble', 'stair']

In [None]:
def load_queries(env_name, long_env_name, method):
    """Collect paired query clips for one environment/method from disk.

    Walks ``./query/{env_name}/{method}/`` for ``.gif`` files whose base name
    is underscore-separated as
    ``<env>_<method>_seed<S>_<step>_<count>_<query_id>_en<E>_<start>``.
    Files that share ``(env, method, seed, step, count)`` form one group, and
    a group produces a pair only when it contains both query id ``"0"`` and
    query id ``"1"``; incomplete groups are dropped.

    Args:
        env_name: Short task name; selects the directory to scan.
        long_env_name: Not used by this function; kept so existing callers
            keep working.
        method: Method name; selects the directory to scan.

    Returns:
        A list of dicts with the keys ``seed``, ``step``, ``en_idx0``,
        ``start_idx0``, ``video_path0``, ``en_idx1``, ``start_idx1`` and
        ``video_path1``. The list is empty when no complete pair exists.

    Raises:
        FileNotFoundError: If the query directory does not exist.
    """
    queries_path = f'./query/{env_name}/{method}/'
    if not os.path.exists(queries_path):
        raise FileNotFoundError(f"Queries path {queries_path} does not exist.")

    groups = defaultdict(dict)

    for root, _dirs, files in os.walk(queries_path):
        for filename in files:
            if not filename.endswith(".gif"):
                continue

            parts = os.path.splitext(filename)[0].split('_')
            try:
                # Distinct local names: the function arguments must not be
                # overwritten by whatever the file name happens to contain.
                file_env = parts[0]
                file_method = parts[1]
                seed = int(parts[2].replace("seed", ""))
                step = int(parts[3])
                count = int(parts[4])
                query_id = parts[5]
                en_idx = int(parts[6].replace("en", ""))
                start_index = int(parts[7])
            except (IndexError, ValueError):
                # A stray GIF should not abort loading of every other query.
                print(f"Skipping {filename}: unexpected file name format")
                continue

            group_key = (file_env, file_method, seed, step, count)
            groups[group_key][query_id] = {
                "en_idx": en_idx,
                "start_idx": start_index,
                # Use the directory the file was actually found in, so the
                # path stays valid for files located in sub-directories.
                "video_path": os.path.join(root, filename),
            }

    queries = []
    for group_key, pairs in groups.items():
        if "0" not in pairs or "1" not in pairs:
            continue
        first, second = pairs["0"], pairs["1"]
        queries.append({
            "seed": group_key[2],
            "step": group_key[3],
            "en_idx0": first["en_idx"],
            "start_idx0": first["start_idx"],
            "video_path0": first["video_path"],
            "en_idx1": second["en_idx"],
            "start_idx1": second["start_idx"],
            "video_path1": second["video_path"],
        })

    print(f"Generated {len(queries)} query pairs")
    if queries:
        # Show one sample for a quick sanity check; skipped when nothing
        # was found instead of failing with IndexError.
        print(queries[0])
    return queries

# Per-environment step threshold: the labeling loop only presents queries
# whose 'step' is at least this value.
obvious_step = {
    "door-open": 500_000,
    "window-close": 300_000,
    "window-open": 300_000,
}

In [None]:
# Load every method's query pairs once, keyed by method name.
queries_dict = {name: load_queries(env_name, long_env_name, name) for name in METHODS}

import datetime
def getDataTimeString():
    """Return the current local time formatted as ``YYMMDD-HHMMSS``."""
    now = datetime.datetime.now()
    full_stamp = now.strftime('%Y%m%d-%H%M%S')
    # Drop the first two characters so the year is two digits long.
    return full_stamp[2:]


# Human labels for this environment are stored under ./human_label/<env_name>/.
base_path = f"./human_label/{env_name}/"
# exist_ok=True creates the directory when needed without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_path, exist_ok=True)

# The file name carries a timestamp, so each run normally starts a new file;
# if a file with that exact name already exists, its rows are loaded and
# extended instead of being overwritten.
output_csv = os.path.join(base_path, f"human_fair_{getDataTimeString()}.csv")
if os.path.exists(output_csv):
    df = pd.read_csv(output_csv)
else:
    df = pd.DataFrame(columns=[
        'method',
        'segment0_start_idx', 'segment0_episode',
        'segment1_start_idx', 'segment1_episode',
        'is_stage_aligned',
    ])

## Prompt

### door-open

The target behavior is that the robot arm smoothly rotates the door until it stays fully open at a clearly visible angle.

### window-open
The target behavior is that the window slides horizontally to a clearly open position with coordinated gripper guidance.

### window-close
The target behavior is that the window slides horizontally to a clearly closed position with coordinated gripper guidance.

In [None]:
from IPython.display import clear_output, HTML, Image
import base64

# Interactive labeling session: shows N_QUERIES query pairs per method, in a
# shuffled method order, and records the annotator's 1/0 answer for each pair.
N_QUERIES = 10
method_idxs = list(range(len(METHODS))) * N_QUERIES
random.shuffle(method_idxs)
n_all_queries = len(method_idxs)

# Pre-filter the queries that reach the step threshold. Sampling from this
# pool is equivalent to re-drawing until the threshold is met, but it cannot
# loop forever when a method has no qualifying query.
min_step = obvious_step[env_name]
eligible_queries = {
    name: [q for q in queries_dict[name] if int(q['step']) >= min_step]
    for name in METHODS
}
methods_without_queries = [name for name, pool in eligible_queries.items() if not pool]
if methods_without_queries:
    raise ValueError(
        f"No queries with step >= {min_step} for method(s): "
        f"{', '.join(methods_without_queries)}"
    )

for i, method_idx in enumerate(method_idxs):
    clear_output()
    print(f"{i+1}th among total {n_all_queries} feedbacks")
    method = METHODS[method_idx]
    query = random.choice(eligible_queries[method])

    # NOTE(review): the short sleeps presumably give the notebook front end
    # time to clear/render before input() blocks -- confirm before removing.
    time.sleep(0.1)
    display(HTML(f'''
    <div style="display: inline-block; margin-right: 100px;">
        <p> Please enter '1' for stage-alignment, '0' for misalignment. </p>
        <img src="{query['video_path0']}" width="400" loop="true" >
        <img src="{query['video_path1']}" width="400" loop="true" >
    </div>
    '''))

    time.sleep(1)
    select = input("Please enter '1' for stage-alignment, '0' for misalignment.").strip()
    # Re-ask for the SAME pair on invalid input. Skipping ahead would drop the
    # query (so fewer than N_QUERIES labels per method are collected) and the
    # error message would be wiped by the next clear_output().
    while select not in ('1', '0', 'quit', 'exit'):
        print("Invalid input. Please enter '1' for stage-alignment, '0' for misalignment.")
        select = input("Please enter '1' for stage-alignment, '0' for misalignment.").strip()
    if select in ('quit', 'exit'):
        break

    new_entry = {
        'method': method,
        'segment0_start_idx': query['start_idx0'],
        'segment0_episode': query['en_idx0'],
        'segment1_start_idx': query['start_idx1'],
        'segment1_episode': query['en_idx1'],
        'is_stage_aligned': select
    }
    df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)

    # Save after every answer so an interrupted session keeps its labels.
    df.to_csv(output_csv, index=False)
    print(f"\nresult save to {output_csv}")


# Final save; also writes the (possibly empty) table when the session was
# ended before any label was recorded.
df.to_csv(output_csv, index=False)
print(f"\nresult save to {output_csv}")