In [None]:
%reload_ext autoreload
%autoreload 2

import copy
import getpass
import json
import os
import pathlib
import sys
import types

import yaml
import open_clip
import torch
import cv2
import numpy as np
import matplotlib as mpl
from scipy.spatial import KDTree
import peek
import tqdm

from utils.db_utils import get_df, get_data, connect_db, DB
from utils.plotly_utils import *
from utils.vis import *
from utils.predict_scenegraph import PredictSceneGraph
from utils.imagine_nav_planner import ImagineNavPlanner
from utils.scene_graph_utils import update_region
from experiments.test_scenegraph_offline import evaluate_sg
from constants import *

# Locations of the experiment dump and of the run being inspected.
dump_folder = './dump/prediction_may13/'
output_folder = f'{dump_folder}/objectnav-dino'

# load results
print(f'Loading results from {output_folder}/result.db')
# Read the run arguments once, through a context manager so the file handle
# is closed deterministically instead of being left to the garbage collector.
with open(f'{dump_folder}/args.json') as f:
    args = types.SimpleNamespace(**json.load(f))
results = get_df(f'{output_folder}/result.db', 'result')
print(f'Loaded {len(results)} results')
# NOTE(review): this divides the *last row's* "success" value by the row count,
# which is only a rate if "success" stores a running total — confirm against
# whatever writes result.db.
print(f'Current success rate: {results.tail(1)["success"].values[0]/len(results):.2%}')
print(f'Current SPL: {results["spl"].mean():.2f}')

# load agent modules
print('Loading agent modules')
# NOTE(review): requires a CUDA device; the model is also cast to half precision below.
device = torch.device("cuda")
with open(f'{dump_folder}/{args.exp_config}') as f:
    exp_config = yaml.safe_load(f)
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
    "ViT-H-14", "laion2b_s32b_b79k"
)
clip_model = clip_model.to(device).half()
clip_tokenizer = open_clip.get_tokenizer("ViT-H-14")
# Bundled in this order because it is passed as a single argument to ImagineNavPlanner later.
clip_model_list = (clip_model, clip_preprocess, clip_tokenizer)

# access db: print the table object of the aggregated result database
with DB(f'{output_folder}/result.db') as con:
    table = con.table('result')
    print(table)

# episode infos
steps_df = get_df(f'{output_folder}/result.db', 'result', select=['count_steps', 'episode', 'target', 'habitat_success', 'switch_upstair_count', 'switch_downstair_count'])

# step infos: each episode has its own database under steps/<episode>.db;
# print the step table of the first episode as a sample.
sample_episode_label = steps_df['episode'].values[0]
with DB(f'{output_folder}/steps/{sample_episode_label}.db') as con:
    table = con.table('step_data')
    print(table)

In [None]:

def get_sg_data(episode_label, step=None):
    """Load the scene-graph related columns of one episode's step database.

    Reads ``{output_folder}/steps/{episode_label}.db`` (table ``step_data``)
    through ``get_data``.

    Args:
        episode_label: Episode identifier; used both as the database file name
            and as the value matched against the ``episode_label`` column.
        step: Optional step number. When ``None`` (default) only rows whose
            step is a multiple of 5 are loaded. When given, only rows with
            exactly that step are loaded. (Previously this argument was
            accepted but ignored, so every row of the episode was returned.)

    Returns:
        Whatever ``get_data`` returns for the selected rows, or ``None`` if
        loading raised (the error is printed; this is deliberately best-effort
        so that one unreadable episode does not abort a sweep over episodes).
    """
    # Named `row_filter` rather than `filter` to avoid shadowing the builtin.
    if step is None:
        row_filter = lambda x: (x['episode_label'] == episode_label) & (x['step'] % 5 == 0)
    else:
        row_filter = lambda x: (x['episode_label'] == episode_label) & (x['step'] == step)
    try:
        data = get_data(
            f'{output_folder}/steps/{episode_label}.db',
            'step_data',
            filter=row_filter,
            select=[
                'timestamp',
                'step',
                'episode_label',
                'cate_object',
                'origins_grid',
                'current_grid_pose',
                'camera_position_tensor',
                'global_scene_graph_pickle',
                'gt_scenegraph',
                'predicted_global_scene_graph_pickle',
                'global_bev_rgb_map_tensor',
                # fbe
                'traversible_map_tensor',
                'occupancy_map_tensor',
                'frontier_candidate_list',
            ]
        )
    except Exception as e:
        # Best-effort: report which episode failed and let the caller skip it.
        print(f'Failed to load step data for episode {episode_label}: {e}')
        return None
    return data

def check_step(step):
    """Return True only if `step` carries both scene graphs needed downstream.

    A step qualifies when the keys 'gt_scenegraph' and 'global_scene_graph'
    are both present and neither maps to None.
    """
    required_keys = ('gt_scenegraph', 'global_scene_graph')
    return all(
        name in step and step[name] is not None
        for name in required_keys
    )

# Re-read the run arguments; the context manager closes the file handle.
with open(f'{dump_folder}/args.json') as f:
    args = types.SimpleNamespace(**json.load(f))

# Stop once this many episodes have contributed a usable step
# (a later cell indexes into `valid_steps`).
MAX_VALID_STEPS = 5

count_episode = 0
valid_steps = []
# The loop variable is intentionally called `episode_label`: it stays defined
# at notebook scope after the loop and later cells may read it.
for episode_label in results['episode']:
    print(f'Processing episode {episode_label}')
    data = get_sg_data(episode_label)
    if data is None:
        # Episode could not be loaded; get_sg_data already reported why.
        continue
    count_episode += 1
    # Walk the episode backwards and keep the last step that has both scene graphs.
    last_valid = next((s for s in data[::-1] if check_step(s)), None)
    if last_valid is not None:
        valid_steps.append(last_valid)
    if len(valid_steps) >= MAX_VALID_STEPS:
        break
print(f'Loaded {count_episode} episodes')
print(f'Loaded {len(valid_steps)} sg')


In [None]:
# SECURITY: an API key was hardcoded on this line. Treat that key as leaked
# (revoke/rotate it) and never commit credentials to a notebook.
# The key is now taken from the environment; if it is not set, prompt for it
# without echoing so it does not end up in the notebook source or output.
if not os.environ.get("GPT_API_KEY"):
    os.environ["GPT_API_KEY"] = getpass.getpass("GPT_API_KEY: ")
# Pick one of the collected steps to analyse (index 4 = the fifth collected episode).
step = valid_steps[4]
print(f'episode {step["episode_label"]} step {step["step"]}')
grid_size = args.map_resolution
map_size = args.map_size_cm/grid_size
origins_grid = step['origins_grid']
# First three rows of column 3.
# NOTE(review): presumably the translation part of a 4x4 camera pose matrix — confirm.
camera_position = step['camera_position'][:3, 3]
target = step['cate_object']

# Observed scene graph: flatten rooms -> regions -> objects.
# NOTE(review): if update_region mutates its argument, re-running this cell
# re-applies it to the cached step's regions — confirm it is idempotent.
obs_sg = step['global_scene_graph']
obs_regions = [update_region(region, grid_size, origins_grid) for room in obs_sg['rooms'] for region in room['regions']]
obs_objects = [obj for region in obs_regions for obj in region['objects']]


# Ground-truth scene graph. Work on a private copy: captions are rewritten
# below, and doing that on the cached step would make a re-run of this cell
# look up already-remapped captions in gt_region_captions_map.
gt_sg = copy.deepcopy(step['gt_scenegraph'])
floor_avg_heights = [floor['floor_avg_height'] for floor in gt_sg['floors']]
# Choose the floor whose average height is closest to component 1 of the camera position.
# NOTE(review): assumes index 1 is the vertical axis — confirm.
floor_id = np.argmin(np.abs(np.array(floor_avg_heights) - camera_position[1]))
for floor in gt_sg['floors']:
    for region in floor['regions']:
        region['caption'] = gt_region_captions_map[region['caption']]
gt_regions = [update_region(region, grid_size, origins_grid) for region in gt_sg['floors'][floor_id]['regions']]
gt_objects = [obj for region in gt_regions for obj in region['objects']]

# fbe inputs recorded at this step
frontier_candidate_list = step['frontier_candidate_list']
current_grid_pose = step['current_grid_pose']
traversible_map = step['traversible_map']
occupancy_map = step['occupancy_map']
global_bev_rgb_map = step['global_bev_rgb_map']

# Rebuild a planner and seed it with the recorded state of this step.
imagine_nav_planner = ImagineNavPlanner(args, exp_config, clip_model_list)
scene_graph = imagine_nav_planner.scene_graph
imagine_nav_planner.origins_grid = origins_grid


scene_graph.grid_size = grid_size
# The planner gets its own copy of the observed graph.
scene_graph.scene_graph = copy.deepcopy(obs_sg)
imagine_nav_planner.set_obj_goal(target)
# Use the episode recorded with the step itself rather than the loop variable
# left over from the collection cell, which only matches when the last
# collected step is the one selected above.
imagine_nav_planner.set_step(step["step"], step["episode_label"])
scores, best_frontier_id, exploration_scores = imagine_nav_planner.fbe(
    frontier_candidate_list,
    current_grid_pose,
    traversible_map,
    occupancy_map,
    global_bev_rgb_map,
    gt_sg['floors'][floor_id],
)

# Predicted scene graph as left on the planner after fbe(): flatten like the observed one.
pred_sg = imagine_nav_planner.predicted_global_scene_graph
pred_regions = [update_region(region, grid_size, origins_grid) for room in pred_sg['rooms'] for region in room['regions']]
pred_objects = [obj for region in pred_regions for obj in region['objects']]

In [None]:
# Inspect the private `_llm_prompt` attribute of the planner's scene graph.
# NOTE(review): presumably the prompt built during the fbe() call in the previous cell — confirm.
peek(scene_graph._llm_prompt)

In [None]:
# Print the private `_response` attribute of the planner's scene graph.
# NOTE(review): presumably the raw model reply to the prompt shown above — confirm.
print(scene_graph._response)

In [None]:
# One "<id> <caption>: <reasoning>" line per region of the predicted scene
# graph, in room order; the bare name on the last line displays the list.
reasoning = [
    '{} {}: {}'.format(region["id"], region["caption"], region["reasoning"])
    for room in scene_graph.predicted_sg['rooms']
    for region in room['regions']
]
reasoning

In [None]:
peek(target)


def _bev_region_figure(regions):
    """Overlay `regions` (with their objects) on this step's BEV map in a 1000x1000 figure."""
    # The map is passed with its last axis reversed.
    # NOTE(review): presumably a BGR<->RGB channel swap — confirm.
    fig = create_fig(img=step['global_bev_rgb_map'][..., ::-1])
    fig.update_layout(width=1000, height=1000)
    return plot_region(fig, regions, map_size, show_objects=True, min_region_size=1)


# Predicted regions.
vis = _bev_region_figure(pred_regions)
vis.show()

# Ground-truth regions of the selected floor.
gt_vis = _bev_region_figure(gt_regions)
gt_vis.show()

from utils.vis import remove_image_border
# Image stored on the scene graph as `obs_bev_image`, shown without overlays.
peek(scene_graph.obs_bev_image.shape)
vlm_fig = create_fig(img=scene_graph.obs_bev_image)
vlm_fig.update_layout(width=1000, height=1000)
vlm_fig.show()



In [None]:
# Call the planner's semantic_graph_based() on the predicted scene graph.
# As the last expression of the cell, its return value is displayed.
# NOTE(review): what it returns and whether it mutates pred_sg is not visible here.
imagine_nav_planner.semantic_graph_based(pred_sg)

In [None]:
# Dump id, predicted flag, caption and corr_score of every predicted region,
# separated by a dashed line.
divider = '-' * 20
for pred_region in pred_regions:
    # Regions without a 'predicted' key are reported as False.
    is_predicted = pred_region.get('predicted', False)
    print(pred_region['id'], is_predicted)
    print(pred_region['caption'])
    print(pred_region['corr_score'])
    print(divider)
    

In [8]:
# # merge regions
# from utils.scene_graph_utils import update_region, detect_match, UnionFind
# pred_regions = merge_regions(obs_regions)
# print(len(obs_regions), len(pred_regions))


In [9]:
# obs_matches, obs_scores = evaluate_sg(
#     clip_model_list=imagine_nav_planner.clip_model_list,
#     obs_regions=obs_regions,
#     gt_regions=gt_regions,
#     obs_objects=obs_objects,
#     gt_objects=gt_objects,
#     knn_region=3,
#     knn_object=5,
#     max_object_dist=100.0/grid_size,
# )
# pred_matches, pred_scores = evaluate_sg(
#     clip_model_list=imagine_nav_planner.clip_model_list,
#     obs_regions=pred_regions,
#     gt_regions=gt_regions,
#     obs_objects=pred_objects,
#     gt_objects=gt_objects,
#     knn_region=3,
#     knn_object=5,
#     max_object_dist=100.0/grid_size,
# )

In [None]:
# Build one figure per region set (observed, predicted, ground truth), each
# drawn over the step's BEV map with its last axis reversed, in that order.
obs_vis, pred_vis, gt_vis = (
    plot_region(
        create_fig(img=step['global_bev_rgb_map'][..., ::-1]),
        region_set,
        map_size,
        show_objects=True,
        min_region_size=1,
    )
    for region_set in (obs_regions, pred_regions, gt_regions)
)

# Show them only after all three have been built.
for region_fig in (obs_vis, pred_vis, gt_vis):
    region_fig.show()

# plot_matches(obs_vis, gt_vis, matches['region_recall_relaxed']).show()
# plot_matches(obs_vis, gt_vis, matches['region_precision_relaxed'], reversed=True).show()

In [27]:
def sample_steps(step_list, n=5, min_step=30):
    """Pick at most ``n`` steps from ``step_list``, ignoring early steps.

    Args:
        step_list: Iterable of step numbers, in the order they should be
            sampled from.
        n: Maximum number of steps to return. Values <= 0 yield an empty list.
        min_step: Steps smaller than this are discarded before sampling
            (default 30, the previously hard-coded threshold).

    Returns:
        A list with at most ``n`` of the remaining steps. If no more than
        ``n`` steps remain they are all returned. Otherwise every
        ``(len - 1) // (n - 1)``-th step is taken starting from the first,
        truncated to ``n`` items; the last remaining step is therefore not
        guaranteed to be included.
    """
    eligible = [x for x in step_list if x >= min_step]
    if n <= 0:
        # Nothing requested; also avoids a negative slice bound silently
        # trimming the tail.
        return []
    if len(eligible) <= n:
        return eligible
    if n == 1:
        # A single sample has no spacing to compute; (n - 1) below would be
        # a division by zero.
        return eligible[:1]
    # Stride that spreads n samples over the list, then cap at n items.
    interval = max(1, (len(eligible) - 1) // (n - 1))
    return eligible[::interval][:n]
# Quick visual check of sample_steps on inputs of growing length
# (one printed line per input, default arguments).
demo_inputs = (
    [5, 10],
    [5, 10, 35],
    [5, 10, 35, 40, 50],
    [*range(0, 50, 5)],
    [*range(0, 55, 5)],
    [*range(0, 60, 5)],
    [*range(0, 100, 5)],
    [*range(0, 200, 5)],
)
for demo_steps in demo_inputs:
    print(sample_steps(demo_steps))

[]
[35]
[35, 40, 50]
[30, 35, 40, 45]
[30, 35, 40, 45, 50]
[30, 35, 40, 45, 50]
[30, 45, 60, 75, 90]
[30, 70, 110, 150, 190]
