In [None]:
# Move the kernel's working directory up one level; the relative paths used later
# ('data/...', './outputs/...') are resolved from there - presumably the repository root.
# NOTE: not idempotent - re-running this cell climbs one more directory each time.
%cd ..
import os
import time
import json

In [None]:
from apperception.database import database
from apperception.world import empty_world
from apperception.utils import F
from apperception.predicate import camera, objects
from optimized_ingestion.utils.preprocess import preprocess
# Bare attribute access whose value is discarded (it is not the cell's last expression,
# so nothing is displayed) - presumably touches the connection so it is opened up front;
# TODO confirm against apperception.database.
database.connection
from optimized_ingestion.cache import disable_cache
# Turn off optimized_ingestion's cache for this session (going by the helper's name) -
# presumably so benchmark results are not affected by cached work; TODO confirm.
disable_cache()

In [None]:
# Processed-data location: taken from the NUSCENES_PROCESSED_DATA environment
# variable when it is set, otherwise the hard-coded fallback path.
NUSCENES_PROCESSED_DATA = "NUSCENES_PROCESSED_DATA"
DATA_DIR = os.environ.get(NUSCENES_PROCESSED_DATA, "/data/processed/full-dataset/trainval")

# Raw-data location: same scheme, keyed on NUSCENES_RAW_DATA.
NUSCENES_RAW_DATA = "NUSCENES_RAW_DATA"
RAW_DATA_DIR = os.environ.get(NUSCENES_RAW_DATA, "/data/full-dataset/trainval")

In [None]:
# Read the scene list, one entry per line. Blank lines (for example the empty
# string a trailing newline would otherwise produce) are dropped and surrounding
# whitespace is stripped, so no empty scene name ends up in `scenes`.
with open('data/evaluation/video-samples/boston-seaport.txt', 'r') as scenes_file:
    scenes = [line.strip() for line in scenes_file if line.strip()]

In [None]:
def bechmark_detection_estimation_histogram(world, video_names=None, scenes=None, path_suffix=None):
    """Run ``preprocess`` and histogram the ``(total - keep) / total`` ratios it reports.

    ``preprocess`` is called with ``base=False`` and told to write its benchmark
    report to ``./outputs/detection_estimation_histogram[_<path_suffix>].json``.
    The report is read back and, for every entry listed under the
    ``DetectionEstimation`` stage, the ratio ``(total - keep) / total`` is put
    into one of ten equal-width buckets covering ``[0, 1]``.

    Args:
        world: World object forwarded unchanged to ``preprocess``.
        video_names: Forwarded to ``preprocess``; ``None`` (the default) is
            passed on as an empty list.
        scenes: Forwarded to ``preprocess``; ``None`` (the default) is passed
            on as an empty list.
        path_suffix: Optional string appended after an underscore to the report
            file name, so that different runs do not overwrite each other.

    Returns:
        A list of ten integers. Element ``i`` counts the entries whose ratio
        lies in ``[i/10, (i+1)/10)``; the last bucket also includes exactly 1.0.
        Every counted entry lands in exactly one bucket. Entries with a
        non-positive ``total`` or a ratio outside ``[0, 1]`` are not counted.
    """
    num_buckets = 10
    bucket_count = [0] * num_buckets

    ### detection estimation benchmark
    suffix = f'_{path_suffix}' if path_suffix else ''
    optimize_benchmark_path = f'./outputs/detection_estimation_histogram{suffix}.json'
    preprocess(
        world,
        DATA_DIR,
        [] if video_names is None else video_names,
        [] if scenes is None else scenes,
        base=False,
        benchmark_path=optimize_benchmark_path,
    )

    with open(optimize_benchmark_path) as benchmark_file:
        parsed_json = json.load(benchmark_file)

    for stage_runtime in parsed_json[0]['stage_runtimes']:
        if stage_runtime['stage'] != 'DetectionEstimation':
            continue
        for benchmark in stage_runtime['runtimes']:
            keep, _, total = benchmark['keep']
            skipped = total - keep
            # No ratio exists for an empty entry, and out-of-range ratios
            # belong to no bucket.
            if total <= 0 or skipped < 0 or skipped > total:
                continue
            # Floor division keeps the bucketing exact for integer counts and
            # avoids comparing floats against i * 0.1; a ratio of exactly 1.0
            # is folded into the last bucket.
            index = min(int(skipped * num_buckets // total), num_buckets - 1)
            bucket_count[index] += 1
    return bucket_count


In [None]:
# Base world with no filters applied; `name` is reused further down when the
# vehicle-only world is built from a fresh empty_world.
name = 'ScenicWorld' # world name
world = empty_world(name=name)

In [None]:
# Labels for the ten 0.1-wide ranges between 0 and 1: '0-0.1', '0.1-0.2', ..., '0.9-1'.
buckets = [f'{tenth / 10:g}-{(tenth + 1) / 10:g}' for tenth in range(10)]

# Histogram for the unfiltered world, limited to the first 200 entries of `scenes`.
all_obj_bucket_count = bechmark_detection_estimation_histogram(
    world,
    scenes=scenes[:200],
)

In [None]:
import matplotlib.pyplot as plt

# Total number of bucketed entries across all ten buckets.
num_videos = sum(all_obj_bucket_count)
# Share of entries per bucket; falls back to 0.0 when nothing was bucketed so the
# cell does not raise ZeroDivisionError on an empty benchmark.
all_obj_count_ratio = [c / num_videos if num_videos else 0.0 for c in all_obj_bucket_count]
# Complementary cumulative share: element i sums the ratios of buckets i..9.
all_obj_count_cdf = [sum(all_obj_count_ratio[i:]) for i in range(len(all_obj_count_ratio))]

print(f'total number of videos {num_videos}')
fig, axs = plt.subplots(3, 1, tight_layout=True)
axs[0].bar(buckets, all_obj_bucket_count, label="count")
axs[1].bar(buckets, all_obj_count_ratio, label="count/total_num_videos")
# Plot against the bucket labels so all three panels share the same x ticks.
axs[2].plot(buckets, all_obj_count_cdf, label="1-cdf")
for ax in axs:
    ax.legend(loc='upper center', shadow=True)
plt.show()

In [None]:
obj1 = objects[0]
cam = camera

# Predicate matching objects whose type is like 'car', 'truck' or 'bus'.
vehicle_predicate = (
    F.like(obj1.type, 'car')
    | F.like(obj1.type, 'truck')
    | F.like(obj1.type, 'bus')
)

# Fresh empty world (same name as the base world) restricted by that predicate.
car_world = empty_world(name=name).filter(vehicle_predicate)

In [None]:
# Same benchmark for the vehicle-only world; path_suffix="only_car" makes the report
# go to ./outputs/detection_estimation_histogram_only_car.json.
# NOTE(review): unlike the all-objects run, neither video_names nor scenes is passed
# here, so preprocess receives empty lists for both - confirm this is intended and
# that the two histograms really cover the same set of videos.
only_car_bucket_count = bechmark_detection_estimation_histogram(car_world, path_suffix="only_car")

In [None]:
import matplotlib.pyplot as plt

# Total number of bucketed entries across all ten buckets.
num_videos = sum(only_car_bucket_count)
# Share of entries per bucket; 0.0 when nothing was bucketed (avoids ZeroDivisionError).
only_car_count_ratio = [c / num_videos if num_videos else 0.0 for c in only_car_bucket_count]
# Complementary cumulative share, built from the ratios (not the raw counts) and kept
# in its own variable so the all-objects result is not overwritten.
only_car_count_cdf = [sum(only_car_count_ratio[i:]) for i in range(len(only_car_count_ratio))]

print(f'total number of videos {num_videos}')
# The values are already bucketed, so draw them as bars over the bucket labels instead
# of histogramming them again. The panels get independent y axes because counts and
# ratios live on different scales.
fig, axs = plt.subplots(3, 1, tight_layout=True)
axs[0].bar(buckets, only_car_bucket_count, label="count")
axs[1].bar(buckets, only_car_count_ratio, label="count/total_num_videos")
axs[2].plot(buckets, only_car_count_cdf, label="1-cdf")
# plt.subplots returns an array of Axes here, so each panel needs its own legend call.
for ax in axs:
    ax.legend(loc='upper center', shadow=True)
plt.show()