In [1]:
from pathlib import Path
import sys
import os
import json
# Add the parent directory to the module search path. The entry is relative,
# so it is resolved against the working directory in effect at import time.
# NOTE(review): presumably this is what makes the project-local `inpainting`
# package importable further down — confirm against the repository layout.
sys.path.append("..")

In [2]:
# Dataset locations, resolved against the directory the kernel was started in.
# ENTITIES_DIR: one sub-directory per entity, each read for an annotations.json.
ENTITIES_DIR = Path(os.getcwd(), "data", "mm_inp_dataset", "entities")
# TRAIN_DIR: file listing used to decide which entities belong to the train split.
TRAIN_DIR = Path(os.getcwd(), "data", "mm_inp_dataset", "images", "train")

In [3]:
# Histogram: number of annotation entries in an image's annotations.json
# -> number of train images with that many entries.
masks_per_images = {}
# Histogram: "noun_chunk_root" value -> number of annotation entries carrying it.
noun_chunks_per_images = {}
# Set membership keeps the per-entity train-split lookup O(1).
train_files = set(os.listdir(TRAIN_DIR))
for entity_dir in ENTITIES_DIR.iterdir():
    # Check the split *before* touching the annotation file: entries that are
    # not part of the train split are skipped without ever being opened, so a
    # stray entry lacking annotations.json no longer aborts the whole scan.
    if f"{entity_dir.name}.jpg" not in train_files:
        continue
    # Keep the file open only for the duration of the parse.
    with open(entity_dir / "annotations.json", "r") as f:
        annotations = json.load(f)
    n_masks = len(annotations)
    masks_per_images[n_masks] = masks_per_images.get(n_masks, 0) + 1
    for annotation in annotations.values():
        root = annotation["noun_chunk_root"]
        noun_chunks_per_images[root] = noun_chunks_per_images.get(root, 0) + 1

In [4]:
from inpainting.data.datasets import LlavaDataset
import torch

# All constructor options are gathered in one mapping so the configuration can
# be read (and tweaked) in a single place before the dataset is built.
dataset_kwargs = dict(
    data_dir=Path(os.getcwd(), "data", "mm_inp_dataset"),
    max_concepts=5,
    # Explicitly seeded generator, so whatever the dataset draws from it is repeatable.
    generator=torch.Generator().manual_seed(42),
    remove_intersections=False,
    shuffle_concepts=True,
    masked_area_threshold=0.65,
    return_entity_PILs=False,
    only_gray_concept=False,
    override_gray=False,
)
dataset = LlavaDataset(**dataset_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Show both histograms built from the annotation files: entries-per-image
# counts and noun-chunk-root counts. The second dict has one key per distinct
# root, so the rendered output can be long.
masks_per_images, noun_chunks_per_images

In [5]:
from tqdm import tqdm

In [6]:
# Histogram: number of entity captions in a dataset sample -> number of samples.
# This iterates the whole dataset once; the recorded run took roughly half an hour.
masks_per_images_2 = {}
for x in tqdm(dataset):
    n_captions = len(x["entity_captions"])
    masks_per_images_2[n_captions] = masks_per_images_2.get(n_captions, 0) + 1

100%|██████████| 92276/92276 [29:55<00:00, 51.38it/s]


In [8]:
# Ratio of the dataset's `n_invisible` attribute to the number of samples
# iterated above (the histogram values sum to one per sample).
# NOTE(review): `n_invisible` is maintained inside LlavaDataset, which is not
# shown here — presumably it is updated while iterating, so this cell is only
# meaningful after the full pass over `dataset`; confirm.
dataset.n_invisible / sum(masks_per_images_2.values())

0.0004768303784299276

In [None]:
# Show the captions-per-sample histogram collected from iterating `dataset`.
masks_per_images_2

In [None]:
import matplotlib.pyplot as plt

from matplotlib import rcParams

# Use Roboto for every figure: as the global font family, and as the sole
# candidate of the generic sans-serif and serif families.
rcParams.update(
    {
        "font.family": "Roboto",
        "font.sans-serif": ["Roboto"],
        "font.serif": ["Roboto"],
    }
)

In [None]:
# Number of most frequent noun-chunk roots shown in the bar chart.
TOP_N_NOUN_CHUNK_ROOTS = 50
# Destination of the rendered figure.
FIGURE_PATH = Path("assets") / "noun_chunk_roots.svg"

# Order roots by mask count, most frequent first (ties keep insertion order).
objects_count = dict(sorted(noun_chunks_per_images.items(), key=lambda item: item[1], reverse=True))

# Make every key a string so it can be used as a categorical bar label.
objects_count = {str(k): v for k, v in objects_count.items()}

# Keep only the TOP_N_NOUN_CHUNK_ROOTS most frequent roots.
objects_to_plot = dict(list(objects_count.items())[:TOP_N_NOUN_CHUNK_ROOTS])

# savefig does not create missing directories, so make sure the target exists.
FIGURE_PATH.parent.mkdir(parents=True, exist_ok=True)

# Horizontal bar chart: one bar per root, bar length = number of masks.
fig, ax = plt.subplots(figsize=(10, 10))
ax.barh(list(objects_to_plot.keys()), list(objects_to_plot.values()))
ax.set_xlabel("No. masks", fontsize=16)
ax.set_ylabel("Noun chunk roots", fontsize=16)
ax.tick_params(axis="both", labelsize=14)
fig.savefig(FIGURE_PATH, format="svg", bbox_inches="tight")