## LLM-based Rewriter

In [1]:
from flow.LLM.rewrite_FV import FactValidationRewriter
from flow.LLM.rewrite_PM import PromptMatchingRewriter

In [2]:
# Fact-validation rewrite: the questions loaded from `question_json_file` are
# rewritten with the given LLM, and both the rewritten and the failed questions
# are exported under `output_path`.
# NOTE(review): `enforce_balanced_boolean` presumably balances the boolean
# answers of the rewritten questions — confirm in FactValidationRewriter.
fv_rewriter = FactValidationRewriter(
    question_json_file='./example/data/ScanQA-sample.json',
    output_path='./example/output/',
    llm_model='qwen2.5:72b',
    enforce_balanced_boolean=True,
)
fv_rewriter.rewrite()

Using "qwen2.5:72b" model with "ollama" backend for rewriting the questions
Exporting the rewritten questions and failed rewritten questions to: /home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/output
Loaded 3 questions from: /home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/data/ScanQA-sample.json


Rewriting: 100%|██████████| 3/3 [00:17<00:00,  5.89s/Q]


In [3]:
# Prompt-matching rewrite: same input questions and LLM as the fact-validation
# cell; rewritten and failed questions are exported under `output_path`.
# NOTE(review): `n_options` / `enforce_balanced_options` presumably control the
# number of answer options per question and how the correct option is
# distributed — confirm in PromptMatchingRewriter.
pm_rewriter = PromptMatchingRewriter(
    question_json_file='./example/data/ScanQA-sample.json',
    output_path='./example/output/',
    llm_model='qwen2.5:72b',
    n_options=5,
    enforce_balanced_options=True,
)
pm_rewriter.rewrite()

Using "qwen2.5:72b" model with "ollama" backend for rewriting the questions
Exporting the rewritten questions and failed rewritten questions to: /home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/output
Loaded 3 questions from: /home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/data/ScanQA-sample.json


Rewriting: 100%|██████████| 3/3 [00:13<00:00,  4.52s/Q]


## Rule-based Question Generator

### ScanNet Scene Analysis

In [4]:
from flow.ScanNet_scene_analyzer import ScanNetSceneAnalyzer

In [5]:
# Analyze a single ScanNet scene directory. The returned statistics dict
# (scene id, instance count, unique labels, label/instance maps, ...) is the
# last expression of the cell and is therefore displayed as its output.
scene_analyzer = ScanNetSceneAnalyzer(
    scene_dir='./example/data/ScanNet-scenes/scene0406_00',
)
scene_analyzer.analyze()

{'scene_id': 'scene0406_00',
 'num_instances': 30,
 'unique_labels': ['toilet',
  'shower curtain',
  'doorframe',
  'bar',
  'floor',
  'shower walls',
  'trash can',
  'bathtub',
  'sink',
  'picture',
  'mirror',
  'wall',
  'toilet paper',
  'door',
  'counter',
  'ceiling',
  'towel'],
 'instance_map': {'toilet': [3],
  'shower curtain': [0],
  'doorframe': [22],
  'bar': [27],
  'floor': [2],
  'shower walls': [1],
  'trash can': [7],
  'bathtub': [5],
  'sink': [6],
  'picture': [8],
  'mirror': [12],
  'wall': [13, 14, 15, 17, 18, 24, 25],
  'toilet paper': [19],
  'door': [23],
  'counter': [4],
  'ceiling': [28],
  'towel': [9, 10, 11, 16, 20, 21, 26, 29]},
 'object_map': {0: 'shower curtain',
  1: 'shower walls',
  2: 'floor',
  3: 'toilet',
  4: 'counter',
  5: 'bathtub',
  6: 'sink',
  7: 'trash can',
  8: 'picture',
  9: 'towel',
  10: 'towel',
  11: 'towel',
  12: 'mirror',
  13: 'wall',
  14: 'wall',
  15: 'wall',
  16: 'towel',
  17: 'wall',
  18: 'wall',
  19: 'toilet

In [6]:
# Alternatively, analyze every scene found under a directory through the CLI
# entry point of the same module. One summary JSON file per scene is exported
# to the directory given by --export_dir.
# NOTE(review): --skip_confirm presumably bypasses an interactive confirmation
# prompt so the cell can run unattended — confirm against the CLI help.
!python -m flow.ScanNet_scene_analyzer \
    --scenes "./example/data/ScanNet-scenes/" \
    --export_dir "./example/output/analyzed_scenes/" \
    --skip_confirm

Found 2 to be processed with 80 process(es):
./example/data/ScanNet-scenes/scene0407_00
./example/data/ScanNet-scenes/scene0406_00
Summarized JSON files will be exported to /home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/output/analyzed_scenes.
100%|██████████████████████████████████████████| 2/2 [00:01<00:00,  1.19tasks/s]


### Question Generation

In [7]:
import os

from joblib import delayed

from flow.rule.distance_FV import DistanceCompareFVGenerator
from flow.rule.distance_NI import DistanceNIGenerator
from flow.rule.quantity_FV import QuantityCompareFVGenerator
from flow.rule.quantity_NI import QuantityNIGenerator
from flow.rule.volume_FV import VolumeCompareFVGenerator
from flow.rule.volume_NI import VolumeNIGenerator
from flow.utils.io import enum_files
from flow.utils.parallel import ParallelTqdm

In [8]:
# Tunable settings for this demo run (kept together so they are easy to change).
ANALYZED_SCENES_DIR = './example/output/analyzed_scenes/'
QUESTION_OUTPUT_DIR = './example/output/'
N_QUESTIONS_PER_TASK = 6
N_JOBS = 8

# Each rule-based generator class paired with the JSON file name passed to it
# as its output file.
GENERATOR_OUTPUTS = [
    (VolumeNIGenerator, 'NUM-volume-NI.json'),
    (VolumeCompareFVGenerator, 'NUM-volume_compare-FV.json'),
    (QuantityNIGenerator, 'NUM-count-NI.json'),
    (QuantityCompareFVGenerator, 'NUM-count_compare-FV.json'),
    (DistanceNIGenerator, 'NUM-distance-NI.json'),
    (DistanceCompareFVGenerator, 'NUM-distance_compare-FV.json'),
]


def run_generator(
    generator_class,
    scene_stat_json_file,
    output_json_file,
    n_questions=N_QUESTIONS_PER_TASK,
):
    """Instantiate one generator for one analyzed scene and generate questions.

    Args:
        generator_class: One of the rule-based generator classes imported above.
        scene_stat_json_file: Path to an analyzed-scene JSON file.
        output_json_file: Path handed to the generator as its output file.
        n_questions: Number of questions requested from the generator.

    Returns:
        Whatever ``generate`` returns for the given generator.
    """
    generator = generator_class(
        scene_stat_json_file=scene_stat_json_file,
        output_json_file=output_json_file,
    )
    return generator.generate(n_questions=n_questions)


# One task per (analyzed scene, generator) pair, scenes in the outer loop.
# NOTE(review): every scene targets the same output file for a given generator
# while tasks run in parallel — confirm the generators handle concurrent
# writes to a shared file safely.
tasks = [
    (generator, scene_json, os.path.join(QUESTION_OUTPUT_DIR, filename))
    for scene_json in enum_files(ANALYZED_SCENES_DIR, 'json')
    for generator, filename in GENERATOR_OUTPUTS
]

# The trailing semicolon keeps IPython from echoing the list of per-task
# return values as the cell output.
ParallelTqdm(n_jobs=N_JOBS)(
    delayed(run_generator)(generator_class, scene_json, output_json)
    for generator_class, scene_json, output_json in tasks
);


Found 2 files with extension "json" in "./example/output/analyzed_scenes/":
/home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/output/analyzed_scenes/scene_stats-scene0406_00.json
/home/jitri7/projects/yifanw/NUMINA-dev/NUMINA-flow/example/output/analyzed_scenes/scene_stats-scene0407_00.json


0tasks [00:00, ?tasks/s]

[WARN] scene0406_00 - RULE-volume-NI: Only 1 unique candidates available for 6 questions. Sampling only 1 questions.
[WARN] scene0406_00 - RULE-volume_compare-FV: Only 0 unique candidates available for 6 questions. Sampling only 0 questions.
[WARN] scene0406_00 - RULE-volume_compare-FV: No questions generated. Skipping...
[WARN] scene0407_00 - RULE-volume_compare-FV: Only 0 unique candidates available for 6 questions. Sampling only 0 questions.
[WARN] scene0407_00 - RULE-volume_compare-FV: No questions generated. Skipping...
[WARN] scene0407_00 - RULE-volume-NI: Only 0 unique candidates available for 6 questions. Sampling only 0 questions.
[WARN] scene0407_00 - RULE-volume-NI: No questions generated. Skipping...


[None, None, None, None, None, None, None, None, None, None, None, None]