In [8]:
import os
from pathlib import Path

import comet_ml

import torch
from torch.utils.data import DataLoader

from config import Config, load_config
from data import CLEVRSplit, CLEVRTextSplit
from model import Model, TextualModel, TrainingModel


import lightning as L
from lightning import Trainer
from lightning.pytorch.loggers.comet import CometLogger
from lightning.pytorch.callbacks import ModelCheckpoint


def log_to_comet():
    """Report whether this run should log to Comet.

    Currently hard-wired to ``False``. A disabled environment-variable
    check is kept in the comment below for reference.

    Returns:
        bool: Always ``False``.
    """
    # Disabled check, kept for reference:
    #     return ('COMET_API_KEY' in os.environ and
    #             'COMET_WORKSPACE' in os.environ and
    #             'COMET_EXPERIMENT_KEY' in os.environ)
    comet_enabled = False
    return comet_enabled

# Name of the experiment; it is interpolated into the "outputs/<name>/"
# checkpoint locations used further down in this notebook.
experiment_name = 'default-txt-scene--obj-desc'

# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

# Use CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [20]:
def load_best_checkpoint_config(experiment_name):
    """Load the config stored in the best checkpoint of an experiment.

    Searches ``outputs/<experiment_name>/`` for ``*.ckpt`` files, ignores the
    rolling "last" checkpoints (``last.ckpt`` and versioned ``last-v*.ckpt``)
    and reads ``hyper_parameters['config']`` from the remaining checkpoint.

    Args:
        experiment_name: Name of the sub-directory of ``outputs/`` to search.

    Returns:
        A ``(config, checkpoint_path)`` tuple; ``checkpoint_path`` is the
        chosen checkpoint file as a ``str``.

    Raises:
        FileNotFoundError: If the directory contains no checkpoint other than
            the "last" ones (previously this surfaced as a bare IndexError).
    """
    checkpoint_dir = Path(f"outputs/{experiment_name}/")
    candidates = [
        p for p in checkpoint_dir.glob('*.ckpt')
        if 'last.ckpt' not in p.name and not p.name.startswith('last-v')
    ]
    if not candidates:
        raise FileNotFoundError(
            f"No best checkpoint (*.ckpt other than last.ckpt) found in {checkpoint_dir}")

    # glob() yields files in arbitrary order; when several candidates exist,
    # pick the most recently written one so the choice is deterministic.
    best_checkpoint = max(candidates, key=lambda p: (p.stat().st_mtime, p.name))

    # NOTE(review): torch.load unpickles the file -- only use it on checkpoints
    # produced by this project, never on untrusted files.
    # map_location="cpu": only the stored config is read here, so loading must
    # not require the device the checkpoint was saved from.
    ckpt = torch.load(best_checkpoint, map_location="cpu")
    config = ckpt['hyper_parameters']['config']
    return config, str(best_checkpoint)

# Alternative config sources, currently disabled:
# config = Config()
# config = load_config()
# Reuse the config stored in the experiment's best checkpoint;
# checkpoint_path is that checkpoint's file path (str).
config, checkpoint_path = load_best_checkpoint_config(experiment_name)

# Optional manual overrides of the loaded config, currently disabled:
# config.resume_training = True
# config.rels_to_sample = 0
# config.use_txt_scene = True
# config.display_object_properties = True 

In [21]:
# Config returned by load_config(); used below as the baseline that the
# checkpoint's config is compared against.
default_config = load_config()

In [22]:
# Scratch cell: length of the string 'resume_training' (recorded output: 15).
len('resume_training')

15

In [23]:
# Report every attribute of default_config that is missing from, or has a
# different value in, the config loaded from the checkpoint.
default_values = vars(default_config)
loaded_values = vars(config)
for dk, dv in default_values.items():
    if dk in loaded_values:
        if dv != loaded_values[dk]:
            print(f'{dk:25.25s} is different in config:  {dv} vs {loaded_values[dk]}')
    else:
        print(dk, 'not in config')

pad_idx                   is different in config:  0 vs 1
n_tokens                  is different in config:  95 vs 117
max_scene_size            is different in config:  259 vs 659
optimizer not in config
weight_decay not in config
use_txt_scene             is different in config:  False vs True
display_object_properties is different in config:  False vs True
profile not in config


In [3]:
# Select the split class from config.use_txt_scene and build the three splits.
split_cls = CLEVRTextSplit if config.use_txt_scene else CLEVRSplit
train_dataset, test_dataset, systematic_dataset = split_cls.build_splits(config)

# Copy the padding index of the training split into the config.
config.pad_idx = train_dataset.pad_idx

# Enable remove_unneeded_relations on the processors of both evaluation splits.
for eval_split in (test_dataset, systematic_dataset):
    eval_split.processor.remove_unneeded_relations = True

Building vocabulary


  0%|          | 0/699960 [00:00<?, ?it/s]

Building answers index


  0%|          | 0/699960 [00:00<?, ?it/s]

In [4]:
def _slurm_cpus_per_node(default=4):
    """Return the CPU count from SLURM_JOB_CPUS_PER_NODE, or ``default``.

    Slurm may report this variable in a compound form such as ``"8(x2)"`` or
    ``"72(x2),36"``, which a plain ``int()`` rejects. Only the leading CPU
    count (the first entry) is used.

    Args:
        default: Value returned when the variable is unset, empty, or does
            not start with an integer.

    Returns:
        int: Number of worker processes to use.
    """
    raw = os.environ.get("SLURM_JOB_CPUS_PER_NODE")
    if not raw:
        return default
    # Keep the first entry and drop an optional "(xN)" node multiplier.
    first_entry = raw.split(",")[0].split("(")[0].strip()
    try:
        return int(first_entry)
    except ValueError:
        return default


# Keyword arguments shared by all three DataLoaders.
dlkwargs = {
    'batch_size': config.batch_size,
    'num_workers': _slurm_cpus_per_node(),
    # Pinned host memory is only requested when CUDA is available.
    'pin_memory': torch.cuda.is_available(),
}

# Only the training loader shuffles; evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **dlkwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **dlkwargs)
systematic_loader = DataLoader(systematic_dataset, shuffle=False, **dlkwargs)

In [29]:
# Scratch cell: randint with both bounds set to 0 (recorded output: 0).
import random
random.randint(0, 0)

0

In [None]:
# Select the model class from config.use_txt_scene, instantiate it with the
# config, and wrap it in a TrainingModel.
model_cls = TextualModel if config.use_txt_scene else Model
model = model_cls(config)
training_model = TrainingModel(model, config)

In [None]:
# Calls TrainingModel.load_from_checkpoint on checkpoint_path and rebinds
# training_model, replacing the instance built in the previous cell.
training_model = TrainingModel.load_from_checkpoint(checkpoint_path)

In [None]:
# Directory that receives this experiment's checkpoints. It is kept in its
# own variable so that checkpoint_path (the best-checkpoint *file* returned by
# load_best_checkpoint_config) is not overwritten with a directory, which
# would break re-running the load_from_checkpoint cell.
checkpoint_dir = f"outputs/{experiment_name}/"
# makedirs(exist_ok=True) also creates a missing "outputs/" parent and avoids
# the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(checkpoint_dir, exist_ok=True)
# Keep the single best checkpoint according to the monitored metric, plus the
# most recent one (save_last=True), checking once per epoch.
checkpoint_callback = ModelCheckpoint(
    dirpath=checkpoint_dir, save_top_k=1, monitor="val_loss/dataloader_idx_0", every_n_epochs=1, save_last=True)


In [None]:
from lightning.pytorch import loggers as pl_loggers

# TensorBoard logger writing under "logs/".
tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/")

# Use the GPU only when CUDA is actually available, consistent with the
# torch.cuda.is_available() checks used for `device` and `pin_memory`;
# a hard-coded "gpu" accelerator fails on CPU-only machines.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"

trainer = Trainer(max_epochs=config.max_epochs, accelerator=accelerator, devices=1,
                  logger=tb_logger, callbacks=[checkpoint_callback])

In [None]:
# Run the test loop on the test and systematic loaders.
# NOTE(review): ckpt_path='last' presumably makes the trainer load the "last"
# checkpoint instead of the weights already held by training_model (loaded
# from checkpoint_path above) -- confirm that testing "last" is intended.
trainer.test(training_model, ckpt_path='last', dataloaders=[test_loader, systematic_loader])

In [None]:
# checkpoint_path = f"outputs/{experiment_name}/"
# if not os.path.exists(checkpoint_path):
#     os.mkdir(checkpoint_path)
# checkpoint_callback = ModelCheckpoint(
#     dirpath=checkpoint_path, save_top_k=1, monitor="val_loss/dataloader_idx_0", every_n_epochs=1, save_last=True)

# resume_from_path = None
# if config.resume_training:
#     resume_from_path = checkpoint_path

In [None]:
# def get_obj_properties(start, program):
#     next_ =  start['inputs'][0]
# #     properties = {'material': None, 'color': None, 'shape': None, 'size': None}
#     properties = {}
    
#     while program[next_]['function'] in {'filter_material', 'filter_color', 'filter_shape', 'filter_size'}:
#         next_fn = program[next_]
#         fn_type = next_fn['function']
#         property_type = fn_type.replace('filter_', '')
#         properties[property_type] = next_fn['value_inputs'][0]
#         next_ = next_fn['inputs'][0]
        
#     return properties

# def object_satisfy(object_, properties):
#     for k, v in properties.items():
#         if object_[k] != v:
#             return False
#     return True

# def is_object_relevant(object_, all_properties):
#     return any(object_satisfy(object_, properties) for properties in all_properties)

In [None]:
# # program = test_dataset.questions[1000]['program']
# # scene = test_dataset.indexed_scenes[test_dataset.questions[1000]['image_index']]

# prop = []
# for q in test_dataset.questions:
#     program = q['program']
#     scene = test_dataset.indexed_scenes[q['image_index']]

#     objects = scene['objects']

#     start_nodes = [fn for fn in program if fn['function'] in {'count', 'exist', 'unique', 'union'}]
# #     start_nodes = [fn for fn in program if fn['function'] in {'union' , 'unique'}]

#     relevant_properties = []
#     for obj_to_filter in start_nodes:
#         relevant_properties.append(get_obj_properties(obj_to_filter, program))

#     relevant_objects = [(o_idx, o) for o_idx, o in enumerate(objects) if is_object_relevant(o, relevant_properties)]

#     prop.append(len(relevant_objects) / len(objects))

In [None]:
# import numpy as np
# np.mean(prop)

In [None]:
# {fn['function'] for q in test_dataset.questions for fn in q['program']}

In [None]:
# import random
# qs = random.sample(test_dataset.questions, k=10)
# for q in qs:
#     print(q['question'], '\n')

In [None]:
# fn_types = list({fn['function'] for q in test_dataset.questions for fn in q['program']})

# for fn_type in fn_types:

#     count = 0
#     for idx, q in enumerate(test_dataset.questions):
#         for p in q['program']:
#             if p['inputs']:
#                 for input_ in p['inputs']:
#                     if fn_type == p['function'] and 'filter_' in q['program'][input_]['function']:
#                         count += 1
# #                         print(idx)
                
#     if count > 0:
#         print(fn_type) 

# count = 0
# for idx, q in enumerate(test_dataset.questions):
#     for p in q['program']:
#         if p['inputs']:
#             input_ = p['inputs'][0]
#         if 'relate' == p['function'] and q['program'][input_]['function'] != 'unique':
#             print(idx)
#             continue
#         else:
#             count += 1

In [None]:
# q = qs[9]

# program = q['program']
# scene = test_dataset.indexed_scenes[q['image_index']]

# objects = scene['objects']

# start_nodes = [fn for fn in program if fn['function'] in {'unique', 'count'}]

# relevant_properties = []
# for obj_to_filter in start_nodes:
#     relevant_properties.append(get_obj_properties(obj_to_filter, program))

# relevant_objects = [(o_idx, o) for o_idx, o in enumerate(objects) if is_object_relevant(o, relevant_properties)]

# print(q['question'], '\n')

# for idx, p in enumerate(program):
#     print(idx, ':', p)

In [None]:
# pad_used = []
# total_seqs = []
# for scenes, questions, answers in test_loader:
#     pad_used.append((scenes == config.pad_idx).sum())
#     total_seqs.append(scenes.numel())

In [None]:
from data import Scene
from matplotlib import pyplot as plt


# Inspect a single test question: build its Scene, render it as text and
# tokenize it. The globals defined here (scene_object, scene_str, ...) are
# displayed by the cells that follow.

# Local toggle: when False, filter_objects_from_relations below stays None.
remove_unneeded_relations = False

# Collects tokenized scene lengths; with the loop below disabled it ends up
# holding a single entry.
scene_lengths = []
# for question in test_dataset.questions:

# Fixed question index used for the inspection.
question = test_dataset.questions[2893]

# Look up the scene that belongs to the question's image.
image_idx = question['image_index']
scene = test_dataset.indexed_scenes[image_idx]

question_str = question['question']
answer_str = question['answer']

# Only ask the processor for objects to filter when the toggle above is set.
filter_objects_from_relations = None
if remove_unneeded_relations:
    filter_objects_from_relations = test_dataset.processor.get_objects_to_filter(question, scene)

# scene_str = self.scene_to_txt(scene, rels_to_sample=self.rels_to_sample)
# Build the Scene with options taken from the config, except for
# shuffle_relations and relations_to_sample, which are hard-coded here.
# NOTE(review): the meaning of relations_to_sample=0 is defined in
# data.Scene and is not visible in this notebook -- confirm.
scene_object = Scene.from_dict(scene,
                            shuffle_relations=True,
                            relations_to_sample=0,
                            only_front_right=config.only_front_right_relations,
                            filter_symmetric=config.filter_symmetric_relations,
                            always_display_properties=config.display_object_properties,
                            filter_objects_from_relations=filter_objects_from_relations)
# Text form of the scene, as produced by Scene.__str__.
scene_str = str(scene_object)

# Tokenize without padding or lower-casing so len() reflects the raw length.
tokenized_scene = test_dataset.processor.tokenize_sequence(
    scene_str, test_dataset.processor.max_scene_size, pad_seq=False, lower=False)

scene_lengths.append(len(tokenized_scene))

# Disabled: histogram of scene lengths over the dataset.
# plt.hist(scene_lengths, bins=50)
# plt.plot()

# Disabled: fraction of scenes longer than 259 tokens.
# print((np.array(scene_lengths) > 259).sum() / len(scene_lengths))

In [None]:
scene_str

In [None]:
scene_object.relations

In [None]:
scene_str

In [None]:
scene_object.relations

In [7]:
test_dataset.questions[2893]

{'question_index': 2893,
 'question_family_index': 19,
 'image_index': 289,
 'question': 'Is the material of the small yellow block that is to the left of the cyan metallic cylinder the same as the tiny ball?',
 'answer': 'no',
 'image_filename': 'CLEVR_valA_000289.png',
 'split': 'valA',
 'program': [{'value_inputs': [], 'inputs': [], 'function': 'scene'},
  {'value_inputs': ['cyan'], 'inputs': [0], 'function': 'filter_color'},
  {'value_inputs': ['metal'], 'inputs': [1], 'function': 'filter_material'},
  {'value_inputs': ['cylinder'], 'inputs': [2], 'function': 'filter_shape'},
  {'value_inputs': [], 'inputs': [3], 'function': 'unique'},
  {'value_inputs': ['left'], 'inputs': [4], 'function': 'relate'},
  {'value_inputs': ['small'], 'inputs': [5], 'function': 'filter_size'},
  {'value_inputs': ['yellow'], 'inputs': [6], 'function': 'filter_color'},
  {'value_inputs': ['cube'], 'inputs': [7], 'function': 'filter_shape'},
  {'value_inputs': [], 'inputs': [8], 'function': 'unique'},
  {