In [1]:
import os
from pycocotools.coco import COCO
from transformers import AutoImageProcessor, AutoModelForObjectDetection, Trainer
from functools import partial
import argparse
import utils
import dataset
import train_eval
from glob import glob

In [10]:
# Resume-training experiment: build the config, datasets, image processor and
# model, then run a Hugging Face Trainer and save its log.
# NOTE(review): both absolute paths below are machine-specific; the notebook will
# not run elsewhere without editing them.
current_path = ''
exp_conf_path = '/data/ephemeral/home/Dongjin/level2-objectdetection-cv-07/transformers/Dongjin/1019_resume_train/config/exp1.json'
default_conf_path = os.path.join(current_path, 'config/default.json')
# Presumably overlays the experiment config on top of the defaults -- confirm in utils.load_conf.
conf = utils.load_conf(default_conf_path, exp_conf_path)
# Checkpoint directory whose weights are loaded into the model further down.
checkpoint_path = '/data/ephemeral/home/Dongjin/level2-objectdetection-cv-07/transformers/Dongjin/1019_resume_train/result/deta-swin-large_3_img_size_720/checkpoint-15704'

conf['output_dir'] = os.path.join(current_path, 'result/' + conf['output_dir_format'].format(**conf)) # directory where results are saved
# Presumably returns a different path when the directory already exists -- confirm in utils.renew_if_path_exist.
conf['output_dir'] = utils.renew_if_path_exist(conf['output_dir'])

# Save the config used for this run as <output_dir>/<last component of output_dir>.json
os.makedirs(conf['output_dir'], exist_ok=True)
conf_path = os.path.join(conf['output_dir'], conf['output_dir'].split('/')[-1] + '.json')
utils.save_json(conf, conf_path)

# COCO annotation files for the train and validation splits.
train_info_path = os.path.join(conf['coco_dir_path'], conf['train_info_name'])
valid_info_path = os.path.join(conf['coco_dir_path'], conf['valid_info_name'])

coco_train = COCO(train_info_path)
coco_valid = COCO(valid_info_path)

# Class-id <-> class-name mappings derived from conf['classes'].
id2label = utils.get_id2label(conf['classes'])
label2id = utils.get_label2id(id2label)

# NOTE(review): range(10) presumably limits each split to 10 images (a smoke-test
# setting) -- confirm against dataset.COCO2dataset before a full training run.
train = dataset.COCO2dataset(conf['data_dir_path'], coco_train, range(10))
valid = dataset.COCO2dataset(conf['data_dir_path'], coco_valid, range(10))

train_augment_and_transform, validation_transform = dataset.get_transforms()

# Resize so neither side exceeds conf['image_size'], then pad to a square of that size.
image_processor = AutoImageProcessor.from_pretrained(
    conf['model_name'],
    do_resize=True,
    size={"max_height": conf['image_size'], "max_width": conf['image_size']},
    do_pad=True,
    pad_size={"height": conf['image_size'], "width": conf['image_size']},
)

# Make transform functions for batch and apply for dataset splits
train_transform_batch = partial(
    dataset.augment_and_transform_batch, transform=train_augment_and_transform, image_processor=image_processor
)
validation_transform_batch = partial(
    dataset.augment_and_transform_batch, transform=validation_transform, image_processor=image_processor
)

train = train.with_transform(train_transform_batch)
valid = valid.with_transform(validation_transform_batch)

# Metric callback for evaluation; threshold=0.0 is forwarded to train_eval.compute_metrics.
eval_compute_metrics_fn = partial(
    train_eval.compute_metrics, image_processor=image_processor, id2label=id2label, threshold=0.0
)

# model = AutoModelForObjectDetection.from_pretrained(checkpoint_path)

# Load weights from the checkpoint directory with this run's label mappings.
# NOTE(review): ignore_mismatched_sizes=True presumably lets loading continue when
# checkpoint tensor shapes differ from the configured heads; when resuming from our
# own checkpoint that could hide a re-initialised head -- confirm this is intended.
model = AutoModelForObjectDetection.from_pretrained(
    checkpoint_path,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

training_args = train_eval.load_train_args(conf)

# The image processor is handed to the Trainer through its `tokenizer` argument.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=valid,
    tokenizer=image_processor,
    data_collator=dataset.collate_fn,
    compute_metrics=eval_compute_metrics_fn,
)

# NOTE(review): train() is called without resume_from_checkpoint, so presumably only
# the model weights come from checkpoint_path and optimizer/scheduler state starts
# fresh -- confirm that this is the intended meaning of "resume".
trainer.train()

# Write the training log next to the saved config: <output_dir>/<last component>.txt
log_path = os.path.join(conf['output_dir'], conf['output_dir'].split('/')[-1] + '.txt')
utils.save_log(trainer, log_path)


# train_eval.test_eval(conf, model, image_processor)

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,Map,Map 50,Map 75,Map Small,Map Medium,Map Large,Mar 1,Mar 10,Mar 100,Mar Small,Mar Medium,Mar Large,Map General trash,Mar 100 General trash,Map Paper,Mar 100 Paper,Map Paper pack,Mar 100 Paper pack,Map Metal,Mar 100 Metal,Map Glass,Mar 100 Glass,Map Plastic,Mar 100 Plastic,Map Styrofoam,Mar 100 Styrofoam,Map Plastic bag,Mar 100 Plastic bag,Map Battery,Mar 100 Battery,Map Clothing,Mar 100 Clothing
1,17.3029,14.632709,0.2144,0.3932,0.1951,-1.0,0.0888,0.2284,0.1957,0.2696,0.3309,-1.0,0.1706,0.3467,0.0422,0.1273,0.1314,0.2474,0.3384,0.4667,0.6145,0.7,0.0,0.0,0.0924,0.25,-1.0,-1.0,0.2819,0.525,-1.0,-1.0,-1.0,-1.0
2,13.3979,13.588432,0.275,0.4245,0.3261,-1.0,0.135,0.2867,0.2418,0.3624,0.4028,-1.0,0.1722,0.4264,0.0216,0.1545,0.1267,0.2526,0.3568,0.6,0.7192,0.8,0.0,0.0,0.1724,0.2875,-1.0,-1.0,0.528,0.725,-1.0,-1.0,-1.0,-1.0


There were missing keys in the checkpoint model loaded: ['class_embed.0.weight', 'class_embed.0.bias', 'class_embed.1.weight', 'class_embed.1.bias', 'class_embed.2.weight', 'class_embed.2.bias', 'class_embed.3.weight', 'class_embed.3.bias', 'class_embed.4.weight', 'class_embed.4.bias', 'class_embed.5.weight', 'class_embed.5.bias', 'class_embed.6.weight', 'class_embed.6.bias', 'bbox_embed.0.layers.0.weight', 'bbox_embed.0.layers.0.bias', 'bbox_embed.0.layers.1.weight', 'bbox_embed.0.layers.1.bias', 'bbox_embed.0.layers.2.weight', 'bbox_embed.0.layers.2.bias', 'bbox_embed.1.layers.0.weight', 'bbox_embed.1.layers.0.bias', 'bbox_embed.1.layers.1.weight', 'bbox_embed.1.layers.1.bias', 'bbox_embed.1.layers.2.weight', 'bbox_embed.1.layers.2.bias', 'bbox_embed.2.layers.0.weight', 'bbox_embed.2.layers.0.bias', 'bbox_embed.2.layers.1.weight', 'bbox_embed.2.layers.1.bias', 'bbox_embed.2.layers.2.weight', 'bbox_embed.2.layers.2.bias', 'bbox_embed.3.layers.0.weight', 'bbox_embed.3.layers.0.bias', '

In [8]:
# Output directory of a previous run. It is the parent of the checkpoint-15704
# directory used as checkpoint_path in the training cell.
# NOTE(review): machine-specific absolute path.
path = '/data/ephemeral/home/Dongjin/level2-objectdetection-cv-07/transformers/Dongjin/1019_resume_train/result/deta-swin-large_3_img_size_720'

In [11]:
# NOTE(review): the recorded output of this cell is an OSError saying no model weight
# file exists directly in `path`; the training cell loads from a checkpoint-*
# subdirectory of it instead. Point from_pretrained at that subdirectory or drop this cell.
model = AutoModelForObjectDetection.from_pretrained(path)

OSError: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory /data/ephemeral/home/Dongjin/level2-objectdetection-cv-07/transformers/Dongjin/1019_resume_train/result/deta-swin-large_3_img_size_720.

In [36]:
def find_checkpoint_path(input_path):
    """Return the single ``checkpoint*`` entry located directly inside ``input_path``.

    Args:
        input_path: Directory expected to contain exactly one entry whose name
            starts with ``checkpoint``.

    Returns:
        The path of that entry, as returned by ``glob``.

    Raises:
        Exception: If the number of matching entries is not exactly one
            (zero matches included); the message reports how many were found.
    """
    output_paths = glob(os.path.join(input_path, 'checkpoint*'))

    # Any count other than one is an error. The original message formatted the
    # not-yet-assigned local `output_path`, which raised UnboundLocalError, and
    # an empty match list fell through to an IndexError on `output_paths[0]`.
    if len(output_paths) != 1:
        raise Exception(f'checkpoint는 1개여야 합니다. {len(output_paths)}개 있습니다.')

    return output_paths[0]


In [42]:
# Second attempt: load the model from the single checkpoint-* directory found under
# `path`, then train again.
# This cell reuses `conf`, `train`, `valid`, `image_processor` and
# `eval_compute_metrics_fn` left in the kernel by the earlier training cell, and
# `find_checkpoint_path` / `path` from the cells just above; it fails on a fresh
# kernel unless those cells have been run first.
# NOTE(review): unlike the earlier from_pretrained call, no id2label / label2id /
# ignore_mismatched_sizes are passed here, and the recorded run ended with
# "RuntimeError: selected index k out of range" -- cause not established from this
# notebook; investigate before relying on this cell.
model = AutoModelForObjectDetection.from_pretrained(find_checkpoint_path(path))

training_args = train_eval.load_train_args(conf)

# The image processor is handed to the Trainer through its `tokenizer` argument.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=valid,
    tokenizer=image_processor,
    data_collator=dataset.collate_fn,
    compute_metrics=eval_compute_metrics_fn,
)

trainer.train()


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


RuntimeError: selected index k out of range

In [11]:
# Ad-hoc inspection of a config entry.
# NOTE(review): the recorded output is KeyError: 'saved_model', which does not match
# the key read here, so the output looks stale; confirm that 'saved_model_path'
# exists in conf or remove this cell.
conf['saved_model_path']

KeyError: 'saved_model'

: 