In [None]:
from transformers import pipeline

# Build a pipeline from the ViT checkpoint named below.
vision_classifier = pipeline(model="google/vit-base-patch16-224")

# Classify one remote image and keep only the label plus the score rounded to
# four decimals for each returned prediction.
preds = [
    dict(score=round(entry["score"], 4), label=entry["label"])
    for entry in vision_classifier(
        images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
    )
]
preds

In [None]:
from transformers import pipeline
# Object-detection pipeline. No checkpoint is named here, so the model choice is
# left to `pipeline`. NOTE: this rebinds `vision_classifier`, which the previous
# cell bound to the ViT pipeline.
vision_classifier = pipeline(task="object-detection")
# Zero-shot object-detection pipeline; it is created here but not called in the
# cells visible in this part of the notebook.
zeroshot_vision_classifier = pipeline(task="zero-shot-object-detection")

In [None]:
from PIL import Image

# Image to run detection on (relative path written with Windows separators).
path = r"TACO\data\dumped\0b5xei5CRT5Z5XqxBmqjL8UDKn7EiEZV1aNBbxwU.jpeg"
# Alternative input kept for reference:
#r"C:\Users\devic\Downloads\zero-sh-obj-detection_1.png"
image = Image.open(path)

# `vision_classifier` is the pipeline created in an earlier cell. The trailing
# comment preserves the candidate labels that were tried with a zero-shot call.
predictions = vision_classifier(image)#, candidate_labels=["human face", "paper", "recycleable", "unknown", "glass"],)

In [None]:
# https://huggingface.co/docs/transformers/v4.32.0/en/tasks/object_detection
# https://huggingface.co/docs/transformers/v4.32.0/en/tasks/zero_shot_object_detection#zeroshot-object-detection-pipeline

In [None]:
predictions

In [None]:
from PIL import ImageDraw

# `Image.convert` returns a new image instead of modifying the receiver, so the
# result has to be kept; an RGB canvas is needed for the RGB colour tuple below.
image = image.convert("RGB")
draw = ImageDraw.Draw(image)

# Pure red in 8-bit RGB. The former value (1, 0, 0) is one step above black and
# is practically invisible as a "red" outline.
BOX_COLOR = (255, 0, 0)

# Overlay every detection (from the `predictions` list built in an earlier cell)
# as an outlined box with a "label: score" caption at its top-left corner.
for prediction in predictions:
    box = prediction["box"]
    label = prediction["label"]
    score = prediction["score"]

    # Read the corners by key rather than relying on the dict's insertion order.
    xmin, ymin, xmax, ymax = (box[key] for key in ("xmin", "ymin", "xmax", "ymax"))
    draw.rectangle((xmin, ymin, xmax, ymax), outline=BOX_COLOR, width=1)
    draw.text((xmin, ymin), f"{label}: {round(score,2)}", fill=BOX_COLOR)

image

In [None]:
predictions

In [None]:
import torch
# Displayed as the cell output: whether PyTorch reports a usable CUDA device.
torch.cuda.is_available()

In [None]:
 # Only query the device name when CUDA is usable; on a CPU-only machine the
 # unguarded call raises and would stop a "Run All".
 torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CUDA is not available"

In [None]:
from transformers import AutoTokenizer, AutoModel

# Tokenizer and model are loaded from the same checkpoint name.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]
# Pad to a common length, truncate over-long inputs, return PyTorch tensors.
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
# A bare expression is only displayed when it is the last statement of a cell,
# so the tokenizer output has to be printed explicitly to be seen here.
print(inputs)
outputs = model(**inputs)
outputs

In [None]:
import torch
from transformers import AutoModelForSequenceClassification

# Same checkpoint name as before, loaded through the sequence-classification class.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# `inputs` is the tokenized batch built in an earlier cell.
outputs = model(**inputs)
print(outputs.logits)

# The model returns logits; softmax over the last axis normalises each row.
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)

# Index of the largest entry in each row.
predictions_label = torch.argmax(predictions, dim=-1)
print(predictions_label)

# Index -> label-name mapping stored on the model config.
print(model.config.id2label)

In [None]:
from transformers import AutoConfig

# Load three configurations by checkpoint name and print the concrete class
# that AutoConfig returned for each of them.
bert_config = AutoConfig.from_pretrained("bert-base-cased")
print(type(bert_config))

gpt_config = AutoConfig.from_pretrained("gpt2")
print(type(gpt_config))

bart_config = AutoConfig.from_pretrained("facebook/bart-base")
print(type(bart_config))
# Finally dump the full BERT configuration.
print(bert_config)


In [None]:

from transformers import AutoModelForSequenceClassification

# NOTE: `torch` and `checkpoint` are not defined in this cell; they must already
# exist in the kernel from earlier cells.

# Two batches of one sequence each (14 and 4 token ids).
ids1 = torch.tensor(
    [[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]]
)
ids2 = torch.tensor([[1045, 5223, 2023, 1012]])
# Both sequences in one batch: the second row is the ids of `ids2` followed by
# zeros so that it has the same length as the first row.
all_ids = torch.tensor(
    [[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012],
     [1045, 5223, 2023, 1012,    0,    0,    0,     0,     0,    0,    0,    0,    0,    0]]
)

# Every call passes only the ids; no attention mask is supplied.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
print(model(ids1).logits)
print(model(ids2).logits)
print(model(all_ids).logits) # padded but no mask returns different results

In [None]:
from transformers import AutoTokenizer

# NOTE: `AutoModelForSequenceClassification` is used below but not imported in
# this cell; it must already be in the kernel from an earlier cell.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this.",
]
# tokens = tokenizer(sentences) # no truncation or padding, cannot return tensors
# print(tokens)
tokens_with_mask = tokenizer(sentences, padding=True, return_tensors="pt") # attention mask removes padding from the evaluation
print(tokens_with_mask)

# Everything the tokenizer returned is unpacked into the model call.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
print(model(**tokens_with_mask).logits)


In [None]:
# datasets - dynamic batching and padding
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/dynamic_padding.ipynb
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding

# NOTE(review): `tokenized_datasets` is not defined in any cell visible here and
# `tokenizer` comes from an earlier cell — confirm both exist before running.
data_collator = DataCollatorWithPadding(tokenizer)
train_dataloader = DataLoader(
    tokenized_datasets["train"], batch_size=16, shuffle=True, collate_fn=data_collator
)

# Print the input_ids shape of the first seven batches: the break is checked
# after the print, so steps 0 through 6 are shown.
for step, batch in enumerate(train_dataloader):
    print(batch["input_ids"].shape)
    if step > 5:
        break

# save load dataset to disk
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/save_load_dataset.ipynb
#memory mapping / arrow
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/memory_mapping_streaming.ipynb

In [None]:
# Trainer API - metrics
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/trainer_api.ipynb#scrollTo=lP1Acnlxol2S
# Imports are repeated in this cell so it does not depend on cells that appear
# later in the notebook (where some of these names were first imported).
import numpy as np
from datasets import load_metric
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

# NOTE(review): `checkpoint`, `tokenizer`, `data_collator` and
# `tokenized_datasets` must already exist in the kernel; `tokenized_datasets`
# is not defined in any cell visible here.
metric = load_metric("glue", "mrpc")

def compute_metrics(eval_preds):
    """Score a batch of evaluation predictions with the GLUE/MRPC metric.

    Args:
        eval_preds: A pair ``(logits, labels)``.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class of each row
        of ``logits`` compared against ``labels``.
    """
    logits, labels = eval_preds
    # Pick the highest-scoring class along the last axis.
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments("test-trainer", evaluation_strategy="epoch")
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
trainer.train()

In [None]:
# custom training loop
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/training_loop.ipynb
# https://huggingface.co/docs/accelerate/index
# https://www.youtube.com/watch?v=s7dy8QRgjJ0&list=PLo2EIpI_JMQvWfQndUesu0nPBAtZ9gP1o&index=31
# + from accelerate import Accelerator
# + accelerator = Accelerator()

# + model, optimizer, training_dataloader, scheduler = accelerator.prepare(
# +     model, optimizer, training_dataloader, scheduler
# + )

#   for batch in training_dataloader:
#       optimizer.zero_grad()
#       inputs, targets = batch
#       inputs = inputs.to(device)
#       targets = targets.to(device)
#       outputs = model(inputs)
#       loss = loss_function(outputs, targets)
# +     accelerator.backward(loss)
#       optimizer.step()
#       scheduler.step()

from transformers import Trainer, TrainingArguments

# Attach the label mappings BEFORE training and pushing: the config is uploaded
# together with the weights, so mappings assigned after the push never reach
# the Hub copy.
# NOTE(review): `label_names` is not defined in any cell visible here — confirm.
model.config.id2label = {str(i): lbl for i, lbl in enumerate(label_names)}
model.config.label2id = {lbl: str(i) for i, lbl in enumerate(label_names)}

args = TrainingArguments(
    "bert-fine-tuned-cola",
    evaluation_strategy="epoch",
    save_strategy="epoch", # <--- save strategy
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True, # <--- push to hub
)

# `model`, `tokenizer`, `compute_metrics` and `tokenized_datasets` come from
# other cells.
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()

# Push through the trainer, with a commit message ...
trainer.push_to_hub("End of training")
repo_name = "bert-fine-tuned-cola"

# ... and push model and tokenizer individually to the named repository.
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

# custom loss function
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/custom_loss.ipynb

In [None]:
import albumentations
import numpy as np
import torch

# Augmentation pipeline: resize to 480x480, horizontal flip and random
# brightness/contrast (both configured with p=1.0).
transform = albumentations.Compose(
    [
        albumentations.Resize(480, 480),
        albumentations.HorizontalFlip(p=1.0),
        albumentations.RandomBrightnessContrast(p=1.0),
    ],
    # Boxes are declared to be in "coco" format; their labels are passed to the
    # transform through the `category` keyword.
    bbox_params=albumentations.BboxParams(format="coco", label_fields=["category"]),
)

In [None]:
def formatted_anns(image_id, category, area, bbox):
    """Build COCO-style annotation dicts for the objects of one image.

    Args:
        image_id: Identifier copied into every annotation.
        category: Sequence of category ids, one per object.
        area: Sequence of areas, indexed in parallel with ``category``.
        bbox: Sequence of boxes, indexed in parallel with ``category``; each
            box is converted to a ``list``.

    Returns:
        A list with one dict per entry of ``category`` holding the keys
        ``image_id``, ``category_id``, ``iscrowd`` (always 0), ``area`` and
        ``bbox``.

    Raises:
        IndexError: If ``area`` or ``bbox`` is shorter than ``category``.
    """
    return [
        {
            "image_id": image_id,
            "category_id": category_id,
            # Lowercase spelling, matching the `iscrowd` key used by the
            # annotation files and processor output elsewhere in this notebook.
            "iscrowd": 0,
            "area": area[index],
            "bbox": list(bbox[index]),
        }
        for index, category_id in enumerate(category)
    ]

In [None]:
# transforming a batch
def transform_aug_ann(examples):
    """Augment a batch of examples and run it through the image processor.

    ``examples`` is indexed by ``"image_id"``, ``"image"`` and ``"objects"``;
    each objects entry is indexed by ``"bbox"``, ``"category"`` and ``"area"``.
    Every image goes through the module-level ``transform`` with its boxes and
    categories, the results are wrapped by ``formatted_anns`` and everything is
    handed to the module-level ``image_processor``, whose result is returned.
    """
    image_ids = examples["image_id"]

    augmented_images = []
    augmented_boxes = []
    original_areas = []
    augmented_categories = []
    for pil_image, objects in zip(examples["image"], examples["objects"]):
        # NOTE(review): the trailing [:, :, ::-1] reverses the channel axis of
        # the RGB array — confirm that this is what the processor expects.
        pixels = np.array(pil_image.convert("RGB"))[:, :, ::-1]
        augmented = transform(
            image=pixels, bboxes=objects["bbox"], category=objects["category"]
        )

        augmented_images.append(augmented["image"])
        augmented_boxes.append(augmented["bboxes"])
        augmented_categories.append(augmented["category"])
        # Areas are forwarded exactly as stored; they are not recomputed after
        # the transform.
        original_areas.append(objects["area"])

    targets = []
    for id_, cat_, ar_, box_ in zip(
        image_ids, augmented_categories, original_areas, augmented_boxes
    ):
        targets.append(
            {"image_id": id_, "annotations": formatted_anns(id_, cat_, ar_, box_)}
        )

    return image_processor(images=augmented_images, annotations=targets, return_tensors="pt")

In [None]:
 from datasets import load_dataset

 # Load the local ./dataset directory through the "imagefolder" loader.
 dataset = load_dataset("imagefolder", data_dir="./dataset")

In [None]:
load_dataset

In [None]:
dataset["train"][0]['image']

In [None]:
coco_dataset = load_dataset("HuggingFaceM4/COCO", streaming=True)

In [None]:
import datasets
ds = datasets.load_dataset("ydshieh/coco_dataset_script", "2017", data_dir="./dummy_data/")


In [None]:
ds['train'][0]

In [None]:
ds['train'][0]

In [None]:
ds

In [None]:
# https://huggingface.co/datasets/rafaelpadilla/coco2017
dt = load_dataset('rafaelpadilla/coco2017')

In [None]:
# https://huggingface.co/datasets/CreatlV/CoVa-coco-v2-fold2



In [46]:
# import inspect
# inspect.getmro()
type(image_processor).mro()

[transformers.models.yolos.image_processing_yolos.YolosImageProcessor,
 transformers.image_processing_utils.BaseImageProcessor,
 transformers.image_processing_utils.ImageProcessingMixin,
 transformers.utils.hub.PushToHubMixin,
 object]

In [21]:

# PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray, but got .

print(inputs)

{'pixel_values': tensor([[[[-1.5699, -0.7650, -0.7993,  ..., -1.8097, -1.7240, -1.6727],
          [-1.5528, -0.6623, -0.7822,  ..., -1.7583, -1.7412, -1.7583],
          [-1.4158, -0.7308, -0.5938,  ..., -1.7240, -1.7069, -1.6384],
          ...,
          [-0.9363, -0.8678, -0.6794,  ...,  0.3994,  1.8379,  0.4679],
          [-1.6042, -1.2617, -0.5596,  ...,  1.7352,  1.3070, -0.7993],
          [-1.5528, -1.5699, -1.3130,  ...,  1.6153, -0.1657, -1.2788]],

         [[-1.5630, -0.7402, -0.7927,  ..., -1.6856, -1.5980, -1.5455],
          [-1.5455, -0.6352, -0.7577,  ..., -1.6331, -1.6155, -1.6331],
          [-1.4055, -0.7052, -0.5826,  ..., -1.5980, -1.5805, -1.5105],
          ...,
          [-0.8803, -0.8452, -0.6877,  ...,  0.4153,  1.9034,  0.4678],
          [-1.5630, -1.2654, -0.6001,  ...,  1.8333,  1.2906, -0.9153],
          [-1.5105, -1.5805, -1.3529,  ...,  1.5882, -0.4251, -1.3880]],

         [[-1.4384, -0.6193, -0.6715,  ..., -1.4907, -1.3861, -1.3339],
          [-1

In [52]:
# import torch
# labels = [
#     {
#         'class_labels':torch.LongTensor([1]),
#         'boxes': torch.FloatTensor([[0.0, 0.0, 1.0, 1.0]]),
#         },
# ]

# Call the module itself rather than `.forward()`: calling the instance runs
# any registered hooks, which a direct `forward` call silently skips.
outputs = model(**inputs)
print(outputs)

YolosObjectDetectionOutput(loss=tensor(4.9529, grad_fn=<AddBackward0>), loss_dict={'loss_ce': tensor(1.9190, grad_fn=<NllLoss2DBackward0>), 'loss_bbox': tensor(0.2211, grad_fn=<DivBackward0>), 'loss_giou': tensor(0.9641, grad_fn=<DivBackward0>), 'cardinality_error': tensor(0.)}, logits=tensor([[[-30.8526, -11.5561, -16.8260,  ..., -25.6281, -16.9084,  -0.0901],
         [-23.0619,  -6.3175,  -9.3645,  ..., -15.1229,  -9.4487,   0.3318],
         [-22.9314,  -8.4993, -15.7024,  ..., -18.1679, -11.6726,  -1.0845],
         ...,
         [-24.2839, -10.3712, -18.5917,  ..., -18.4504, -11.4156,  -1.8397],
         [-26.9704,  -9.4384, -18.2385,  ..., -17.2723, -12.4500,  -2.6774],
         [-37.3419, -15.4624, -25.6961,  ..., -43.8754, -24.3447,  -3.1460]]],
       grad_fn=<ViewBackward0>), pred_boxes=tensor([[[0.0758, 0.0382, 0.1080, 0.0737],
         [0.0725, 0.4880, 0.1436, 0.4331],
         [0.1143, 0.3144, 0.0378, 0.0568],
         [0.2436, 0.4913, 0.0201, 0.0277],
         [0.3669, 0

In [None]:
outputs.loss

In [None]:
# model predicts bounding boxes and corresponding COCO classes
logits, bboxes = outputs.logits, outputs.pred_boxes


# print results
# `image.size` is reversed before being handed over as the target size
# (presumably PIL's (width, height) -> (height, width); `image` comes from
# another cell).
target_sizes = torch.tensor([image.size[::-1]])
# Keep detections scoring above 0.6; [0] selects the result for the only image.
results = image_processor.post_process_object_detection(
    outputs, threshold=0.6, target_sizes=target_sizes
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(coordinate, 2) for coordinate in box.tolist()]
    class_name = model.config.id2label[label.item()]
    confidence = round(score.item(), 3)
    print(f"Detected {class_name} with confidence {confidence} at location {box}")

In [None]:
outputs.keys()

In [33]:
feature_extractor.preprocess(image,
                           annotations=anns[0],
                           return_tensors="pt")

{'pixel_values': tensor([[[[-1.5699, -0.7650, -0.7993,  ..., -1.8097, -1.7240, -1.6727],
          [-1.5528, -0.6623, -0.7822,  ..., -1.7583, -1.7412, -1.7583],
          [-1.4158, -0.7308, -0.5938,  ..., -1.7240, -1.7069, -1.6384],
          ...,
          [-0.9363, -0.8678, -0.6794,  ...,  0.3994,  1.8379,  0.4679],
          [-1.6042, -1.2617, -0.5596,  ...,  1.7352,  1.3070, -0.7993],
          [-1.5528, -1.5699, -1.3130,  ...,  1.6153, -0.1657, -1.2788]],

         [[-1.5630, -0.7402, -0.7927,  ..., -1.6856, -1.5980, -1.5455],
          [-1.5455, -0.6352, -0.7577,  ..., -1.6331, -1.6155, -1.6331],
          [-1.4055, -0.7052, -0.5826,  ..., -1.5980, -1.5805, -1.5105],
          ...,
          [-0.8803, -0.8452, -0.6877,  ...,  0.4153,  1.9034,  0.4678],
          [-1.5630, -1.2654, -0.6001,  ...,  1.8333,  1.2906, -0.9153],
          [-1.5105, -1.5805, -1.3529,  ...,  1.5882, -0.4251, -1.3880]],

         [[-1.4384, -0.6193, -0.6715,  ..., -1.4907, -1.3861, -1.3339],
          [-1

In [22]:
import json
from itertools import groupby

from PIL import Image
from transformers import AutoModel,AutoConfig, AutoImageProcessor, AutoModelForObjectDetection, AutoFeatureExtractor

# AutoConfig.from_pretrained("clip")
# Detection model and its image processor, both from the same checkpoint.
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")
image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")

path = r"TACO\data\batch_3\IMG_4852.JPG"
image = Image.open(path)
# from datasets import load_dataset

# dataset = load_dataset("./dataset", 'mini-multi')
annotations_path_waste = r"C:\Users\devic\OneDrive\Documents\mini_project\detect-waste\annotations\annotations_train.json"
# Use a context manager so the file handle is closed once the JSON is parsed.
with open(annotations_path_waste) as annotations_file:
    annotations_train_waste = json.load(annotations_file)

# `groupby` only merges *consecutive* items with equal keys, so sort by
# image_id first; otherwise one image can be split across several groups.
# `sorted` is stable, so the order inside each group is unchanged.
annotations_by_image = sorted(
    annotations_train_waste['annotations'], key=lambda ann: ann['image_id']
)
anns = [
    {'image_id': key, 'annotations': list(group)}
    for key, group in groupby(annotations_by_image, key=lambda ann: ann['image_id'])
]
# NOTE(review): `anns[0]` is simply the first group; nothing here checks that it
# belongs to the image opened from `path` — confirm the pairing.
inputs = image_processor.preprocess(image,annotations=anns[0],
                           return_tensors="pt")
# model(dataset['train'][0]['images'] )

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [1]:
from datasets import load_dataset

dataset = load_dataset("./dataset", 'mini-multi')

KeyboardInterrupt: 

In [9]:
dataset['train'][0]

{'image_id': 2,
 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1537x2049>,
 'width': 0,
 'height': 0,
 'objects': {'id': [3, 4],
  'area': [73832, 915],
  'bbox': [[632.0, 987.0, 500.0, 374.0], [632.0, 989.0, 44.0, 51.0]],
  'category': [1, 1]}}

In [12]:
import json

annotations_path_waste = r"C:\Users\devic\OneDrive\Documents\mini_project\detect-waste\annotations\annotations_train.json"
# Use a context manager so the file handle is closed once the JSON is parsed.
with open(annotations_path_waste) as annotations_file:
    annotations_train_waste = json.load(annotations_file)
# Show the first image record.
annotations_train_waste['images'][0]

{'coco_url': None,
 'date_captured': None,
 'file_name': 'batch_1/000010.jpg',
 'flickr_640_url': 'https://farm66.staticflickr.com/65535/40888872753_631ab0f441_z.jpg',
 'flickr_url': 'https://farm66.staticflickr.com/65535/40888872753_08ffb24902_o.png',
 'height': 2049,
 'id': 2,
 'license': None,
 'width': 1537}

In [45]:
# 'convert_coco_poly_to_mask',
# 'do_normalize',
# 'do_pad',
# 'do_rescale',
# 'do_resize',
# 'format',

# 'image_mean',
# 'image_std',
# 'model_input_names',
# 'normalize',
# 'normalize_annotation',
# 'pad',
# 'post_process',
# 'post_process_object_detection',
# 'prepare_annotation',
# 'preprocess',
# 'resample',
# 'rescale',
# 'rescale_factor',
# 'resize',
# 'resize_annotation',
# 'size',
# Print the first grouped annotation record, then pass the same record to the
# processor's `normalize_annotation` with a fixed image size.
# NOTE(review): the recorded output of the call is identical to the printed
# input — presumably the record has no top-level field that the method
# rescales; confirm against the image processor documentation.
print(anns[0])
image_processor.normalize_annotation(anns[0], image_size=(100, 416))

{'image_id': 2, 'annotations': [{'area': 73832.5, 'bbox': [632.0, 987.0, 500.0, 374.0], 'category_id': 1, 'id': 3, 'image_id': 2, 'iscrowd': 0}, {'area': 915.0, 'bbox': [632.0, 989.0, 44.0, 51.0], 'category_id': 1, 'id': 4, 'image_id': 2, 'iscrowd': 0}]}


{'image_id': 2,
 'annotations': [{'area': 73832.5,
   'bbox': [632.0, 987.0, 500.0, 374.0],
   'category_id': 1,
   'id': 3,
   'image_id': 2,
   'iscrowd': 0},
  {'area': 915.0,
   'bbox': [632.0, 989.0, 44.0, 51.0],
   'category_id': 1,
   'id': 4,
   'image_id': 2,
   'iscrowd': 0}]}

In [18]:
 # Scratch notes kept as comments. As code they could not run: `tensor` is not
# a defined name and the second literal is missing a comma between its entries.
#
# Looks like the `labels` entry produced by the image processor:
# {
#     'labels': [{'size': tensor([512, 682]), 'image_id': tensor([2]), 'class_labels': tensor([1, 1]), 'boxes': tensor([[0.2188, 0.3882, 0.1240, 0.1237],
#        [0.1622, 0.3355, 0.0109, 0.0169]]), 'area': tensor([2114.4607,   26.2043]), 'iscrowd': tensor([0, 0]), 'orig_size': tensor([3024, 4032])}]}
#
# Sketch of a minimal labels structure:
# {'labels':
#     {
#      'class_labels'   : [1],
#      'boxes': [[1,2,3,4]]
#     }
# }

# The only live statement; the recorded cell output is this dataset row.
dataset['train'][0]

{'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1537x2049>,
 'annotations': {'image_id': 2,
  'annotations': {'image_id': [2, 2],
   'id': [3, 4],
   'area': [73832, 915],
   'bbox': [[632.0, 987.0, 500.0, 374.0], [632.0, 989.0, 44.0, 51.0]],
   'category_id': [0, 0],
   'iscrowd': [False, False]}}}

In [20]:
dataset['train'][0]['images']


AttributeError: shape

In [15]:
dataset['train'][0]

{'images': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1537x2049>,
 'annotations': {'image_id': 2,
  'annotations': {'image_id': [2, 2],
   'id': [3, 4],
   'area': [73832, 915],
   'bbox': [[632.0, 987.0, 500.0, 374.0], [632.0, 989.0, 44.0, 51.0]],
   'category_id': [0, 0],
   'iscrowd': [False, False]}}}

In [4]:
annotations_train_waste['categories']

[{'category': 'metals_and_plastic',
  'id': 1,
  'name': 'metals_and_plastic',
  'supercategory': ''},
 {'category': 'other', 'id': 2, 'name': 'other', 'supercategory': ''},
 {'category': 'non_recyclable',
  'id': 3,
  'name': 'non_recyclable',
  'supercategory': ''},
 {'category': 'glass', 'id': 4, 'name': 'glass', 'supercategory': ''},
 {'category': 'paper', 'id': 5, 'name': 'paper', 'supercategory': ''},
 {'category': 'bio', 'id': 6, 'name': 'bio', 'supercategory': ''},
 {'category': 'unknown', 'id': 7, 'name': 'unknown', 'supercategory': ''}]

In [168]:
import csv
from pathlib import Path

# File name (last component of a URL) -> the CSV row in which that URL appears.
images = {}
# The csv module documentation asks for files to be opened with newline="".
with open(r"C:\Users\devic\OneDrive\Documents\mini_project\TACO\data\all_image_urls.csv", newline="") as f:
    # Field names are supplied explicitly, so every line of the file,
    # including the first one, is read as a data row.
    reader = csv.DictReader(f, fieldnames=["flickr_640_url","flickr_url"])
    for row in reader:
        for url in row.values():
            if url:
                # Both URLs of a row map to the same dict object.
                images[Path(url).name] = row

# Fill in the URL fields of every image record, in place.
# NOTE: the loop variable rebinds the notebook-level name `image`, which other
# cells use for a PIL image.
for image in annotations_train_waste['images']:
    name = Path(image['file_name']).name
    # Replace missing or None URL fields with an empty string.
    image['flickr_640_url'] = image.get('flickr_640_url') or ''
    image['flickr_url'] = image.get('flickr_url') or ''
    # When the CSV knows this file name, its row overrides the record's URLs.
    if name in images:
        image.update(images[name])

In [163]:
'a' or ''

'a'

In [183]:
# Ids of all images referenced by at least one annotation.
ids_to_download = {
    annotation['image_id'] for annotation in annotations_train_waste['annotations']
}
# Image records restricted to those ids, in their original order.
images_to_download = [
    record
    for record in annotations_train_waste['images']
    if record['id'] in ids_to_download
]


In [188]:
def download_url(image):
    """Return ``(file path as str, URL)`` for one image record.

    The record's ``flickr_640_url`` is chosen when its last path component
    equals the file name of ``file_name``; otherwise ``flickr_url`` is used.
    """
    local_path = Path(image['file_name'])
    candidate = image['flickr_640_url']
    if Path(candidate).name == local_path.name:
        chosen = candidate
    else:
        chosen = image['flickr_url']
    return str(local_path), chosen
# File path -> chosen URL for every image selected for download.
urls = dict(download_url(record) for record in images_to_download)
for key, value in urls.items():
    print(key, value)

batch_1\000010.jpg https://farm66.staticflickr.com/65535/40888872753_08ffb24902_o.png
batch_1\000019.jpg https://farm66.staticflickr.com/65535/47803331492_0e1085ca55_o.png
batch_1\000026.jpg https://farm66.staticflickr.com/65535/33978199868_88ee160849_o.png
batch_1\000047.jpg https://farm66.staticflickr.com/65535/33978200068_c6eed416ac_o.png
batch_1\000055.jpg https://farm66.staticflickr.com/65535/47803332212_af8cfa9704_o.png
batch_1\000001.jpg https://farm66.staticflickr.com/65535/33978202498_effbca58ef_o.png
batch_1\000005.jpg https://farm66.staticflickr.com/65535/47803335992_9c58683430_o.png
batch_1\000007.jpg https://farm66.staticflickr.com/65535/47855505601_f75a430abc_o.png
batch_1\000012.jpg https://farm66.staticflickr.com/65535/40888877173_855795c875_o.png
batch_1\000014.jpg https://farm66.staticflickr.com/65535/47066066634_c50443ca0c_o.png
batch_1\000048.jpg https://farm66.staticflickr.com/65535/47803337262_8e069056d4_o.png
batch_1\000053.jpg https://farm66.staticflickr.com/655

In [181]:
str(Path('aaa/a'))

'aaa\\a'

In [148]:
# Distinct ids of the records in `samples_640` (defined outside the cells
# visible here).
ids = {sample['id'] for sample in samples_640}
ids

{1500, 1578, 1598, 1599, 1601, 1602, 1604, 1606, 1607, 1608}

In [150]:
list(map(lambda x: x['width'],samples_640))

[640, 480, 640, 480, 480, 640, 640, 480, 480, 360]

In [152]:
list(map(lambda x: x['height'],samples_640))

[480, 640, 480, 640, 640, 480, 480, 640, 640, 640]

In [147]:
list(filter(lambda ann: ann['image_id'] in ids, annotations_train_waste['annotations']))

[{'area': 1046.540100000001,
  'attributes': {'occluded': False},
  'bbox': [196.04, 217.03, 60.81, 17.21],
  'category_id': 1,
  'id': 4784,
  'image_id': 1500,
  'iscrowd': 0,
  'segmentation': []},
 {'area': 1207.0305,
  'attributes': {'occluded': False},
  'bbox': [187.19, 288.92, 30.55, 39.51],
  'category_id': 7,
  'id': 4989,
  'image_id': 1578,
  'iscrowd': 0,
  'segmentation': []},
 {'area': 3066.3299999999995,
  'attributes': {'occluded': False},
  'bbox': [344.44, 173.27, 64.5, 47.54],
  'category_id': 1,
  'id': 5031,
  'image_id': 1598,
  'iscrowd': 0,
  'segmentation': []},
 {'area': 2061.746399999999,
  'attributes': {'occluded': False},
  'bbox': [189.65, 176.72, 72.19, 28.56],
  'category_id': 1,
  'id': 5032,
  'image_id': 1598,
  'iscrowd': 0,
  'segmentation': []},
 {'area': 487.74960000000056,
  'attributes': {'occluded': False},
  'bbox': [192.14, 262.33, 23.16, 21.06],
  'category_id': 1,
  'id': 5033,
  'image_id': 1599,
  'iscrowd': 0,
  'segmentation': []},
 {

In [1]:
annotations_train_waste.keys()

NameError: name 'annotations_train_waste' is not defined

In [62]:
# For each value in `val` (defined outside the cells visible here), look up the
# `images` entry stored under that value's file name.
l = [images[Path(entry).name] for entry in val.values()]
# Identity check: are the first two lookups the very same dict object?
l[0] is l[1]

True

{'image_id': 2,
 'annotations': [{'area': 73832.5,
   'bbox': [632.0, 987.0, 500.0, 374.0],
   'category_id': 1,
   'id': 3,
   'image_id': 2,
   'iscrowd': 0},
  {'area': 915.0,
   'bbox': [632.0, 989.0, 44.0, 51.0],
   'category_id': 1,
   'id': 4,
   'image_id': 2,
   'iscrowd': 0}]}

In [50]:
annotations_train['images'][0]

{'coco_url': None,
 'date_captured': None,
 'file_name': 'batch_1/000010.jpg',
 'flickr_640_url': 'https://farm66.staticflickr.com/65535/40888872753_631ab0f441_z.jpg',
 'flickr_url': 'https://farm66.staticflickr.com/65535/40888872753_08ffb24902_o.png',
 'height': 2049,
 'id': 2,
 'license': None,
 'width': 1537}

In [None]:
{"annotations" :[annotations_train['annotations'][0]]}

In [None]:
annotations_train['images'][0]

In [None]:
annotations

In [1]:
from datasets import load_dataset
from datasets import DownloadConfig

# Load the dataset from the local ./dataset path, passing a download
# configuration with num_proc=8.
dataset = load_dataset("./dataset",download_config=DownloadConfig(num_proc=8))



{'annotations_train': 'C:\\Users\\devic\\.cache\\huggingface\\datasets\\downloads\\83a0d8d43cddb0cbb24aae6d88bf1cb0e125be1b7d6358707d7f53ca92505548.38f63b4556b0492aa833afa96460b1920b7a4f12dea0626374f4c401b43484f5', 'annotations_test': 'C:\\Users\\devic\\.cache\\huggingface\\datasets\\downloads\\e64415acfb6e7a3b1c118929fc9be8be7ac5093fd78b2e471f23fa5509e1a68e.80d2da5585abbc3028d862965c20531e510dc53073d88fb267486f6e6664b5f1', 'taco_all_image_urls_csv': 'C:\\Users\\devic\\.cache\\huggingface\\datasets\\downloads\\2acf44e080d20c7d42b80e2734f991a4272aa9eee7f53bb3656f7fb3e454630b.607b80853ccd0c0882354f29f8b80ee9e90d9e110d03b48c5f6120cd09d916e8'}
