<a href="https://colab.research.google.com/github/donbcolab/composable_vlms/blob/main/notebooks/vision_model_evaluation_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages (comment this line out if they are already installed)
!pip install -q pycocotools Pillow tqdm transformers torch datasets huggingface_hub wandb timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m64.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.9/39.9 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [None]:
import os
from google.colab import userdata
import wandb

# Read the Weights & Biases API key from Colab's `userdata` store and publish it as the
# WANDB_API_KEY environment variable, so the key itself never appears in the notebook.
# (Presumably the `wandb` client picks the key up from this variable - confirm.)
os.environ['WANDB_API_KEY'] = userdata.get('WANDB_API_KEY')

In [None]:
import logging
import os
import shutil
import zipfile
from typing import List, Dict, Any, Tuple

import requests
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import HfApi, hf_hub_download
from pycocotools.coco import COCO
from tqdm import tqdm

# Configuration: every path, URL and size setting the notebook reads lives in this mapping.
CONFIG = dict(
    # COCO instance-annotation JSON that prepare_data() hands to pycocotools.
    annotation_file='/content/annotations/instances_train2017.json',
    # Directory holding the extracted COCO images that get copied into the subset.
    image_directory='/content/train2017/',
    # Upper bound on how many image ids prepare_data() keeps.
    num_images=500,
    # Hub repository the prepared dataset is pushed to.
    hf_dataset_name="dwb2023/coco-subset-for-vision-evaluation",
    # Root of the train/val folder layout written for the subset.
    output_directory='/content/coco_subset/',
    # Zip archives fetched by download_coco_data().
    coco_annotations_url="http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
    coco_images_url="http://images.cocodataset.org/zips/train2017.zip",
)

# Set up logging.
# basicConfig() silently does nothing when the root logger already has handlers, which
# is common in hosted notebook kernels; in that case none of the INFO messages emitted
# by this notebook are shown. force=True (Python 3.8+) removes the existing root
# handlers first so the level and format below always take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)



In [None]:
def download_file(url: str, save_path: str, block_size: int = 1024 * 1024, timeout: float = 60.0) -> None:
    """Stream the resource at ``url`` into ``save_path`` with a tqdm progress bar.

    Args:
        url: Address of the file to download.
        save_path: Local file the response body is written to; an existing file
            is overwritten.
        block_size: Number of bytes requested per chunk. The default of 1 MiB keeps
            the Python-level loop short for multi-gigabyte archives.
        timeout: Seconds to wait for the connection and for each read before
            ``requests`` gives up, so a stalled server cannot hang the notebook.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. Without
            this check the error page would be saved as if it were the file.
    """
    # The context manager releases the connection even if writing fails midway.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        with open(save_path, 'wb') as file, tqdm(
            desc=save_path,
            # A missing content-length yields 0; pass None so tqdm shows an
            # open-ended counter instead of a bar that is "complete" at 0/0.
            total=total_size or None,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(file.write(data))

def _download_and_extract(label: str, url: str, archive_path: str, extract_dir: str) -> None:
    """Download one zip archive, unpack it into ``extract_dir`` and delete the archive."""
    try:
        logging.info(f"Downloading COCO {label}...")
        download_file(url, archive_path)

        logging.info(f"Extracting COCO {label}...")
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    finally:
        # Remove the archive on failure as well, so a broken or partial download
        # does not keep occupying disk space.
        if os.path.exists(archive_path):
            os.remove(archive_path)


def download_coco_data():
    """Download and unpack the COCO annotations and images under ``/content``.

    Both archives are extracted into ``/content``; each zip file is deleted once it
    has been processed. The URLs come from ``CONFIG['coco_annotations_url']`` and
    ``CONFIG['coco_images_url']``.

    Raises:
        zipfile.BadZipFile: If a downloaded archive is not a valid zip file.
        Any exception raised by ``download_file`` is propagated unchanged.
    """
    content_root = '/content'

    # Annotations
    os.makedirs('/content/annotations', exist_ok=True)
    _download_and_extract(
        'annotations', CONFIG['coco_annotations_url'], '/content/annotations.zip', content_root
    )

    # Images
    os.makedirs(CONFIG['image_directory'], exist_ok=True)
    _download_and_extract(
        'images', CONFIG['coco_images_url'], '/content/train2017.zip', content_root
    )

    logging.info("COCO data downloaded and extracted successfully")

In [None]:
def prepare_data() -> Tuple[List[Dict[str, Any]], COCO]:
    """Load the COCO annotations and pick the image records used for the subset.

    If the annotation file named in ``CONFIG["annotation_file"]`` is missing, the
    COCO data is downloaded first via ``download_coco_data()``.

    Returns:
        A ``(images, coco)`` tuple: the image records returned by ``coco.loadImgs``
        for at most ``CONFIG["num_images"]`` matching image ids, and the ``COCO``
        index they were taken from.
    """
    annotation_path = CONFIG["annotation_file"]
    annotations_missing = not os.path.exists(annotation_path)
    if annotations_missing:
        logging.info("COCO annotations not found. Downloading COCO data...")
        download_coco_data()

    coco = COCO(annotation_path)

    # NOTE(review): when several category ids are passed, pycocotools appears to
    # return only images containing ALL of them (person AND car) - confirm this
    # is the intended selection.
    category_ids = coco.getCatIds(catNms=['person', 'car'])
    matching_image_ids = coco.getImgIds(catIds=category_ids)

    # Keep only the first num_images ids, in the order pycocotools returned them.
    selected_ids = matching_image_ids[:CONFIG["num_images"]]
    return coco.loadImgs(selected_ids), coco

In [None]:
def prepare_and_upload_hf_dataset(images: List[Dict[str, Any]], coco: COCO, hf_dataset_name: str) -> DatasetDict:
    """Copy the selected images into a train/val layout and build a Hugging Face dataset.

    The first 80% of ``images`` go to the ``train`` split and the rest to ``val``.
    Each row stores the copied file's path and the list of integer labels
    (``person`` -> 0, ``car`` -> 1) for that image's person/car annotations.

    Args:
        images: COCO image records (each needs ``file_name`` and ``id``).
        coco: The COCO index the records were taken from.
        hf_dataset_name: Hub repository id the dataset is pushed to.

    Returns:
        A ``DatasetDict`` with ``train`` and ``val`` splits, each with the columns
        ``image_path`` and ``labels``. The upload is best-effort: a failed push is
        logged and the dataset is still returned.

    NOTE(review): rows hold local file paths, not image data, so the pushed dataset
    only references files under ``CONFIG["output_directory"]`` - confirm whether the
    images themselves were meant to be uploaded.
    """
    splits = ('train', 'val')
    target_names = ['person', 'car']

    # Resolve the category ids and the COCO-id -> label mapping once, rather than
    # per image or through a dataset.map() closure over the whole COCO index.
    target_cat_ids = coco.getCatIds(catNms=target_names)
    label2id = {name: idx for idx, name in enumerate(target_names)}
    cat_id_to_label = {
        cat['id']: label2id[cat['name']] for cat in coco.loadCats(target_cat_ids)
    }

    # Prepare the directory structure for ImageFolder
    for split in splits:
        os.makedirs(os.path.join(CONFIG["output_directory"], split), exist_ok=True)

    # Dataset.from_dict expects one list per column, so collect column-wise.
    columns = {split: {'image_path': [], 'labels': []} for split in splits}
    train_cutoff = 0.8 * len(images)  # 80% train, 20% val

    for idx, img in enumerate(tqdm(images, desc="Preparing dataset")):
        split = 'train' if idx < train_cutoff else 'val'

        # Copy image
        src_path = os.path.join(CONFIG["image_directory"], img['file_name'])
        dst_path = os.path.join(CONFIG["output_directory"], split, img['file_name'])
        shutil.copy(src_path, dst_path)

        # Integer labels for the person/car annotations of this image
        ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=target_cat_ids, iscrowd=None)
        labels = [cat_id_to_label[ann['category_id']] for ann in coco.loadAnns(ann_ids)]

        columns[split]['image_path'].append(dst_path)
        columns[split]['labels'].append(labels)

    dataset = DatasetDict(
        {split: Dataset.from_dict(columns[split]) for split in splits}
    )

    try:
        dataset.push_to_hub(hf_dataset_name)
        logging.info(f"Dataset pushed to Hugging Face: {hf_dataset_name}")
    except Exception as e:
        # Deliberately best-effort: the local dataset stays usable without the upload.
        logging.error(f"Failed to push dataset to Hugging Face: {e}")
        logging.info("Continuing without uploading to Hugging Face")

    return dataset

In [None]:
def main():
    """Build the COCO subset, or reload it when it already exists on disk.

    When ``CONFIG["output_directory"]`` exists and is non-empty, the subset is
    reloaded with the ``imagefolder`` loader and the COCO annotations are re-read.
    Otherwise the data is prepared from scratch and pushed to the Hub.

    Returns:
        A ``(dataset, coco)`` tuple.

    NOTE(review): the two branches do not return the same schema. A fresh build
    has ``image_path``/``labels`` columns, while the recorded output of the reload
    branch shows ``image``/``label`` with ``label`` set to ``None``.
    """
    logging.info("Starting Vision Model Evaluation Setup")

    output_dir = CONFIG["output_directory"]
    # Short-circuits, so listdir is only attempted on an existing path.
    subset_on_disk = os.path.exists(output_dir) and bool(os.listdir(output_dir))

    if subset_on_disk:
        # Load existing dataset
        dataset = load_dataset("imagefolder", data_dir=output_dir)
        coco = COCO(CONFIG["annotation_file"])
    else:
        # Nothing prepared yet: select the images and build the dataset
        images, coco = prepare_data()
        dataset = prepare_and_upload_hf_dataset(images, coco, CONFIG["hf_dataset_name"])

    logging.info("Data preparation and loading complete")
    return dataset, coco
if __name__ == "__main__":
    # Build or reload the subset, then print a quick sanity check of the train split.
    dataset, coco = main()
    print(f"Dataset sample: {dataset['train'][0]}")
    print(f"Number of images: {len(dataset['train'])}")

Resolving data files:   0%|          | 0/400 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

loading annotations into memory...
Done (t=15.55s)
creating index...
index created!
Dataset sample: {'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x428 at 0x7B303A599630>, 'label': None}
Number of images: 400
