In [1]:
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models import resnet50

In [None]:
class DETRWithFasterRCNNBackbone(nn.Module):
    """
    Demo DETR implementation.

    Demo implementation of DETR in minimal number of lines, with the
    following differences wrt DETR in the paper:
    * learned positional encoding (instead of sine)
    * positional encoding is passed at input (instead of attention)
    * fc bbox predictor (instead of MLP)
    The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100.
    Only batch size 1 supported.
    """
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()

        # create ResNet-50 backbone
        self.backbone = resnet50()
        del self.backbone.fc

        # create conversion layer
        self.conv = nn.Conv2d(2048, hidden_dim, 1)

        # create a default PyTorch transformer
        self.transformer = nn.Transformer(
            hidden_dim, nheads, num_encoder_layers, num_decoder_layers)

        # prediction heads, one extra class for predicting non-empty slots
        # note that in baseline DETR linear_bbox layer is 3-layer MLP
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)

        # output positional encodings (object queries)
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))

        # spatial positional encodings
        # note that in baseline DETR we use sine positional encodings
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))

    def forward(self, inputs):
        # propagate inputs through ResNet-50 up to avg-pool layer
        x = self.backbone.conv1(inputs)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # convert from 2048 to 256 feature planes for the transformer
        h = self.conv(x)

        # construct positional encodings
        H, W = h.shape[-2:]
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)

        # propagate through the transformer
        h = self.transformer(pos + 0.1 * h.flatten(2).permute(2, 0, 1),
                             self.query_pos.unsqueeze(1)).transpose(0, 1)

        # finally project transformer outputs to class labels and bounding boxes
        return {'pred_logits': self.linear_class(h),
                'pred_boxes': self.linear_bbox(h).sigmoid()}

In [128]:
class DETRWithFasterRCNNBackbone(nn.Module):
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()

        # Create Faster R-CNN backbone
        self.backbone = fasterrcnn_resnet50_fpn(pretrained=True)
        # del self.backbone.fc

        # create conversion layer
        self.conv = nn.Conv2d(2048, hidden_dim, 1)

        # create a default PyTorch transformer
        self.transformer = nn.Transformer(
            hidden_dim, nheads, num_encoder_layers, num_decoder_layers)

        # prediction heads, one extra class for predicting non-empty slots
        # note that in baseline DETR linear_bbox layer is 3-layer MLP
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)

        # output positional encodings (object queries)
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))

        # spatial positional encodings
        # note that in baseline DETR we use sine positional encodings
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))


    def forward(self, inputs, targets):
        # Propagate inputs through Faster R-CNN backbone
        features = self.backbone(inputs)
        h = features[0]  # Extract the desired feature level (adjust if needed)

        # Extract boxes and labels from the targets dictionary
        boxes = [target["boxes"] for target in targets]
        labels = [target["labels"] for target in targets]

        # Convert features to the desired shape for transformer input
        H, W = h.shape[-2:]
        h_flatten = h.flatten(2).permute(2, 0, 1)  # Reshape features for transformer input

        # Construct positional encodings
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)

        # Propagate through the transformer
        h_with_pos = h_flatten + 0.1 * pos
        h_transformed = self.transformer(h_with_pos, self.query_pos.unsqueeze(1))
        h_transformed = h_transformed.transpose(0, 1)

        # Finally project transformer outputs to class labels and bounding boxes
        transformer_outputs = {
            'pred_logits': self.linear_class(h_transformed),
            'pred_boxes': self.linear_bbox(h_transformed).sigmoid()
        }

        # Compute the loss using the predictions and targets
        loss = compute_loss(transformer_outputs, boxes, labels)  # You need to implement compute_loss

        return loss




In [6]:
# Create the model
num_classes = 42  # Example: 40
model = DETRWithFasterRCNNBackbone(num_classes=num_classes)



# **dataset**

In [7]:
import pandas as pd

# Load class descriptions
class_descriptions_file = '/content/drive/MyDrive/OIDv4_ToolKit/OID/csv_folder/class-descriptions-boxable.csv'
class_descriptions_df = pd.read_csv(class_descriptions_file, header=None, names=['LabelName', 'ClassName'])
class_dict = dict(zip(class_descriptions_df['LabelName'], class_descriptions_df['ClassName']))


In [8]:
print(class_descriptions_df.head(12))


      LabelName   ClassName
0     /m/011k07    Tortoise
1   /m/011q46kg   Container
2     /m/012074      Magpie
3     /m/0120dh  Sea turtle
4     /m/01226z    Football
5     /m/012n7d   Ambulance
6     /m/012w5l      Ladder
7     /m/012xff  Toothbrush
8     /m/012ysf     Syringe
9     /m/0130jx        Sink
10    /m/0138tl         Toy
11    /m/013y1f       Organ


In [9]:
from PIL import Image

def get_image_size(image_path):
    with Image.open(image_path) as img:
        return img.size  # Returns a tuple (width, height)


In [10]:
import os

# Path to the directory containing all class subfolders
data_dir = '/content/drive/MyDrive/OIDv4_ToolKit/OID/Dataset/train/'

# Create a list to store images with annotations
data = []
# Create a mapping from class labels to integer indices
class_labels = []
# Iterate through class subfolders
for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)

    # Skip if not a directory
    if not os.path.isdir(class_dir):
        continue
    class_labels.append(class_name)
    # Iterate through images in the class subfolder
    for image_file in os.listdir(class_dir):
        if image_file.endswith('.jpg'):
            image_id = os.path.splitext(image_file)[0]

            image_path = os.path.join(class_dir, image_file)
            image_width, image_height = get_image_size(image_path)

            # Load annotation file for the image
            annotation_file = os.path.join(class_dir, 'Label', f'{image_id}.txt')

            with open(annotation_file, 'r') as f:
                lines = f.readlines()

            bbox_annotations = []
            for line in lines:
                values = line.strip().split()
                class_label = '_'.join(values[:len(values)-4])  # Join class name parts with underscores
                x_min, y_min, x_max, y_max = map(float, values[len(values)-4:])
                bbox_annotations.append({
                    'bbox': [x_min, y_min, x_max, y_max],
                    'category_id': class_label
                })

            data.append({
                'file_name': image_path,
                'image_id': image_id,
                'height': image_height,
                'width': image_width,
                'annotations': bbox_annotations
            })

# Now you can print the first element in the data list
print(data[0])


{'file_name': '/content/drive/MyDrive/OIDv4_ToolKit/OID/Dataset/train/Whisk/0320649bd28a44a1.jpg', 'image_id': '0320649bd28a44a1', 'height': 683, 'width': 1024, 'annotations': [{'bbox': [0.0, 0.0, 430.08, 496.552611], 'category_id': 'Whisk'}]}


In [11]:
print(data[1])
print(data[4])


{'file_name': '/content/drive/MyDrive/OIDv4_ToolKit/OID/Dataset/train/Whisk/08d454219a3a52b9.jpg', 'image_id': '08d454219a3a52b9', 'height': 1024, 'width': 1024, 'annotations': [{'bbox': [0.0, 69.12, 851.2, 819.84], 'category_id': 'Whisk'}]}
{'file_name': '/content/drive/MyDrive/OIDv4_ToolKit/OID/Dataset/train/Whisk/17d1b0091181382e.jpg', 'image_id': '17d1b0091181382e', 'height': 683, 'width': 1024, 'annotations': [{'bbox': [193.876992, 84.009, 291.499008, 505.42], 'category_id': 'Whisk'}, {'bbox': [381.611008, 199.43599999999998, 469.675008, 626.994], 'category_id': 'Whisk'}, {'bbox': [531.796992, 206.94899999999998, 621.908992, 631.775], 'category_id': 'Whisk'}, {'bbox': [684.032, 50.541999999999994, 801.451008, 545.034], 'category_id': 'Whisk'}]}


# **DATASET DOWNLOAD**

In [11]:
!pwd

/content


In [12]:
!git clone https://github.com/facebookresearch/detectron2.git


Cloning into 'detectron2'...
remote: Enumerating objects: 15188, done.[K
remote: Counting objects: 100% (209/209), done.[K
remote: Compressing objects: 100% (130/130), done.[K
remote: Total 15188 (delta 117), reused 133 (delta 79), pack-reused 14979[K
Receiving objects: 100% (15188/15188), 6.22 MiB | 11.60 MiB/s, done.
Resolving deltas: 100% (10984/10984), done.


In [13]:
%cd /content/detectron2


/content/detectron2


In [None]:
!python -m pip install -e .


# **train**

In [12]:
# Create a mapping from class labels to integer indices
class_to_idx = {class_label: idx for idx, class_label in enumerate(class_labels)}

In [13]:
class_to_idx

{'Whisk': 0,
 'Vase': 1,
 'Toy': 2,
 'Torch': 3,
 'Toilet paper': 4,
 'Tiara': 5,
 'Teddy bear': 6,
 'Sunglasses': 7,
 'Stapler': 8,
 'Spoon': 9,
 'Sock': 10,
 'Screwdriver': 11,
 'Sandal': 12,
 'Ruler': 13,
 'Remote control': 14,
 'Power plugs and sockets': 15,
 'Plate': 16,
 'Pencil case': 17,
 'Pen': 18,
 'Personal care': 19,
 'Necklace': 20,
 'Mug': 21,
 'Mobile phone': 22,
 'Mirror': 23,
 'Laptop': 24,
 'Lamp': 25,
 'Kitchen knife': 26,
 'Ipod': 27,
 'Headphones': 28,
 'Hat': 29,
 'Glasses': 30,
 'Glove': 31,
 'Eraser': 32,
 'Doll': 33,
 'Clock': 34,
 'Candle': 35,
 'Calculator': 36,
 'Briefcase': 37,
 'Book': 38,
 'Bottle': 39,
 'Belt': 40,
 'Ball': 41}

In [16]:
class CustomDataset(Dataset):
    def __init__(self, data, class_labels, transforms=None):
        self.data = data
        self.class_labels = class_labels
        self.class_to_idx = {class_label: idx for idx, class_label in enumerate(self.class_labels)}
        self.transforms = transforms

    def __len__(self):
        return len(self.data)


    def __getitem__(self, idx):
        img_info = self.data[idx]
        image = Image.open(img_info['file_name']).convert('RGB')

        bbox_annotations = img_info['annotations']
        boxes = []
        labels = []

        for anno in bbox_annotations:
            box = torch.tensor(anno['bbox'], dtype=torch.float32)

            # Preprocess the class label to match the format with underscores
            class_label_with_underscores = anno['category_id']
            class_label = class_label_with_underscores.replace('_', ' ')
            label = torch.tensor(self.class_to_idx[class_label], dtype=torch.int64)

            # label = torch.tensor(self.class_to_idx[anno['category_id']], dtype=torch.int64)
            boxes.append(box)
            labels.append(label)

        target = {
            'boxes': torch.stack(boxes),
            'labels': torch.stack(labels)
        }

        if self.transforms is not None:
            transformed_image = self.transforms(image)
            transformed_target = {
                'boxes': target['boxes'],
                'labels': target['labels']
            }

            return transformed_image, transformed_target

        return image, target




In [17]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


# Define transformations to be applied to both images and targets
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to tensor
    # Add more transformations as needed
])

# Create an instance of your custom dataset with transformations
train_dataset = CustomDataset(data, class_labels, transforms=transform)

# Create a DataLoader for training
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: x)



In [None]:

# Set up training parameters
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

In [19]:

# Define optimizer and learning rate scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [20]:
# Print the structure of a batch from the train_loader
for batch_idx, (images, targets) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1}:")
    print("Images:")
    print(images)
    print("Targets:")
    print(targets)
    break  # Print only the first batch for inspection


Batch 1:
Images:
(tensor([[[0.7216, 0.7255, 0.7294,  ..., 0.7922, 0.7922, 0.7922],
         [0.7216, 0.7255, 0.7255,  ..., 0.7843, 0.7843, 0.7843],
         [0.7255, 0.7216, 0.7216,  ..., 0.7843, 0.7843, 0.7843],
         ...,
         [0.6549, 0.6549, 0.6549,  ..., 0.5059, 0.5059, 0.5059],
         [0.6588, 0.6549, 0.6549,  ..., 0.5020, 0.4980, 0.4980],
         [0.6588, 0.6549, 0.6549,  ..., 0.4941, 0.4941, 0.4941]],

        [[0.7059, 0.7098, 0.7137,  ..., 0.7647, 0.7647, 0.7647],
         [0.7059, 0.7098, 0.7098,  ..., 0.7569, 0.7569, 0.7569],
         [0.7098, 0.7059, 0.7059,  ..., 0.7569, 0.7569, 0.7569],
         ...,
         [0.6510, 0.6510, 0.6510,  ..., 0.4745, 0.4745, 0.4745],
         [0.6549, 0.6510, 0.6510,  ..., 0.4706, 0.4667, 0.4667],
         [0.6549, 0.6510, 0.6510,  ..., 0.4627, 0.4627, 0.4627]],

        [[0.7020, 0.7059, 0.7098,  ..., 0.7333, 0.7333, 0.7333],
         [0.7020, 0.7059, 0.7059,  ..., 0.7255, 0.7255, 0.7255],
         [0.7059, 0.7020, 0.7020,  ..., 

In [None]:
import torch
import torch.optim as optim

num_epochs = 5
# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (images, targets) in enumerate(train_loader):

        # Print the shape and contents of the targets tensor
        print(images)
        print(targets)
        optimizer.zero_grad()
        predictions = model(images)
        loss = criterion(predictions, targets)
        loss.backward()
        optimizer.step()

    # Validation and model evaluation
    model.eval()
    with torch.no_grad():
        for batch_idx, (val_images, val_targets) in enumerate(val_loader):
            val_predictions = model(val_images)
            val_loss = criterion(val_predictions, val_targets)
            # Calculate validation metrics (e.g., mAP) here

    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {loss.item()} - Val Loss: {val_loss.item()}")


In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss() + nn.SmoothL1Loss()  # Example combined loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
def compute_combined_loss(outputs, targets, criterion):
    class_logits = outputs['pred_logits']
    pred_boxes = outputs['pred_boxes']
    target_classes = targets['class_labels']
    target_boxes = targets['bounding_boxes']

    # Compute classification loss
    class_loss = criterion(class_logits, target_classes)

    # Compute regression loss
    reg_loss = criterion(pred_boxes, target_boxes)

    # Combine the losses
    combined_loss = class_loss + reg_loss
    return combined_loss


def evaluate(model, validation_loader):
    model.eval()
    total_loss = 0.0
    metrics = {}  # You can define and calculate your evaluation metrics here

    with torch.no_grad():
        for images, targets in validation_loader:
            outputs = model(images)
            loss = compute_combined_loss(outputs, targets, criterion)
            total_loss += loss.item()
            # Calculate and update metrics

    avg_loss = total_loss / len(validation_loader)
    return avg_loss, metrics


In [None]:
import torch
import torch.optim as optim

# Create DataLoader and model (using DETRWithPretrainedTransformer)

# Training loop
num_epochs = 10  # Example number of epochs
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for images, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = compute_combined_loss(outputs, targets, criterion)  # Implement this function

        # Backpropagation
        loss.backward()  # Compute gradients

        # Update parameters
        optimizer.step()  # Update model weights

    # Validation
    if epoch % 2 == 0:  # Validate every 2 epochs
        model.eval()  # Set the model to evaluation mode
        with torch.no_grad():
            validation_loss, validation_metrics = evaluate(model, validation_loader)  # Implement this function
            print(f"Epoch [{epoch}/{num_epochs}]: Validation Loss: {validation_loss}, Metrics: {validation_metrics}")

# Save the trained model
torch.save(model.state_dict(), "trained_model.pth")


In [None]:

# Test the model with a sample input
sample_input = torch.rand(1, 3, 224, 224)  # Example input shape
output = model(sample_input)
print(output)

# **DATASET download**

In [1]:
!git clone https://github.com/EscVM/OIDv4_ToolKit.git

Cloning into 'OIDv4_ToolKit'...
remote: Enumerating objects: 422, done.[K
remote: Total 422 (delta 0), reused 0 (delta 0), pack-reused 422[K
Receiving objects: 100% (422/422), 34.08 MiB | 11.05 MiB/s, done.
Resolving deltas: 100% (146/146), done.


In [2]:
%cd OIDv4_ToolKit

/content/OIDv4_ToolKit


In [3]:
!pip3 install -r requirements.txt


Collecting awscli (from -r requirements.txt (line 3))
  Downloading awscli-1.29.32-py3-none-any.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
Collecting botocore==1.31.32 (from awscli->-r requirements.txt (line 3))
  Downloading botocore-1.31.32-py3-none-any.whl (11.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.1/11.1 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docutils<0.17,>=0.10 (from awscli->-r requirements.txt (line 3))
  Downloading docutils-0.16-py2.py3-none-any.whl (548 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m548.2/548.2 kB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting s3transfer<0.7.0,>=0.6.0 (from awscli->-r requirements.txt (line 3))
  Downloading s3transfer-0.6.2-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[

In [4]:
!python3 main.py

usage: main.py
       [-h]
       [--Dataset /path/to/OID/csv/]
       [-y]
       [--classes list of classes [list of classes ...]]
       [--type_csv 'train' or 'validation' or 'test' or 'all']
       [--sub Subset of human verified images or machine generated (h or m)]
       [--image_IsOccluded 1 or 0]
       [--image_IsTruncated 1 or 0]
       [--image_IsGroupOf 1 or 0]
       [--image_IsDepiction 1 or 0]
       [--image_IsInside 1 or 0]
       [--multiclasses 0 (default) or 1]
       [--n_threads [default 20]]
       [--noLabels]
       [--limit integer number]
       <command>
       'downloader',
       'visualizer'
       or
       'ill_downloader'.
main.py: error: the following arguments are required: <command> 'downloader', 'visualizer' or 'ill_downloader'.


**train_set**

In [5]:
!python3 main.py downloader -y --classes Whisk Vase Toy Torch "Toilet paper" Tiara "Teddy bear" "Sunglasses" Stapler Spoon Sock "Screwdriver" Sandal Ruler "Remote control" "Power plugs and sockets" Plate "Pencil case" Pen "Personal care" Necklace Mug "Mobile phone" Mirror Laptop Lamp "Kitchen knife" Ipod "Headphones" Hat "Glasses" Glove Eraser Doll Clock Candle Calculator Briefcase Book Bottle Belt Ball --type_csv train --image_IsGroupOf 0 --limit 90


[92m
		   ___   _____  ______            _    _    
		 .'   `.|_   _||_   _ `.         | |  | |   
		/  .-.  \ | |    | | `. \ _   __ | |__| |_  
		| |   | | | |    | |  | |[ \ [  ]|____   _| 
		\  `-'  /_| |_  _| |_.' / \ \/ /     _| |_  
		 `.___.'|_____||______.'   \__/     |_____|
	[0m
[92m
             _____                    _                 _             
            (____ \                  | |               | |            
             _   \ \ ___  _ _ _ ____ | | ___   ____  _ | | ____  ____ 
            | |   | / _ \| | | |  _ \| |/ _ \ / _  |/ || |/ _  )/ ___)
            | |__/ / |_| | | | | | | | | |_| ( ( | ( (_| ( (/ /| |    
            |_____/ \___/ \____|_| |_|_|\___/ \_||_|\____|\____)_|    
                                                          
        [0m
    [INFO] | Downloading Whisk.[0m
[91m   [ERROR] | Missing the class-descriptions-boxable.csv file.[0m
[94m[DOWNLOAD] | Automatic download.[0m
...72%, 0 MB, 54291 KB/s, 0 seconds passed...145%, 0

**val_set**

In [None]:
!python3 main.py downloader -y --classes Whisk Vase Toy Torch "Toilet paper" Tiara "Teddy bear" "Sunglasses" Stapler Spoon Sock "Screwdriver" Sandal Ruler "Remote control" "Power plugs and sockets" Plate "Pencil case" Pen "Personal care" Necklace Mug "Mobile phone" Mirror Laptop Lamp "Kitchen knife" Ipod "Headphones" Hat "Glasses" Glove Eraser Doll Clock Candle Calculator Briefcase Book Bottle Belt Ball --type_csv validation --image_IsGroupOf 0 --limit 5
# !python3 main.py downloader -y --classes Apple Orange --type_csv train --image_IsGroupOf 0 --limit 5

**test set**

In [7]:
!python3 main.py downloader -y --classes Whisk Vase Toy Torch "Toilet paper" Tiara "Teddy bear" "Sunglasses" Stapler Spoon Sock "Screwdriver" Sandal Ruler "Remote control" "Power plugs and sockets" Plate "Pencil case" Pen "Personal care" Necklace Mug "Mobile phone" Mirror Laptop Lamp "Kitchen knife" Ipod "Headphones" Hat "Glasses" Glove Eraser Doll Clock Candle Calculator Briefcase Book Bottle Belt Ball --type_csv test --image_IsGroupOf 0 --limit 2


[92m
		   ___   _____  ______            _    _    
		 .'   `.|_   _||_   _ `.         | |  | |   
		/  .-.  \ | |    | | `. \ _   __ | |__| |_  
		| |   | | | |    | |  | |[ \ [  ]|____   _| 
		\  `-'  /_| |_  _| |_.' / \ \/ /     _| |_  
		 `.___.'|_____||______.'   \__/     |_____|
	[0m
[92m
             _____                    _                 _             
            (____ \                  | |               | |            
             _   \ \ ___  _ _ _ ____ | | ___   ____  _ | | ____  ____ 
            | |   | / _ \| | | |  _ \| |/ _ \ / _  |/ || |/ _  )/ ___)
            | |__/ / |_| | | | | | | | | |_| ( ( | ( (_| ( (/ /| |    
            |_____/ \___/ \____|_| |_|_|\___/ \_||_|\____|\____)_|    
                                                          
        [0m
    [INFO] | Downloading Whisk.[0m
[91m   [ERROR] | Missing the test-annotations-bbox.csv file.[0m
[94m[DOWNLOAD] | Automatic download.[0m
...100%, 49 MB, 41184 KB/s, 1 seconds passed
[94m[DOWNLOAD

In [8]:
import shutil

# Source folder path (in Colab environment)
source_folder = '/content/OIDv4_ToolKit2'

# Destination folder path in Google Drive
destination_folder = '/content/drive/MyDrive/'

# Move the folder and its contents
shutil.move(source_folder, destination_folder)


'/content/drive/MyDrive/OIDv4_ToolKit2'

In [None]:
!zip -r OID.zip OID


In [17]:
from google.colab import files
files.download('OID.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>