# Downloading Images


In [47]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [48]:
import json
import os

def generate_download_list(annotation_file, output_file):
    """Write one COCO image download URL per line for an annotation file.

    Args:
        annotation_file: Path to a COCO-style annotation JSON containing an
            ``images`` list whose entries have a ``file_name`` key.
        output_file: Path of the text file to create (or overwrite) with the
            generated URLs, one per line.

    The dataset split (``train2017`` or ``val2017``) is inferred from the
    annotation file's base name only, so a parent directory whose name happens
    to contain ``train`` cannot turn a validation file into a train list.
    """
    with open(annotation_file, 'r') as f:
        annotations = json.load(f)

    # Inspect only the file name; directory names must not influence the split.
    annotation_name = os.path.basename(annotation_file)
    dataset_type = 'train2017' if 'train' in annotation_name else 'val2017'

    with open(output_file, 'w') as f:
        for img_info in annotations['images']:
            f.write(f"http://images.cocodataset.org/{dataset_type}/{img_info['file_name']}\n")

# Build one URL list per dataset split from the filtered annotation files.
download_jobs = (
    ('/content/drive/MyDrive/DATASET/filtered_train2017.json', 'train_images.txt'),
    ('/content/drive/MyDrive/DATASET/filtered_val2017.json', 'val_images.txt'),
)
for annotation_path, list_path in download_jobs:
    generate_download_list(annotation_path, list_path)

# Print the wget commands that consume the generated lists.
print(
    "Download lists generated. Use:",
    "wget -i train_images.txt -P filtered_train2017/",
    "wget -i val_images.txt -P filtered_val2017/",
    sep="\n",
)

Download lists generated. Use:
wget -i train_images.txt -P filtered_train2017/
wget -i val_images.txt -P filtered_val2017/


In [49]:
# Count the URLs in the train download list.
with open('train_images.txt', 'r') as f:
  links = f.readlines()

# Skip blank or whitespace-only lines so the count reflects actual URLs.
cntr = sum(1 for link in links if link.strip())
print(f'Number of images : {cntr}')

Number of images : 32801


In [50]:
# Count the URLs in the validation download list.
with open('val_images.txt', 'r') as f:
  links = f.readlines()

# Skip blank or whitespace-only lines so the count reflects actual URLs.
cntr = sum(1 for link in links if link.strip())
print(f'Number of images : {cntr}')

Number of images : 4082


In [51]:
#!wget -i val_images.txt -P /content/drive/MyDrive/COCO2017Val

In [52]:
#!wget -i train_images.txt -P /content/drive/MyDrive/COCODataset2017/COCO2017train

# Downloading Labels

In [53]:
# Locations of the filtered COCO annotation files on the mounted Drive.
trainjson_path, valjson_path = (
    '/content/drive/MyDrive/DATASET/filtered_train2017.json',
    '/content/drive/MyDrive/DATASET/filtered_val2017.json',
)

In [54]:
# Load the train annotation JSON into `data`.
# The file handle gets its own name so that `trainjson_path` keeps holding the
# path string and this cell can be re-run without a TypeError from open().
with open(trainjson_path, 'r') as train_json_file:
  data = json.load(train_json_file)

In [55]:
imgs = data['images']
# Preview a few records instead of echoing the entire image list as cell output.
imgs[:5]

[{'license': 3,
  'file_name': '000000574769.jpg',
  'coco_url': 'http://images.cocodataset.org/train2017/000000574769.jpg',
  'height': 640,
  'width': 480,
  'date_captured': '2013-11-14 17:07:59',
  'flickr_url': 'http://farm8.staticflickr.com/7010/6728227647_3d5a0d55ee_z.jpg',
  'id': 574769},
 {'license': 4,
  'file_name': '000000060623.jpg',
  'coco_url': 'http://images.cocodataset.org/train2017/000000060623.jpg',
  'height': 427,
  'width': 640,
  'date_captured': '2013-11-14 17:24:15',
  'flickr_url': 'http://farm7.staticflickr.com/6080/6113512699_37b4c98473_z.jpg',
  'id': 60623},
 {'license': 2,
  'file_name': '000000005802.jpg',
  'coco_url': 'http://images.cocodataset.org/train2017/000000005802.jpg',
  'height': 479,
  'width': 640,
  'date_captured': '2013-11-14 17:28:25',
  'flickr_url': 'http://farm4.staticflickr.com/3810/9614287841_1b724dbbc5_z.jpg',
  'id': 5802},
 {'license': 2,
  'file_name': '000000374628.jpg',
  'coco_url': 'http://images.cocodataset.org/train2017/

In [56]:
path = '/content/drive/MyDrive/DATASET/'
train_labels_dir = path + 'TrainLabels/'

# open(..., 'w') does not create missing directories, so make the folder first.
os.makedirs(train_labels_dir, exist_ok=True)

# Create (or truncate to empty) one label file per image, so every image has a
# label file even when it has no annotations.
for img in imgs:
  label_file = train_labels_dir + img['file_name'].split('.')[0] + '.txt'
  with open(label_file, 'w'):
    pass

In [57]:
# Image metadata keyed by image id.
images = dict((entry['id'], entry) for entry in data['images'])
# Category id -> zero-based position of that category in data['categories'].
categories = dict(
    (category['id'], position)
    for position, category in enumerate(data['categories'])
)

In [58]:
anns = data['annotations']

# Collect the YOLO rows per label file first, then write each file exactly once.
# Writing in 'w' mode keeps this cell safe to re-run (no duplicated rows) and
# avoids reopening a file for every single annotation.
label_lines = {}
for ann in anns:
    image_id = ann['image_id']
    img_info = images[image_id]
    img_w, img_h = img_info['width'], img_info['height']

    # Bounding box: unpacked as (x_min, y_min, width, height) and converted to
    # a centre point plus size, each divided by the image dimensions.
    x_min, y_min, w, h = ann['bbox']
    x_center = (x_min + w / 2) / img_w
    y_center = (y_min + h / 2) / img_h
    w_norm = w / img_w
    h_norm = h / img_h

    class_id = categories[ann['category_id']]

    # YOLO txt file path
    txt_file = path+'TrainLabels/'+img_info['file_name'].split('.')[0]+'.txt'

    # Queue the row; rows keep their annotation order within each file.
    label_lines.setdefault(txt_file, []).append(
        f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"
    )

# One write per label file.
for txt_file, lines in label_lines.items():
    with open(txt_file, 'w') as f:
        f.writelines(lines)

# Category Downloading

In [59]:
classes = {cat['id']: cat['name'] for cat in data['categories']}
classes_dir = path + 'Classes/'

# open(..., 'w') does not create missing directories, so make the folder first.
os.makedirs(classes_dir, exist_ok=True)

# Write one class name per line; dicts preserve insertion order, so the names
# appear in the order the categories were read from data['categories'].
with open(classes_dir + 'classes.txt', 'w') as f:
    for class_name in classes.values():
      f.write(f"{class_name}\n")