# 📥 Download Dataset iFood 2019 - Otomatis

Notebook ini akan **otomatis mendownload dan mengekstrak** dataset iFood 2019 ke Google Drive.

### Dataset Info:
| File | Size | Isi |
|------|------|-----|
| Annotations | 3 MB | Labels & class list |
| Train Images | 2.3 GB | 120,216 gambar |
| Val Images | 231 MB | 12,170 gambar |
| Test Images | 548 MB | 28,399 gambar |

**Total: ~3.1 GB**

---

In [None]:
# ============================================================
# STEP 1: Mount Google Drive
# ============================================================

from google.colab import drive
import os

# Mount Google Drive at /content/drive so the paths below resolve.
drive.mount('/content/drive')

# Project root on Drive and the dataset folder inside it.
PROJECT_PATH = '/content/drive/MyDrive/AlexNet_iFood2019'
DATASET_PATH = os.path.join(PROJECT_PATH, 'dataset')

# Create the folder structure: the dataset folder plus three output folders
# directly under the project root. Existing folders are left untouched.
os.makedirs(DATASET_PATH, exist_ok=True)
for subfolder in ('checkpoints', 'evaluation_results', 'analysis_results'):
    os.makedirs(os.path.join(PROJECT_PATH, subfolder), exist_ok=True)

print("‚úÖ Google Drive mounted")
print(f"üìÅ Dataset akan disimpan di: {DATASET_PATH}")

In [None]:
# ============================================================
# STEP 2: Download Dataset
# ============================================================

import os
import urllib.request
import tarfile
import time
from tqdm import tqdm

# Dataset URLs from the official iFood 2019 release.
# Each entry maps a split name to its download URL, the local archive
# filename, a human-readable size, and the MD5 digest recorded for it.
_BASE_URL = 'https://food-x.s3.amazonaws.com'

DATASET_URLS = {
    split: {
        'url': f'{_BASE_URL}/{archive}',
        'filename': archive,
        'size': size,
        'md5': md5,
    }
    for split, archive, size, md5 in (
        ('annotations', 'annot.tar', '3 MB', '0c632c543ceed0e70f0eb2db58eda3ab'),
        ('train', 'train.tar', '2.3 GB', '8e56440e365ee852dcb0953a9307e27f'),
        ('val', 'val.tar', '231 MB', 'fa9a4c1eb929835a0fe68734f4868d3b'),
        ('test', 'test.tar', '548 MB', '32479146dd081d38895e46bb93fed58f'),
    )
}

class DownloadProgressBar(tqdm):
    """tqdm bar whose ``update_to`` can be used as a ``urlretrieve`` reporthook."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the position reported by the download.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block in bytes.
            tsize: Total size in bytes. ``None`` or a negative value means the
                size is unknown, in which case the current total is kept.
        """
        # urlretrieve may pass -1 when the server does not report a size;
        # storing that as the total would give the bar a nonsensical length.
        if tsize is not None and tsize >= 0:
            self.total = tsize

        # Progress is reported in whole blocks, so the final call can exceed
        # the real size; clamp so the bar finishes exactly at its total.
        downloaded = b * bsize
        if self.total:
            downloaded = min(downloaded, self.total)

        # tqdm.update() takes an increment, so convert from absolute position.
        self.update(downloaded - self.n)

def download_file(url, output_path, desc):
    """Download ``url`` to ``output_path`` while showing a progress bar.

    The data is written to ``output_path + '.part'`` first and renamed to
    ``output_path`` only after the transfer finished. An interrupted or failed
    download therefore never leaves a truncated file under the final name,
    where an "already exists" check would mistake it for a complete one.

    Args:
        url: Address of the file to fetch.
        output_path: Final location of the downloaded file.
        desc: Label shown next to the progress bar.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` is propagated
        after the partial file has been removed.
    """
    partial_path = output_path + '.part'
    try:
        with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=desc) as t:
            urllib.request.urlretrieve(url, filename=partial_path, reporthook=t.update_to)
        # Atomic on the same filesystem: the final name appears only when complete.
        os.replace(partial_path, output_path)
    finally:
        # After a successful rename this is a no-op; after a failure or an
        # interrupt it discards the incomplete data.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Download every archive listed in DATASET_URLS into the local download
# folder, verify each one against its recorded MD5 digest, and report
# honestly which archives (if any) could not be obtained.
import hashlib


def _md5_of(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in ``chunk_size``-byte pieces so multi-gigabyte archives
    are never loaded into memory at once.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


print("üì• Mulai download dataset iFood 2019...")
print("="*60)

download_dir = '/content/downloads'
os.makedirs(download_dir, exist_ok=True)

# Names of archives that failed to download or failed verification.
failed = []

for name, info in DATASET_URLS.items():
    output_path = os.path.join(download_dir, info['filename'])

    # Skip only when the existing file is intact; a leftover from an
    # interrupted run would otherwise be accepted as complete.
    # NOTE(review): assumes the digests in DATASET_URLS are the published ones.
    if os.path.exists(output_path):
        if _md5_of(output_path) == info['md5']:
            print(f"‚è≠Ô∏è  {name}: sudah ada, skip download")
            continue
        print(f"   {name}: file lama tidak cocok dengan MD5, download ulang")
        os.remove(output_path)

    print(f"\nüì• Downloading {name} ({info['size']})...")
    start_time = time.time()

    try:
        download_file(info['url'], output_path, info['filename'])
    except Exception as e:
        print(f"‚ùå Error downloading {name}: {e}")
        # Drop any partial file so a re-run downloads it again instead of skipping.
        if os.path.exists(output_path):
            os.remove(output_path)
        failed.append(name)
        continue

    elapsed = time.time() - start_time

    if _md5_of(output_path) != info['md5']:
        print(f"‚ùå Error downloading {name}: MD5 tidak cocok, file dihapus")
        os.remove(output_path)
        failed.append(name)
        continue

    print(f"‚úÖ {name} selesai dalam {elapsed:.1f} detik")

print("\n" + "="*60)
if failed:
    print(f"‚ùå Gagal download: {', '.join(failed)}. Jalankan ulang cell ini.")
else:
    print("‚úÖ Semua file berhasil didownload!")

In [None]:
# ============================================================
# STEP 3: Extract ke Google Drive
# ============================================================

import tarfile
import shutil

download_dir = '/content/downloads'
DATASET_PATH = '/content/drive/MyDrive/AlexNet_iFood2019/dataset'


def _extract_all(tar, target_dir):
    """Extract every member of the open archive ``tar`` into ``target_dir``.

    Uses tarfile's ``'data'`` extraction filter when the running Python
    provides it, which rejects members that would be written outside
    ``target_dir``. Older interpreters fall back to plain extraction.
    """
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(target_dir, filter='data')
    else:
        tar.extractall(target_dir)


def extract_images(tar_path, temp_dir, inner_dir, dest_dir):
    """Unpack an image archive locally, then move its images into ``dest_dir``.

    Args:
        tar_path: Path of the ``.tar`` archive to unpack.
        temp_dir: Scratch folder the archive is unpacked into; it is removed
            again once the images have been moved.
        inner_dir: Name of the folder inside the archive that holds the images.
        dest_dir: Folder that receives the images (created if necessary).

    Returns:
        The number of entries in ``dest_dir`` after the move, or ``None`` when
        the archive is missing or does not contain ``inner_dir``. In both
        failure cases the reason is printed.
    """
    if not os.path.exists(tar_path):
        print(f"‚ùå {tar_path} tidak ditemukan, jalankan Step 2 dulu")
        return None

    os.makedirs(dest_dir, exist_ok=True)
    with tarfile.open(tar_path, 'r') as tar:
        _extract_all(tar, temp_dir)

    src_dir = os.path.join(temp_dir, inner_dir)
    if not os.path.isdir(src_dir):
        # Unexpected archive layout: keep temp_dir so it can be inspected.
        print(f"‚ùå Folder {inner_dir} tidak ada di dalam arsip (lihat {temp_dir})")
        return None

    for entry in os.listdir(src_dir):
        # Moving to an explicit file path (not just the folder) lets a re-run
        # overwrite images left by an earlier, interrupted run instead of
        # raising "destination already exists".
        # NOTE(review): assumes the archive folder holds only plain files.
        shutil.move(os.path.join(src_dir, entry), os.path.join(dest_dir, entry))
    shutil.rmtree(temp_dir)

    return len(os.listdir(dest_dir))


print("üì¶ Mulai ekstraksi ke Google Drive...")
print("="*60)

# Steps that could not be completed; decides the final message.
incomplete = []

# 1. Extract annotations straight into the dataset folder.
print("\nüì¶ Extracting annotations...")
annot_tar = os.path.join(download_dir, 'annot.tar')
if os.path.exists(annot_tar):
    with tarfile.open(annot_tar, 'r') as tar:
        _extract_all(tar, DATASET_PATH)
    print("‚úÖ Annotations extracted")
else:
    print(f"‚ùå {annot_tar} tidak ditemukan, jalankan Step 2 dulu")
    incomplete.append('annotations')

# 2-4. Extract the train, val and test images. Each archive is unpacked under
# /content first and its images are then moved into the dataset folder.
image_splits = (
    ('train', 'Train', "\nüì¶ Extracting train images (ini akan memakan waktu ~10-15 menit)..."),
    ('val', 'Val', "\nüì¶ Extracting validation images..."),
    ('test', 'Test', "\nüì¶ Extracting test images..."),
)
for split, label, banner in image_splits:
    print(banner)
    extracted = extract_images(
        os.path.join(download_dir, f'{split}.tar'),
        f'/content/temp_{split}',
        f'{split}_set',
        os.path.join(DATASET_PATH, f'{split}_images'),
    )
    if extracted is None:
        incomplete.append(split)
    else:
        print(f"‚úÖ {label} images extracted: {extracted} files")

print("\n" + "="*60)
if incomplete:
    print(f"‚ùå Ekstraksi belum lengkap: {', '.join(incomplete)}")
else:
    print("‚úÖ Ekstraksi selesai!")

In [None]:
# ============================================================
# STEP 4: Verifikasi Dataset
# ============================================================

import os

DATASET_PATH = '/content/drive/MyDrive/AlexNet_iFood2019/dataset'


def verify_dataset(dataset_path, required_items):
    """Check that every required file and folder exists under ``dataset_path``.

    One status line is printed per item. A required folder only counts as
    present when it contains at least one entry, because an empty folder can
    exist even though no images were ever extracted into it.

    Args:
        dataset_path: Root folder of the extracted dataset.
        required_items: Mapping of relative name to ``'file'`` or ``'dir'``.

    Returns:
        ``True`` when every item is present (and every folder non-empty),
        otherwise ``False``.
    """
    everything_present = True
    for item, item_type in required_items.items():
        path = os.path.join(dataset_path, item)

        if item_type == 'file':
            if os.path.isfile(path):
                print(f"‚úÖ {item}")
                continue
        elif os.path.isdir(path):
            count = len(os.listdir(path))
            if count > 0:
                print(f"‚úÖ {item}: {count:,} files")
            else:
                print(f"‚ùå {item}: KOSONG (0 files)")
                everything_present = False
            continue

        print(f"‚ùå {item}: TIDAK DITEMUKAN")
        everything_present = False

    return everything_present


print("üîç Verifikasi dataset...")
print("="*60)

# Files and folders the dataset folder must contain.
required_items = {
    'class_list.txt': 'file',
    'train_info.csv': 'file',
    'val_info.csv': 'file',
    'test_info.csv': 'file',
    'train_images': 'dir',
    'val_images': 'dir',
    'test_images': 'dir'
}

all_ok = verify_dataset(DATASET_PATH, required_items)

print("\n" + "="*60)
if all_ok:
    print("üéâ DATASET SIAP DIGUNAKAN!")
    print(f"\nLokasi: {DATASET_PATH}")
else:
    print("‚ö†Ô∏è  Ada file yang hilang, coba jalankan ulang dari Step 2")

In [None]:
# ============================================================
# STEP 5: Cleanup (Hapus file tar untuk hemat storage)
# ============================================================

import shutil

download_dir = '/content/downloads'

if not os.path.exists(download_dir):
    print("‚úÖ No cache to clean")
else:
    # Add up the sizes of the regular files directly inside the download
    # folder before it is deleted, so the freed space can be reported.
    size_before = 0
    for entry in os.listdir(download_dir):
        entry_path = os.path.join(download_dir, entry)
        if os.path.isfile(entry_path):
            size_before += os.path.getsize(entry_path)

    shutil.rmtree(download_dir)
    print(f"üóëÔ∏è  Deleted download cache: {size_before / 1e9:.2f} GB freed")

# Closing summary with pointers to the follow-up training notebooks.
closing_lines = (
    "\n" + "="*60,
    "üéâ SELESAI!",
    "="*60,
    "\nDataset sudah tersimpan di Google Drive.",
    "Anda bisa menutup notebook ini dan lanjut ke training.",
    "\nNotebook selanjutnya:",
    "  - train_member1_baseline.ipynb (Member 1)",
    "  - train_member2_mod1.ipynb (Member 2)",
    "  - train_member3_mod2.ipynb (Member 3)",
    "  - train_member4_combined.ipynb (Member 4)",
)
for line in closing_lines:
    print(line)