In [None]:
# ============================================================
# ALL-IN-ONE SETUP NOTEBOOK - FIXED VERSION
# iFood 2019 AlexNet Project - Complete Automation
# 
# Repository: https://github.com/deftorch/alexnet-ifood2019
# Author: deftorch (qaidhaidaradila@gmail.com)
#
# FIXES:
# ✅ Complete image download added
# ✅ Repository structure verification
# ✅ Better error handling
# ✅ GPU checks improved
# ============================================================

# ============================================================
# PART 0: SANITY CHECK
# ============================================================
# Opening banner for the pre-flight section (three lines, one print).
print("\n".join(["=" * 70, "üîç Pre-flight checks...", "=" * 70]))

import sys

# Refuse to continue on interpreters older than 3.8. Comparing the
# (major, minor) pair as a tuple covers both halves of the check at once.
python_version = sys.version_info
if python_version[:2] < (3, 8):
    raise Exception(f"‚ùå Python 3.8+ required. Current: {sys.version}")

print("‚úÖ Python " + ".".join(str(part) for part in python_version[:3]))

# Colab detection: the `drive` helper imported here is the object used to
# mount Google Drive further down, so the notebook aborts when it is missing.
try:
    from google.colab import drive
except ImportError:
    print("‚ö†Ô∏è  Not in Colab - some features may not work")
    raise Exception("This notebook requires Google Colab")
else:
    print("‚úÖ Running in Google Colab")

print("\n‚úÖ Pre-flight checks passed!\n")

# ============================================================
# PART 1: MOUNT GOOGLE DRIVE & CREATE STRUCTURE
# ============================================================
print("=" * 70)
print("üöÄ iFood 2019 AlexNet Project - Automated Setup")
print("=" * 70)
print()
print("PART 1: Setting up Google Drive...")
print("-" * 70)

import os
import time

# Attach the user's Drive under /content/drive (no forced remount).
print("\nüìÇ Mounting Google Drive...")
drive.mount('/content/drive', force_remount=False)

# Where the project lives inside the mounted Drive.
DRIVE_ROOT = '/content/drive/MyDrive'
PROJECT_NAME = 'AlexNet_iFood2019'
PROJECT_PATH = os.path.join(DRIVE_ROOT, PROJECT_NAME)

# Sub-folder name -> human-readable description printed next to it.
folders = dict(
    dataset='Dataset iFood 2019',
    checkpoints='Model checkpoints',
    evaluation_results='Evaluation metrics',
    analysis_results='Analysis plots',
    logs='Training logs',
)

print("\nüìÅ Creating project structure at:\n   " + PROJECT_PATH + "\n")
for folder in folders:
    description = folders[folder]
    folder_path = os.path.join(PROJECT_PATH, folder)
    # exist_ok makes this section safe to re-run on an existing project.
    os.makedirs(folder_path, exist_ok=True)
    print("  ‚úì {:20s} - {}".format(folder, description))

print("\n‚úÖ Google Drive setup complete!")

# ============================================================
# PART 2: INSTALL ALL DEPENDENCIES
# ============================================================
# Section banner.
print("\n" + "="*70)
print("PART 2: Installing dependencies...")
print("-"*70)

# The leading `!` is the IPython/Colab shell escape, so these lines only run
# inside a notebook kernel. pip itself is upgraded first, then the packages
# are installed in groups; `-q` asks pip for quiet output.
# NOTE(review): shell escapes do not raise on a non-zero exit status, so the
# success message below is printed even if one of the installs failed.
!pip install --upgrade pip -q
!pip install -q torch torchvision torchaudio
!pip install -q pandas numpy pillow opencv-python scikit-learn
!pip install -q matplotlib seaborn tqdm
!pip install -q gdown requests

print("\n‚úÖ All dependencies installed!")

# Import a few of the packages installed above so their versions can be
# reported; `torch` is also used later for the GPU checks.
import torch
import pandas as pd
import numpy as np

print(f"\nüìä Installed versions:")
print(f"  PyTorch: {torch.__version__}")
print(f"  Pandas: {pd.__version__}")
print(f"  NumPy: {np.__version__}")
print(f"  CUDA available: {torch.cuda.is_available()}")

# ============================================================
# PART 3: DOWNLOAD DATASET WITH IMAGES
# ============================================================
print()
print("=" * 70)
print("PART 3: Dataset Download (WITH IMAGES)")
print("-" * 70)

import requests
import tarfile
from tqdm import tqdm

# All dataset files are kept in the `dataset` folder of the Drive project.
DATASET_DIR = os.path.join(PROJECT_PATH, 'dataset')

def download_file(url, output_path):
    """Stream ``url`` to ``output_path`` while showing a tqdm progress bar.

    Args:
        url: HTTP(S) address of the file to fetch.
        output_path: Local file the body is written to; an existing file is
            overwritten.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        IOError: If the server announced a Content-Length and fewer bytes
            than that were received.
    """
    print(f"  üì• Downloading from {url}")

    # Using the response as a context manager releases the underlying
    # connection even when writing fails part-way through the stream.
    with requests.get(url, stream=True, timeout=300) as response:
        response.raise_for_status()

        expected_size = int(response.headers.get('content-length', 0))
        received = 0

        with open(output_path, 'wb') as f, tqdm(
            # total=None tells tqdm the length is unknown; total=0 would
            # render a meaningless 0-byte bar.
            total=expected_size or None,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as pbar:
            for data in response.iter_content(chunk_size=1024*1024):
                if not data:
                    # Skip empty keep-alive chunks.
                    continue
                f.write(data)
                received += len(data)
                pbar.update(len(data))

        # Content-Length counts encoded bytes, so the comparison is only
        # meaningful when the body was not transparently decompressed.
        body_was_decoded = bool(response.headers.get('content-encoding'))

    if expected_size and not body_was_decoded and received < expected_size:
        raise IOError(
            f"Incomplete download: got {received} of {expected_size} bytes from {url}"
        )

def extract_tar(tar_path, extract_to):
    """Extract the archive at ``tar_path`` into the directory ``extract_to``.

    The archive comes from the network, so members are not trusted: entries
    that would be written outside ``extract_to`` are rejected instead of
    being extracted.

    Args:
        tar_path: Path to a tar archive (any compression tarfile can detect).
        extract_to: Destination directory.

    Raises:
        tarfile.TarError: If the archive is unreadable or contains a member
            that would escape ``extract_to``.
    """
    print(f"  üì¶ Extracting {os.path.basename(tar_path)}...")
    with tarfile.open(tar_path, 'r:*') as tar:
        # Interpreters that ship extraction filters expose
        # `tarfile.data_filter`; the 'data' filter blocks path traversal,
        # unsafe links and special files.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(path=extract_to, filter='data')
            return

        # Fallback for interpreters without extraction filters: refuse any
        # member whose resolved target lies outside the destination.
        root = os.path.realpath(extract_to)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise tarfile.TarError(
                    f"Unsafe path in archive: {member.name!r}"
                )
        tar.extractall(path=extract_to)

# Check if images already exist
def count_images(directory):
    """Return how many entries directly inside ``directory`` look like images.

    An entry counts when its name ends in .jpg, .jpeg or .png (case
    insensitive). Sub-directories are not descended into. A path that is
    missing or is not a directory yields 0.
    """
    if not os.path.isdir(directory):
        return 0

    image_suffixes = ('.jpg', '.jpeg', '.png')
    return sum(
        1
        for entry in os.listdir(directory)
        if entry.lower().endswith(image_suffixes)
    )

# Only the training split is inspected to decide whether the images are
# already present on Drive.
train_count = count_images(os.path.join(DATASET_DIR, 'train_images'))

if train_count < 100000:  # Threshold for "complete" dataset
    print("\n‚ö†Ô∏è  Images missing or incomplete!")
    print("üì¶ Downloading iFood 2019 dataset (~3.1 GB)\n")

    # Dataset URLs
    DATASETS = {
        'train': {
            'url': 'https://food-x.s3.amazonaws.com/train.tar',
            'size': '2.3 GB'
        },
        'val': {
            'url': 'https://food-x.s3.amazonaws.com/val.tar',
            'size': '231 MB'
        },
        'test': {
            'url': 'https://food-x.s3.amazonaws.com/test.tar',
            'size': '548 MB'
        }
    }

    # Archives are staged on the local Colab disk and extracted into Drive.
    DOWNLOAD_DIR = '/content/ifood_downloads'
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    # Splits that could not be downloaded or extracted, so the closing
    # message reflects what actually happened.
    failed_splits = []

    # Download and extract each split
    for i, (name, info) in enumerate(DATASETS.items(), 1):
        print(f"\n[{i}/{len(DATASETS)}] {name.upper()} SET ({info['size']})")
        print("-"*70)

        tar_path = os.path.join(DOWNLOAD_DIR, f"{name}.tar")

        try:
            # Download
            try:
                download_file(info['url'], tar_path)
            except Exception as e:
                print(f"  ‚ùå Download failed: {e}")
                print(f"  üí° Try downloading manually from: {info['url']}")
                failed_splits.append(name)
                continue

            # Extract
            try:
                extract_tar(tar_path, DATASET_DIR)

                # Rename to standard names
                old_path = os.path.join(DATASET_DIR, name)
                new_path = os.path.join(DATASET_DIR, f'{name}_images')

                if os.path.exists(old_path) and not os.path.exists(new_path):
                    os.rename(old_path, new_path)

                print(f"  ‚úÖ {name} complete!")

            except Exception as e:
                print(f"  ‚ùå Extraction failed: {e}")
                failed_splits.append(name)
        finally:
            # Cleanup runs for every outcome, including a failed download,
            # so no partial archive is left on disk.
            if os.path.exists(tar_path):
                os.remove(tar_path)

    if failed_splits:
        print(f"\n‚ùå Dataset download incomplete - failed: {', '.join(failed_splits)}")
        print(f"üí° Download the failed archives manually and extract them into {DATASET_DIR}")
    else:
        print("\n‚úÖ Dataset download complete!")
else:
    print(f"\n‚úÖ Images already exist ({train_count:,} training images)")
    print("‚è© Skipping download")

# ============================================================
# PART 4: VERIFY DATASET
# ============================================================
print("\n" + "="*70)
print("PART 4: Verifying dataset")
print("-"*70)

# Metadata files and image folders expected inside DATASET_DIR.
required = {
    'files': ['train_info.csv', 'val_info.csv', 'test_info.csv', 'class_list.txt'],
    'dirs': ['train_images', 'val_images', 'test_images']
}

# Every required file or directory that is absent or empty is collected here
# so the final verdict covers all checks, not just "some image exists".
missing_items = []

print("\nüìÑ Checking files:")
for file in required['files']:
    path = os.path.join(DATASET_DIR, file)
    exists = os.path.exists(path)
    if not exists:
        missing_items.append(file)
    print(f"  {'‚úÖ' if exists else '‚ùå'} {file}")

print("\nüìÅ Checking image directories:")
total_images = 0
for dir_name in required['dirs']:
    path = os.path.join(DATASET_DIR, dir_name)
    count = count_images(path)
    total_images += count
    if count == 0:
        missing_items.append(dir_name)
    print(f"  {'‚úÖ' if count > 0 else '‚ùå'} {dir_name:15s} - {count:,} images")

if total_images == 0:
    print(f"\n‚ùå No images found!")
    print(f"üí° Check dataset manually or re-run PART 3")
elif missing_items:
    # Some images exist, but at least one required item is missing.
    print(f"\n‚ö†Ô∏è  Dataset incomplete! Total images: {total_images:,}")
    print(f"üí° Missing: {', '.join(missing_items)}")
else:
    print(f"\n‚úÖ Dataset verified! Total images: {total_images:,}")

# ============================================================
# PART 5: CLONE REPOSITORY
# ============================================================
print("\n" + "="*70)
print("PART 5: Cloning repository")
print("-"*70)

REPO_URL = "https://github.com/deftorch/alexnet-ifood2019.git"
REPO_DIR = "/content/alexnet-ifood2019"

if os.path.exists(REPO_DIR):
    print("\n‚è≥ Removing old repository...")
    !rm -rf {REPO_DIR}

print(f"\nüì• Cloning from: {REPO_URL}")
!git clone -q {REPO_URL} {REPO_DIR}

os.chdir(REPO_DIR)
print(f"‚úÖ Repository cloned!")
print(f"üìÇ Working directory: {os.getcwd()}")

# Verify repository structure
print("\nüìÅ Verifying repository structure:")
required_paths = [
    'src',
    'src/models',
    'src/models/alexnet.py',
    'src/data_loader.py',
    'src/train.py',
    'src/evaluate.py'
]

all_exist = True
for path in required_paths:
    full_path = os.path.join(REPO_DIR, path)
    exists = os.path.exists(full_path)
    print(f"  {'‚úÖ' if exists else '‚ùå'} {path}")
    if not exists:
        all_exist = False

if not all_exist:
    print("\n‚ùå Repository structure incomplete!")
    print("üí° Make sure all source files are pushed to GitHub")
    print("üí° Or use the generated source code files")
else:
    print("\n‚úÖ Repository structure verified!")

# ============================================================
# PART 6: CREATE SYMBOLIC LINKS
# ============================================================
import shutil

print("\n" + "="*70)
print("PART 6: Creating symbolic links")
print("-"*70)

# Name inside the repository -> folder inside the Drive project.
LINKS = {
    'data': 'dataset',
    'checkpoints': 'checkpoints',
    'evaluation_results': 'evaluation_results',
    'analysis_results': 'analysis_results',
    'logs': 'logs'
}

print("\nüîó Creating links...\n")

# Links that could not be created, so the closing message is accurate.
failed_links = []

for local_name, drive_folder in LINKS.items():
    local_path = os.path.join(REPO_DIR, local_name)
    drive_path = os.path.join(PROJECT_PATH, drive_folder)

    # Clear whatever currently occupies the link location. The symlink check
    # comes first so that only the link is removed, never its target, and so
    # that dangling links (invisible to os.path.exists) are handled too.
    if os.path.islink(local_path):
        os.unlink(local_path)
    elif os.path.isdir(local_path):
        shutil.rmtree(local_path)
    elif os.path.lexists(local_path):
        os.remove(local_path)

    # os.symlink avoids passing unquoted paths through a shell and reports
    # failure as an exception.
    try:
        os.symlink(drive_path, local_path)
    except OSError as e:
        print(f"  ‚ùå {local_name:20s} - Failed ({e})")
        failed_links.append(local_name)
        continue

    print(f"  ‚úÖ {local_name:20s} -> {drive_folder}")

if failed_links:
    print(f"\n‚ùå Could not create links: {', '.join(failed_links)}")
else:
    print("\n‚úÖ All links created!")

# ============================================================
# PART 7: SETUP PYTHON ENVIRONMENT
# ============================================================
print()
print("=" * 70)
print("PART 7: Configuring Python environment")
print("-" * 70)

# Each entry is pushed to the front of sys.path in turn, so the last one
# listed (src/) ends up with the highest import priority.
paths_to_add = [REPO_DIR, os.path.join(REPO_DIR, 'src')]

for path in paths_to_add:
    if path in sys.path:
        # Already importable; leave sys.path untouched.
        continue
    sys.path.insert(0, path)
    print("‚úÖ Added to path: " + path)

# ============================================================
# PART 8: TEST IMPORTS
# ============================================================
print()
print("=" * 70)
print("PART 8: Testing imports")
print("-" * 70)

# Smoke test: import the project modules and build each model variant once.
# Any failure in this section is reported with the same troubleshooting hints.
try:
    from src.models.alexnet import get_model
    from src.data_loader import get_dataloaders

    print("\n‚úÖ All imports successful!")

    # Build every variant and report its parameter count.
    print("\nüß™ Testing models:")
    for model_name in ('alexnet_baseline', 'alexnet_mod1',
                       'alexnet_mod2', 'alexnet_combined'):
        model = get_model(model_name, num_classes=251)
        params = sum(weights.numel() for weights in model.parameters())
        print("  ‚úÖ {:20s} - {:,} parameters".format(model_name, params))

except Exception as e:
    print("\n‚ùå Import error: " + str(e))
    print("\n".join([
        "\nüí° Make sure:",
        "  1. Repository has complete src/ folder",
        "  2. All Python files are present",
        "  3. __init__.py files exist",
    ]))

# ============================================================
# PART 9: GPU CONFIGURATION
# ============================================================
print()
print("=" * 70)
print("PART 9: GPU configuration")
print("-" * 70)

if not torch.cuda.is_available():
    # No CUDA device: explain how to enable one and fall back to the
    # smallest batch size.
    print("\n".join([
        "\n‚ö†Ô∏è  WARNING: GPU NOT AVAILABLE!",
        "\nüí° To enable GPU:",
        "  1. Runtime > Change runtime type",
        "  2. Hardware accelerator > GPU (T4)",
        "  3. Save and reconnect",
        "\n‚ö†Ô∏è  Training on CPU will be EXTREMELY slow!",
    ]))
    batch_size = 32
else:
    gpu_name = torch.cuda.get_device_name(0)
    # Total device memory in decimal gigabytes (bytes / 1e9).
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9

    print("\nüéÆ GPU Information:")
    print("  Device: {}".format(gpu_name))
    print("  Memory: {:.2f} GB".format(gpu_mem))

    # Batch size grows with available memory; tiers are checked from the
    # smallest upwards.
    if gpu_mem < 12:
        batch_size = 32
        print("  Recommended batch size: 32-64")
    elif gpu_mem < 15:
        batch_size = 64
        print("  Recommended batch size: 64-128")
    else:
        batch_size = 128
        print("  Recommended batch size: 128-256")

    print("\n‚úÖ GPU ready for training!")

# ============================================================
# FINAL SUMMARY
# ============================================================
print("\n" + "="*70)
print("‚ú® SETUP COMPLETE ‚ú®")
print("="*70)

# Build the GPU line before the template so that exactly one status marker is
# shown: a check mark with the device name, or a cross when no GPU exists.
# `gpu_name` is only defined when CUDA is available, hence the guard.
if torch.cuda.is_available():
    gpu_status = "‚úÖ " + gpu_name
else:
    gpu_status = "‚ùå Not available"

print(f"""
‚úÖ All components ready!

üìÇ Project: {PROJECT_PATH}
üì¶ Repository: {REPO_DIR}
üìä Dataset: {total_images:,} images
üéÆ GPU: {gpu_status}

üöÄ Start Training:
  python src/train.py \\
      --data_dir data \\
      --model_name alexnet_baseline \\
      --num_epochs 50 \\
      --batch_size {batch_size} \\
      --lr 0.01

üìä Evaluate Model:
  python src/evaluate.py \\
      --model_name alexnet_baseline \\
      --checkpoint checkpoints/alexnet_baseline/best_model.pth

üí° All data auto-saves to Google Drive!

{"="*70}
""")