1. Load ACE-Step following the instructions in its README.
2. Run the following code: 

In [None]:
import os
import json
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import librosa
#from openai import OpenAI
from google.cloud import storage
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import tempfile
import warnings
warnings.filterwarnings('ignore')
# Import necessary libraries for authentication
from google.colab import auth
from google.cloud import storage
import os

# Authenticate with Google Cloud
# This runs the google.colab auth helper imported above; the GCS client in a
# later cell is created without explicit credentials.
print("🔐 Authenticating with Google Cloud...")
auth.authenticate_user()

# Google Cloud project ID for this work (taken from the screenshot).
# NOTE(review): project_id is assigned here but not referenced by any other
# visible cell — confirm whether it should be passed to storage.Client(project=...).
project_id = "bayes-beats"

In [None]:
#!/usr/bin/env python3
"""
Script to download checkpoint files from Google Cloud Storage to local content folder
"""

import os
from google.cloud import storage
from pathlib import Path

# Configuration
BUCKET_NAME = "uchicago-bayesian-bayes-beats"
LOCAL_DOWNLOAD_PATH = "/content/checkpoints"  # Where to save in your content folder

def list_experiment_folders(bucket):
    """Print and return the experiment folders found under ``experiments/``.

    Args:
        bucket: Object exposing ``list_blobs(prefix=...)`` that yields items
            with a ``name`` attribute (e.g. a google-cloud-storage bucket).

    Returns:
        Sorted list of unique ``experiments/<folder_name>`` strings.
    """
    print("📋 Available experiment folders:")
    folders = set()

    for blob in bucket.list_blobs(prefix="experiments/"):
        parts = blob.name.split('/')
        # A folder only exists if something lives beneath it, i.e. the name
        # has the shape experiments/<folder>/<rest>. This skips the bare
        # "experiments/" directory marker and files stored directly under
        # experiments/, which are not folders.
        if len(parts) >= 3 and parts[1]:
            folders.add('/'.join(parts[:2]))  # experiments/folder_name

    # Sort once and reuse the result for both the listing and the return value.
    ordered = sorted(folders)
    for i, folder in enumerate(ordered, 1):
        print(f"   {i}. {folder}")

    return ordered

def download_entire_folder(
    specific_folder="experiments/ace_lora_finetune_20250526_023307/checkpoints",
    max_workers=16,
):
    """Download every file under a GCS folder into LOCAL_DOWNLOAD_PATH in parallel.

    The folder structure below ``specific_folder`` is preserved locally.

    Args:
        specific_folder: Object-name prefix of the folder inside BUCKET_NAME.
            A trailing slash is optional.
        max_workers: Number of concurrent download threads.

    Returns:
        True if at least one file was downloaded; False if the connection
        failed, the folder held no files, or every download failed.
    """
    import concurrent.futures

    # Initialize the GCS client
    try:
        client = storage.Client()
        bucket = client.bucket(BUCKET_NAME)
        print(f"✅ Successfully connected to bucket: {BUCKET_NAME}")
    except Exception as e:
        print(f"❌ Error connecting to GCS: {e}")
        return False

    print(f"📂 Downloading entire folder: {specific_folder}")

    # Create local download directory
    os.makedirs(LOCAL_DOWNLOAD_PATH, exist_ok=True)

    # Anchor the prefix on a folder boundary. Without the trailing slash a
    # prefix such as ".../checkpoints" would also match sibling folders like
    # ".../checkpoints_old/".
    trimmed = specific_folder.rstrip('/')
    folder_prefix = f"{trimmed}/" if trimmed else ""

    # List ALL files in the checkpoint folder (not just .ckpt files)
    print("🔍 Scanning folder contents...")
    all_blobs = list(bucket.list_blobs(prefix=folder_prefix))

    # Filter out directory markers and zero-byte objects
    file_blobs = [blob for blob in all_blobs if not blob.name.endswith('/') and blob.size > 0]

    if not file_blobs:
        print(f"❌ No files found in {specific_folder}")
        return False

    # Calculate total size
    total_size = sum(blob.size for blob in file_blobs)
    total_size_gb = total_size / (1024**3)

    print(f"🎯 Found {len(file_blobs)} files to download")
    print(f"📦 Total size: {total_size_gb:.2f} GB")

    # Show file breakdown
    file_types = {}
    for blob in file_blobs:
        ext = os.path.splitext(blob.name)[1] or 'no_ext'
        file_types[ext] = file_types.get(ext, 0) + 1

    print("📊 File types:")
    for ext, count in file_types.items():
        print(f"   • {ext}: {count} files")

    print(f"\n🚀 Starting parallel download to {LOCAL_DOWNLOAD_PATH}/")
    print("🚄 Using A100 GPU's high-bandwidth for faster downloads!")
    print("=" * 60)

    def download_file(blob):
        """Download one blob and return ``(succeeded, status_line)``."""
        try:
            # Slice off the leading prefix rather than str.replace(), which
            # would also delete later occurrences of the folder string.
            # Leading slashes are stripped so os.path.join cannot be handed
            # an absolute path that discards LOCAL_DOWNLOAD_PATH.
            relative_path = blob.name[len(folder_prefix):].lstrip('/')
            local_path = os.path.join(LOCAL_DOWNLOAD_PATH, relative_path)

            # Create subdirectories if needed
            local_dir = os.path.dirname(local_path)
            if local_dir:
                os.makedirs(local_dir, exist_ok=True)

            # Download file
            blob.download_to_filename(local_path)

            # Verify download
            if os.path.exists(local_path):
                file_size_mb = os.path.getsize(local_path) / (1024**2)
                return True, f"✅ {relative_path} ({file_size_mb:.1f} MB)"
            return False, f"❌ Failed: {relative_path}"

        except Exception as e:
            # Report the failure as a status line so one bad file does not
            # abort the remaining downloads.
            return False, f"❌ Error downloading {blob.name}: {e}"

    # Downloads are network-bound, so a thread pool lets the transfers
    # overlap; the worker count is unrelated to the GPU.
    downloaded_count = 0
    failed_count = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all download tasks
        futures = [executor.submit(download_file, blob) for blob in file_blobs]

        # Process completed downloads as they finish
        for future in concurrent.futures.as_completed(futures):
            succeeded, message = future.result()
            print(message)

            # Count on the explicit flag instead of parsing the emoji prefix.
            if succeeded:
                downloaded_count += 1
            else:
                failed_count += 1

    print("=" * 60)
    print(f"📈 Download Summary:")
    print(f"   ✅ Successfully downloaded: {downloaded_count} files")
    print(f"   ❌ Failed downloads: {failed_count} files")
    print(f"   📍 Local location: {LOCAL_DOWNLOAD_PATH}/")

    # List everything now present in the download directory
    if downloaded_count > 0:
        print(f"\n📋 Downloaded files:")
        for root, dirs, files in os.walk(LOCAL_DOWNLOAD_PATH):
            for file in files:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, LOCAL_DOWNLOAD_PATH)
                size_mb = os.path.getsize(full_path) / (1024**2)
                print(f"   • {rel_path} ({size_mb:.1f} MB)")

    return downloaded_count > 0

def main():
    """Announce the download settings, run the folder download, report the outcome."""
    banner = (
        "🚀 Starting FULL FOLDER download from Google Cloud Storage...",
        f"🪣 Source bucket: {BUCKET_NAME}",
        "📂 Target folder: experiments/ace_lora_finetune_20250526_023307/checkpoints",
        f"📂 Download location: {LOCAL_DOWNLOAD_PATH}",
        "💪 Leveraging A100 GPU high-bandwidth connection!",
        "-" * 60,
    )
    for line in banner:
        print(line)

    # Guard clause: bail out early when nothing was downloaded.
    if not download_entire_folder():
        print("\n💥 Download failed. Please check the error messages above.")
        return

    next_steps = (
        "\n🎉 Full folder download completed successfully!",
        f"💡 All checkpoint files and configs are now available in {LOCAL_DOWNLOAD_PATH}/",
        "🔧 You can now use the directory with:",
        f"   acestep --checkpoint_path {LOCAL_DOWNLOAD_PATH}/ --port 7865",
        "📁 This should auto-select the best checkpoint and load configs!",
    )
    for line in next_steps:
        print(line)

if __name__ == "__main__":
    main()

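## IMPORTANT: put the checkpoints folder in a directory called "unzip"
The download cell above saves the files to /content/checkpoints. Move that folder into /content/unzip so that your /content directory looks like this: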
.
├── ACE-Step/
├── outputs/
├── sample_data/
├── unzip/
│   └── checkpoints/

(click this cell to see this in a more intuitive way)

Run the following: 

In [None]:
# `!export` only sets the variable inside a throwaway subshell, so it never
# reaches the notebook kernel or later `!` commands. `%env` sets it in the
# kernel's environment, which subsequently launched processes inherit.
%env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
%env CUDA_LAUNCH_BLOCKING=0

Run the cell below and open the public URL shown in its output:

In [None]:
# The checkpoints live under /content/unzip/checkpoints (see the directory
# layout described above), not at the filesystem root.
!acestep --checkpoint_path /content/unzip/checkpoints/ --port 7865 --device_id 0 --share true --torch_compile true --cpu_offload false --overlapped_decode true