# 3D Data Management - Staging Operations

High-speed, concurrent staging system for production tasks.
Reads its configuration from the `.env` tab of the sheet, filters the tasks that are ready for staging, and copies their folders to the staging area in parallel.

## Prerequisites
- Google account with access to the 3D Data Management sheet
- Production and Staging folder access
- Tasks with status='complete' and review_status='passed'

In [None]:
# Cell 1: Setup and Authentication with Shared Credentials
import os
import re
import time
import random
import threading
import socket
import ssl
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
import uuid

# Google API imports
from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import pandas as pd
from google.auth.transport.requests import Request
import google.auth

# HARDCODED SHEET ID - Replace with your actual Sheet ID
# This spreadsheet is read by the later cells: the `.env` tab for
# configuration and the `Tasks` tab for the task list.
SHEET_ID = "1HmDdq5g0Zk7d7Uodbh7fIUFq5XiONDZyvrBBZQgNK0w"

# Global credentials storage
# Filled in lazily by get_credentials() and cleared by force_reauth().
_global_credentials = None
# Per-thread cache for the API clients (attributes `drive_service` and
# `sheets_service`), so every thread builds and reuses its own instances.
thread_local = threading.local()

def get_credentials():
    """Return the shared Google credentials, resolving them on first use.

    The credentials are obtained from ``google.auth.default()`` and cached in
    the module-level ``_global_credentials`` so that subsequent calls return
    the same object.
    """
    global _global_credentials
    if _global_credentials is not None:
        return _global_credentials

    # Cache is empty: take the default credentials of the authenticated
    # session. The second tuple element is not needed here.
    resolved, _unused = google.auth.default()
    _global_credentials = resolved
    return _global_credentials

def test_ssl_connection():
    """Probe a TLS handshake with www.googleapis.com:443.

    Prints the outcome and returns True when the handshake succeeds, False
    when any exception occurs (the exception is reported, not raised).
    """
    api_host = "www.googleapis.com"
    try:
        tls_context = ssl.create_default_context()
        # Both sockets are closed on exit, whether or not the handshake works.
        with socket.create_connection((api_host, 443), timeout=10) as tcp_sock, \
                tls_context.wrap_socket(tcp_sock, server_hostname=api_host) as tls_sock:
            print(f"✅ SSL connection test: OK (Protocol: {tls_sock.version()})")
            return True
    except Exception as e:
        print(f"⚠️ SSL connection test: Issues detected - {e}")
        return False

def test_auth_with_simple_call():
    """Check that the credentials work by asking Drive who the user is.

    Issues ``about().get(fields='user')`` on this thread's Drive service,
    prints the result, and returns True on success or False if anything
    raises (the exception is reported, not propagated).
    """
    try:
        about_info = get_thread_safe_drive_service().about().get(fields='user').execute()
        user_section = about_info.get('user', {})
        user_email = user_section.get('emailAddress', 'Unknown')
        print(f"✅ Authentication test: OK (User: {user_email})")
    except Exception as e:
        print(f"❌ Authentication test: FAILED - {e}")
        return False
    return True

def get_thread_safe_drive_service():
    """Return the calling thread's Drive v3 service, building it if needed.

    The service is stored on ``thread_local`` so each thread creates its own
    instance once and reuses it afterwards. Construction errors are printed
    and re-raised.
    """
    if hasattr(thread_local, 'drive_service'):
        return thread_local.drive_service

    try:
        # Built from the shared credentials only (no custom http object).
        new_service = build('drive', 'v3', credentials=get_credentials())
        thread_local.drive_service = new_service
        print(f"🔧 Created new Drive service for thread {threading.current_thread().ident}")
    except Exception as e:
        print(f"❌ Failed to create Drive service: {e}")
        raise
    return thread_local.drive_service

def get_thread_safe_sheets_service():
    """Return the calling thread's Sheets v4 service, building it if needed.

    The service is stored on ``thread_local`` so each thread creates its own
    instance once and reuses it afterwards. Construction errors are printed
    and re-raised.
    """
    if hasattr(thread_local, 'sheets_service'):
        return thread_local.sheets_service

    try:
        # Built from the shared credentials only (no custom http object).
        new_service = build('sheets', 'v4', credentials=get_credentials())
        thread_local.sheets_service = new_service
        print(f"🔧 Created new Sheets service for thread {threading.current_thread().ident}")
    except Exception as e:
        print(f"❌ Failed to create Sheets service: {e}")
        raise
    return thread_local.sheets_service

def reset_thread_services():
    """Drop the cached Drive and Sheets services of the calling thread.

    The next ``get_thread_safe_*_service()`` call on this thread builds a
    fresh client (useful after SSL errors). Because the cache lives on a
    ``threading.local``, other threads' cached services are not affected.
    """
    for cached_name in ('drive_service', 'sheets_service'):
        if hasattr(thread_local, cached_name):
            delattr(thread_local, cached_name)

def force_reauth():
    """Discard cached auth state, sign in again and verify the result.

    Returns True when the follow-up Drive test call succeeds, False when it
    fails or when any step raises (the error is printed, not propagated).
    """
    global _global_credentials
    print("🔄 Forcing re-authentication...")
    try:
        # Throw away this thread's services and the shared credentials so
        # nothing stale survives the new sign-in.
        reset_thread_services()
        _global_credentials = None

        auth.authenticate_user()
        _global_credentials = get_credentials()

        # Confirm the fresh credentials with a real API call.
        verified = bool(test_auth_with_simple_call())
        if verified:
            print("✅ Re-authentication successful")
        else:
            print("❌ Re-authentication failed")
        return verified
    except Exception as e:
        print(f"❌ Re-authentication error: {e}")
        return False

# --- Cell bootstrap: executed once, top to bottom, when this cell runs ---
print("\n🔐 Starting Enhanced OAuth authentication...")
# Sign in through the google.colab auth helper before any API client is built.
auth.authenticate_user()

# Initialize global credentials
# A failure here is only reported; the diagnostics below decide whether
# authentication is actually usable.
try:
    _global_credentials = get_credentials()
    print("✅ Credentials established")
except Exception as e:
    print(f"⚠️ Credentials setup issue: {e}")

print("\n🔍 Running diagnostics...")
# The SSL probe is informational: its return value is not used.
test_ssl_connection()
auth_ok = test_auth_with_simple_call()

# One automatic recovery attempt before giving up.
if not auth_ok:
    print("\n⚠️ Initial authentication failed. Attempting re-authentication...")
    auth_ok = force_reauth()

if auth_ok:
    # Initialize main thread services
    # NOTE: `drive_service` and `sheets_service` are only defined when
    # authentication succeeded.
    drive_service = get_thread_safe_drive_service()
    sheets_service = get_thread_safe_sheets_service()
    
    print("✅ Authentication and service initialization complete.")
    print(f"📋 Using Sheet ID: {SHEET_ID}")
    print("🛡️ SSL error protection enabled with thread-safe service instances.")
else:
    print("❌ Authentication failed completely. Check your Google account permissions.")
    print("💡 Try: Runtime → Factory Reset Runtime, then re-run this cell")

In [None]:
# Cell 2: Configuration Manager with Fixed Authentication

class ConfigManager:
    """Key/value configuration read from the ``.env`` tab of the spreadsheet.

    The constructor's ``sheets_service`` is stored but ``load_config`` always
    fetches the calling thread's Sheets service itself.
    """

    def __init__(self, sheets_service, sheet_id):
        self.sheets_service = sheets_service
        self.sheet_id = sheet_id
        self.config = {}

    def load_config(self):
        """Read ``.env!A:B`` into ``self.config`` and return it.

        Column A holds the key and column B the value; both are stripped and
        rows with fewer than two cells or an empty key/value are ignored.

        Raises:
            ValueError: if PRODUCTION_FOLDER_ID or STAGING_FOLDER_ID is
                missing after loading.
            Exception: any API error, re-raised after printing
                troubleshooting hints.
        """
        try:
            service = get_thread_safe_sheets_service()

            response = service.spreadsheets().values().get(
                spreadsheetId=self.sheet_id,
                range='.env!A:B'
            ).execute()

            for cells in response.get('values', []):
                if len(cells) < 2:
                    continue
                name, setting = cells[0].strip(), cells[1].strip()
                if name and setting:
                    self.config[name] = setting

            # Both folder IDs are mandatory for the staging workflow.
            absent = [
                name
                for name in ('PRODUCTION_FOLDER_ID', 'STAGING_FOLDER_ID')
                if name not in self.config
            ]
            if absent:
                raise ValueError(f"Missing required configuration keys: {absent}")

            print(f"✅ Configuration loaded successfully")
            print(f"📁 Production Folder: {self.config['PRODUCTION_FOLDER_ID']}")
            print(f"📂 Staging Folder: {self.config['STAGING_FOLDER_ID']}")

            return self.config

        except Exception as e:
            details = str(e)
            is_unregistered_caller = "403" in details and "unregistered callers" in details
            if not is_unregistered_caller:
                print(f"❌ Failed to load configuration: {str(e)}")
            else:
                # This specific 403 gets step-by-step recovery instructions.
                print("❌ Authentication Error: Google API credentials not properly established")
                print("🔧 SOLUTION: Restart your Colab runtime and re-run authentication:")
                print("   1. Runtime → Restart Runtime")
                print("   2. Re-run Cell 1 to authenticate")
                print("   3. Try again")
                print(f"\n🔍 Also verify the sheet ID is correct: {self.sheet_id}")
            raise

    def get(self, key, default=None):
        """Return the loaded value for ``key``, or ``default`` if absent."""
        return self.config.get(key, default)

# Printed when the cell runs, i.e. once the ConfigManager class above is defined.
print("⚙️ ConfigManager ready with authentication troubleshooting.")

In [None]:
# Cell 3: Task Reader with Thread-Safe Services

class TaskReader:
    """Reads the ``Tasks`` tab and selects the tasks that are ready for staging.

    A task is stageable when its Status is ``complete``, its Review Status is
    ``passed`` (both compared case-insensitively after stripping) and none of
    the export tracking columns holds a value yet.
    """

    # Any non-empty value in one of these columns marks a task as already
    # processed for export.
    _EXPORT_COLUMNS = ('Export Status', 'Export Batch', 'Export Time')

    def __init__(self, sheets_service, sheet_id):
        """Remember the spreadsheet ID.

        ``sheets_service`` is accepted for interface compatibility but is not
        stored: the calling thread's Sheets service is fetched on demand.
        """
        self.sheet_id = sheet_id

    def get_stageable_tasks(self, batch_id_filter=None, agent_filter=None):
        """Return the list of task dicts that are ready for staging.

        Args:
            batch_id_filter: optional Batch ID; only rows whose (stripped)
                Batch ID equals it are kept.
            agent_filter: optional text; only rows whose Agent Email contains
                it (case-insensitive, literal match) are kept.

        Returns:
            A list of dicts with the keys task_id, folder_name,
            production_folder_id, batch_id, agent_email, group, review_score,
            status, review_status, export_status, export_batch, export_time.
            Empty when the sheet has no data.

        Raises:
            Exception: any API or data error, re-raised after printing it.
        """
        try:
            sheets_service = get_thread_safe_sheets_service()

            # A:AZ so that columns beyond Z (e.g. Export Status) are included.
            result = sheets_service.spreadsheets().values().get(
                spreadsheetId=self.sheet_id,
                range='Tasks!A:AZ'
            ).execute()

            values = result.get('values', [])
            if not values:
                print("❌ No data found in Tasks sheet")
                return []

            df = self._build_dataframe(values)
            self._print_filter_debug(df)

            stageable = df[self._stageable_mask(df)].copy()
            stageable = self._apply_optional_filters(stageable, batch_id_filter, agent_filter)

            tasks = [self._row_to_task(row) for _, row in stageable.iterrows()]

            print(f"\n📊 Found {len(tasks)} tasks ready for staging")
            if tasks:
                sample = tasks[0]
                print(f"📋 Sample task: {sample['folder_name']}")
                print(f"    Status: '{sample['status']}', Review: '{sample['review_status']}'")
                print(f"    Export Status: '{sample['export_status']}', Export Batch: '{sample['export_batch']}'")

            return tasks

        except Exception as e:
            print(f"❌ Failed to read tasks: {str(e)}")
            raise

    @staticmethod
    def _column_label(index):
        """Convert a 0-based column index to its sheet letters (0->A, 26->AA)."""
        label = ''
        remaining = index + 1
        while remaining > 0:
            remaining, offset = divmod(remaining - 1, 26)
            label = chr(65 + offset) + label
        return label

    @classmethod
    def _build_dataframe(cls, values):
        """Turn raw sheet rows (header row first) into a rectangular DataFrame.

        The Sheets API omits trailing empty cells, so rows can be shorter than
        the header and the header can be shorter than the widest row. Missing
        headers are named ``Column_<letters>`` and short rows are padded with
        empty strings.
        """
        headers = list(values[0])  # copy: do not mutate the caller's data
        data_rows = values[1:]

        max_cols = max(len(headers), max((len(row) for row in data_rows), default=0))
        print(f"📋 Headers: {len(headers)} columns, Max data: {max_cols} columns")

        # Name unnamed columns by their real sheet letters, also beyond Z.
        headers.extend(
            f'Column_{cls._column_label(i)}' for i in range(len(headers), max_cols)
        )

        padded_rows = [row + [''] * (len(headers) - len(row)) for row in data_rows]
        return pd.DataFrame(padded_rows, columns=headers)

    @classmethod
    def _print_filter_debug(cls, df):
        """Print value distributions of the columns used for filtering."""
        print("\n🔍 DEBUGGING FILTER COLUMNS:")

        if 'Status' in df.columns:
            status_counts = df['Status'].value_counts()
            print(f"📊 Status column values: {dict(status_counts)}")

        if 'Review Status' in df.columns:
            review_counts = df['Review Status'].value_counts()
            print(f"📊 Review Status column values: {dict(review_counts)}")

        for col in cls._EXPORT_COLUMNS:
            if col not in df.columns:
                print(f"❌ '{col}' column not found")
                continue
            cleaned = df[col].fillna('').astype(str).str.strip()
            filled = cleaned[cleaned != '']
            non_empty_count = len(filled)
            empty_count = len(df) - non_empty_count
            print(f"📊 {col}: {non_empty_count} non-empty, {empty_count} empty")
            if non_empty_count > 0:
                sample_values = filled.unique()[:5]
                print(f"    Sample values: {list(sample_values)}")

        print("🔍 END DEBUG\n")

    @classmethod
    def _stageable_mask(cls, df):
        """Return a boolean Series marking the rows that are ready for staging.

        Raises KeyError when the 'Status' or 'Review Status' column is missing.
        """
        status = df['Status'].fillna('').astype(str).str.strip().str.lower()
        review = df['Review Status'].fillna('').astype(str).str.strip().str.lower()

        # Built on df.index so the boolean operations below stay aligned.
        export_processed = pd.Series(False, index=df.index)
        for col in cls._EXPORT_COLUMNS:
            if col in df.columns:
                has_value = df[col].fillna('').astype(str).str.strip() != ''
                export_processed = export_processed | has_value

        is_complete = status == 'complete'
        is_passed = review == 'passed'
        mask = is_complete & is_passed & ~export_processed

        print(f"🎯 Tasks with status='complete': {int(is_complete.sum())}")
        print(f"🎯 Tasks with review_status='passed': {int(is_passed.sum())}")
        print(f"🎯 Tasks already processed for export: {int(export_processed.sum())}")
        print(f"🎯 Tasks NOT processed for export: {int((~export_processed).sum())}")
        print(f"🎯 Tasks meeting ALL criteria: {int(mask.sum())}")
        return mask

    @staticmethod
    def _apply_optional_filters(stageable, batch_id_filter, agent_filter):
        """Narrow ``stageable`` by the optional batch and agent filters."""
        if batch_id_filter:
            before = len(stageable)
            # Strip the sheet value so stray spaces do not hide a batch.
            batch_ids = stageable['Batch ID'].fillna('').astype(str).str.strip()
            stageable = stageable[batch_ids == batch_id_filter]
            print(f"🔍 After batch filter '{batch_id_filter}': {len(stageable)} (was {before})")

        if agent_filter:
            before = len(stageable)
            # regex=False: the filter is user-typed text, and e-mail addresses
            # routinely contain regex metacharacters such as '.' and '+'.
            stageable = stageable[
                stageable['Agent Email'].str.contains(
                    agent_filter, case=False, na=False, regex=False
                )
            ]
            print(f"🔍 After agent filter '{agent_filter}': {len(stageable)} (was {before})")

        return stageable

    def _row_to_task(self, row):
        """Map one DataFrame row to the task dict used by the staging steps."""
        return {
            'task_id': row['Task ID'],
            'folder_name': row['Folder Name'],
            'production_folder_id': self._extract_folder_id(row['Production Folder']),
            'batch_id': row['Batch ID'],
            'agent_email': row['Agent Email'],
            'group': row.get('Group', ''),
            'review_score': row.get('Review Score', ''),
            'status': row['Status'],
            'review_status': row['Review Status'],
            'export_status': row.get('Export Status', ''),
            'export_batch': row.get('Export Batch', ''),
            'export_time': row.get('Export Time', '')
        }

    def _extract_folder_id(self, folder_link):
        """Extract a Drive folder ID from a link, or return the value as an ID.

        Handles ``.../folders/<id>`` URLs and ``...?id=<id>`` URLs. Anything
        else is returned stripped and left for the Drive API to validate.
        Returns None for empty or whitespace-only input.
        """
        if not folder_link:
            return None

        link = str(folder_link).strip()
        if not link:
            return None

        match = re.search(r'folders/([a-zA-Z0-9_-]+)', link)
        if match:
            return match.group(1)

        # Older share links carry the ID as a query parameter instead.
        match = re.search(r'[?&]id=([a-zA-Z0-9_-]+)', link)
        if match:
            return match.group(1)

        return link

# Printed when the cell runs, i.e. once the TaskReader class above is defined.
print("📖 TaskReader ready with thread-safe services.")

In [None]:
# Cell 4: Thread-Safe Drive Operations

class DriveOperations:
    """Drive-side staging work: batch folder creation and task folder copies.

    Every method fetches the calling thread's Drive service via
    ``get_thread_safe_drive_service()`` at call time, so the instance holds
    no API client. Progress is counted in ``completed_count`` under
    ``progress_lock``.
    """

    # MIME type Drive uses for folders.
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    def __init__(self, drive_service=None):
        """Create the progress counter and its lock.

        ``drive_service`` is accepted for interface compatibility but is not
        stored.
        """
        self.progress_lock = Lock()
        self.completed_count = 0

    def create_batch_folder(self, staging_folder_id, batch_id):
        """Create ``Export_<YYYY-MM-DD>_<batch_id>`` inside the staging folder.

        Returns:
            dict with the new folder's 'id', 'name' and 'url' (webViewLink).

        Raises:
            Exception: any API error, re-raised after printing it.
        """
        try:
            drive_service = get_thread_safe_drive_service()
            timestamp = datetime.now().strftime('%Y-%m-%d')
            folder_name = f"Export_{timestamp}_{batch_id}"

            folder_metadata = {
                'name': folder_name,
                'parents': [staging_folder_id],
                'mimeType': self.FOLDER_MIME_TYPE
            }

            folder = drive_service.files().create(
                body=folder_metadata,
                fields='id,name,webViewLink'
            ).execute()

            print(f"📁 Created batch folder: {folder_name}")
            return {
                'id': folder.get('id'),
                'name': folder.get('name'),
                'url': folder.get('webViewLink')
            }

        except Exception as e:
            print(f"❌ Failed to create batch folder: {str(e)}")
            raise

    def copy_task_folder(self, task, batch_folder_id):
        """Copy the files of one task's production folder into the batch folder.

        Only the direct, non-folder children of the production folder are
        copied; subfolders are skipped. Up to 5 attempts are made with
        exponential backoff. The staging subfolder is created once and the
        files already copied are remembered, so a retry resumes instead of
        creating a second subfolder or duplicating files.

        Args:
            task: dict with at least 'task_id', 'folder_name' and
                'production_folder_id'.
            batch_folder_id: Drive ID of the batch folder to copy into.

        Returns:
            On success: dict with task_id, folder_name, success=True,
            file_count, duration, task_folder_id.
            On failure: dict with task_id, folder_name, success=False, error,
            duration. This method reports failures through the result rather
            than raising.
        """
        start_time = time.time()
        max_retries = 5
        base_delay = 1

        production_folder_id = task.get('production_folder_id')
        if not production_folder_id:
            # Without a source folder the list query would look for the
            # literal parent 'None'; fail fast instead of retrying that.
            return self._report_failure(
                task, start_time,
                error="No production folder ID for task",
                progress_note="Skipped - no production folder ID",
            )

        task_folder_id = None      # created once, reused by later attempts
        copied_file_ids = set()    # source files already copied successfully

        for attempt in range(max_retries):
            try:
                if task_folder_id is None:
                    drive_service = get_thread_safe_drive_service()
                    task_folder = drive_service.files().create(
                        body={
                            'name': task['folder_name'],
                            'parents': [batch_folder_id],
                            'mimeType': self.FOLDER_MIME_TYPE
                        },
                        fields='id,name'
                    ).execute()
                    task_folder_id = task_folder.get('id')

                for file in self._list_child_files(production_folder_id):
                    if file['mimeType'] == self.FOLDER_MIME_TYPE:
                        continue
                    if file['id'] in copied_file_ids:
                        continue

                    copy_metadata = {
                        'name': file['name'],
                        'parents': [task_folder_id]
                    }
                    # Default arguments pin the current file for the callback.
                    self._thread_safe_api_call(
                        lambda svc, file_id=file['id'], body=copy_metadata: svc.files().copy(
                            fileId=file_id,
                            body=body
                        ).execute()
                    )
                    copied_file_ids.add(file['id'])

                copied_count = len(copied_file_ids)
                duration = time.time() - start_time

                with self.progress_lock:
                    self.completed_count += 1
                    print(f"✅ ({self.completed_count}) {task['folder_name']}: {copied_count} files in {duration:.1f}s")

                return {
                    'task_id': task['task_id'],
                    'folder_name': task['folder_name'],
                    'success': True,
                    'file_count': copied_count,
                    'duration': duration,
                    'task_folder_id': task_folder_id
                }

            except Exception as e:
                if attempt >= max_retries - 1:
                    return self._report_failure(
                        task, start_time,
                        error=str(e),
                        progress_note=f"Failed after {max_retries} attempts - {str(e)}",
                    )

                # SSL-level failures leave the cached client unusable, so
                # drop it and let the next attempt build a new one.
                error_str = str(e).lower()
                if any(keyword in error_str for keyword in [
                    'ssl', 'wrong_version_number', 'decryption_failed', 'incompleteread'
                ]):
                    print(f"🔄 SSL error detected, resetting services for thread {threading.current_thread().ident}")
                    reset_thread_services()

                delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                print(f"⚠️ Attempt {attempt + 1} failed for {task['folder_name']}: {str(e)}")
                print(f"   Retrying in {delay:.1f}s...")
                time.sleep(delay)

    def _report_failure(self, task, start_time, error, progress_note):
        """Count a failed task, print its progress line and build its result."""
        duration = time.time() - start_time
        with self.progress_lock:
            self.completed_count += 1
            print(f"❌ ({self.completed_count}) {task['folder_name']}: {progress_note}")
        return {
            'task_id': task['task_id'],
            'folder_name': task['folder_name'],
            'success': False,
            'error': error,
            'duration': duration
        }

    def _list_child_files(self, folder_id):
        """Return all non-trashed direct children of ``folder_id``.

        ``files().list`` returns results page by page, so the listing follows
        ``nextPageToken`` until it is exhausted; a single call would silently
        truncate large folders.
        """
        # Backslashes and single quotes must be escaped inside a query string.
        safe_id = str(folder_id).replace('\\', '\\\\').replace("'", "\\'")
        files_query = f"'{safe_id}' in parents and trashed=false"

        children = []
        page_token = None
        while True:
            page = self._thread_safe_api_call(
                lambda svc, token=page_token: svc.files().list(
                    q=files_query,
                    fields='nextPageToken, files(id,name,mimeType)',
                    pageToken=token
                ).execute()
            )
            children.extend(page.get('files', []))
            page_token = page.get('nextPageToken')
            if not page_token:
                return children

    def _thread_safe_api_call(self, api_call_func, max_retries=3):
        """Run ``api_call_func(drive_service)`` with retries on transient errors.

        Args:
            api_call_func: callable taking this thread's Drive service and
                returning the API result.
            max_retries: total number of attempts.

        Returns:
            Whatever ``api_call_func`` returns.

        Raises:
            Exception: immediately for errors that do not look transient, or
                the last error once the attempts are used up.
        """
        for attempt in range(max_retries):
            try:
                drive_service = get_thread_safe_drive_service()
                return api_call_func(drive_service)
            except Exception as e:
                if attempt < max_retries - 1:
                    # Only errors whose text looks transient are retried.
                    error_str = str(e).lower()
                    if any(keyword in error_str for keyword in [
                        'ssl', 'connection', 'timeout', 'incomplete', 'network',
                        'internal error', 'service unavailable', 'rate limit',
                        'wrong_version_number', 'decryption_failed'
                    ]):
                        print(f"🔄 Recreating service due to: {e}")
                        reset_thread_services()
                        delay = (2 ** attempt) + random.uniform(0, 1)
                        time.sleep(delay)
                        continue
                raise

    def stage_tasks_concurrent(self, tasks, batch_folder_id, max_workers=2,
                               chunk_size=5, chunk_pause=10):
        """Stage tasks with thread-safe concurrency (SAFER).

        Tasks are processed in chunks with a pause between chunks.

        Args:
            tasks: list of task dicts (see ``copy_task_folder``).
            batch_folder_id: Drive ID of the batch folder.
            max_workers: requested worker count; clamped to the range 1..2.
            chunk_size: tasks per chunk (at least 1).
            chunk_pause: seconds to wait between chunks.

        Returns:
            List of result dicts, one per task, in completion order.
        """
        self.completed_count = 0
        results = []

        # Very conservative settings to prevent SSL issues.
        chunk_size = max(1, chunk_size)
        actual_workers = max(1, min(max_workers, 2))  # 0 would crash the executor

        print(f"🚀 Starting CONCURRENT staging of {len(tasks)} tasks")
        print(f"🛡️ Thread-safe mode: {actual_workers} workers, chunks of {chunk_size}")
        print(f"📊 Progress:")

        total_start_time = time.time()

        # One executor for the whole run: worker threads, and with them their
        # thread-local Drive services, are reused across chunks.
        with ThreadPoolExecutor(max_workers=actual_workers) as executor:
            for chunk_start in range(0, len(tasks), chunk_size):
                chunk_end = min(chunk_start + chunk_size, len(tasks))
                chunk_tasks = tasks[chunk_start:chunk_end]

                print(f"\n🔄 Processing chunk {chunk_start//chunk_size + 1}: tasks {chunk_start+1}-{chunk_end}")

                chunk_start_time = time.time()

                future_to_task = {
                    executor.submit(self.copy_task_folder, task, batch_folder_id): task
                    for task in chunk_tasks
                }

                chunk_results = []
                for future in as_completed(future_to_task):
                    try:
                        outcome = future.result()
                    except Exception as e:
                        # Keep the batch going if a worker raises unexpectedly.
                        failed_task = future_to_task[future]
                        outcome = {
                            'task_id': failed_task.get('task_id'),
                            'folder_name': failed_task.get('folder_name'),
                            'success': False,
                            'error': str(e),
                            'duration': time.time() - chunk_start_time
                        }
                    chunk_results.append(outcome)
                results.extend(chunk_results)

                chunk_duration = time.time() - chunk_start_time
                successful_in_chunk = sum(1 for r in chunk_results if r['success'])

                print(f"✅ Chunk complete: {successful_in_chunk}/{len(chunk_tasks)} successful in {chunk_duration:.1f}s")

                if chunk_end < len(tasks):
                    print("⏳ Extended pause for API stability...")
                    time.sleep(chunk_pause)

        total_duration = time.time() - total_start_time
        successful_count = sum(1 for r in results if r['success'])
        failed_count = len(results) - successful_count

        print(f"\n🎯 Concurrent Staging Complete:")
        print(f"   ✅ Successful: {successful_count}")
        print(f"   ❌ Failed: {failed_count}")
        print(f"   ⏱️ Total time: {total_duration:.1f}s")
        if total_duration > 0:
            print(f"   📈 Average speed: {len(tasks)/total_duration:.1f} tasks/second")

        return results

    def stage_tasks_sequential(self, tasks, batch_folder_id, pause_seconds=2):
        """Stage tasks one by one (SAFEST - no SSL conflicts).

        Args:
            tasks: list of task dicts (see ``copy_task_folder``).
            batch_folder_id: Drive ID of the batch folder.
            pause_seconds: seconds to wait between two tasks.

        Returns:
            List of result dicts, one per task, in input order.
        """
        self.completed_count = 0
        results = []

        print(f"🐌 Starting SEQUENTIAL staging of {len(tasks)} tasks")
        print(f"🛡️ Maximum stability mode - no concurrency")
        print(f"📊 Progress:")

        total_start_time = time.time()

        for i, task in enumerate(tasks, 1):
            print(f"\n📁 Processing task {i}/{len(tasks)}: {task['folder_name']}")

            results.append(self.copy_task_folder(task, batch_folder_id))

            # Small delay between tasks to be gentle on the API.
            if i < len(tasks):
                time.sleep(pause_seconds)

        total_duration = time.time() - total_start_time
        successful_count = sum(1 for r in results if r['success'])
        failed_count = len(results) - successful_count

        print(f"\n🎯 Sequential Staging Complete:")
        print(f"   ✅ Successful: {successful_count}")
        print(f"   ❌ Failed: {failed_count}")
        print(f"   ⏱️ Total time: {total_duration:.1f}s")
        if total_duration > 0:
            print(f"   📈 Average speed: {len(tasks)/total_duration:.1f} tasks/second")

        return results

# Printed when the cell runs, i.e. once the DriveOperations class above is defined.
print("🔧 Thread-safe DriveOperations ready with concurrent AND sequential modes.")

In [None]:
# Cell 5: Status Updater with Thread-Safe Services and Fixed Column Detection

class StatusUpdater:
    """Writes staging outcomes back to the export columns of the ``Tasks`` tab."""

    # Header spellings tried when the plain 'Export Status' lookup fails.
    _EXPORT_STATUS_VARIANTS = (
        'Export Status', 'export status', 'Export_Status', 'export_status', 'ExportStatus'
    )

    def __init__(self, sheets_service, sheet_id):
        """Remember the spreadsheet ID.

        ``sheets_service`` is accepted for interface compatibility but is not
        stored: the calling thread's Sheets service is fetched on demand.
        """
        self.sheet_id = sheet_id

    def update_export_statuses(self, staging_results, export_batch_id):
        """Update export status and batch ID for staged tasks.

        For every staging result whose task ID is found in the sheet, writes
        'staged' or 'staging_failed' to Export Status and, when the columns
        exist, the batch ID, the current time and (successful tasks only) the
        copied file count. All cells are sent in one batchUpdate.

        Args:
            staging_results: result dicts with 'task_id', 'success' and
                optionally 'file_count'.
            export_batch_id: value written to the Export Batch column.

        Returns:
            None. Errors are printed and swallowed on purpose: staging has
            already happened by the time this runs.
        """
        try:
            sheets_service = get_thread_safe_sheets_service()

            # Same range as TaskReader so column positions agree.
            sheet_data = sheets_service.spreadsheets().values().get(
                spreadsheetId=self.sheet_id,
                range='Tasks!A:AZ'
            ).execute()

            values = sheet_data.get('values', [])
            if not values:
                print("❌ No data found in Tasks sheet")
                return

            headers = values[0]

            print(f"🔍 DEBUG: Found {len(headers)} columns in sheet")
            print(f"🔍 DEBUG: First 10 headers: {headers[:10]}")
            print(f"🔍 DEBUG: Last 10 headers: {headers[-10:]}")

            columns = {
                label: self._find_column_index(headers, label)
                for label in ('Task ID', 'Export Status', 'Export Batch', 'Export Time', 'Staged Count')
            }

            print(f"🔍 Column positions:")
            for label, position in columns.items():
                print(f"   {label}: {position} ({'Found' if position >= 0 else 'NOT FOUND'})")

            task_id_col = columns['Task ID']
            export_status_col = columns['Export Status']
            export_batch_col = columns['Export Batch']
            export_time_col = columns['Export Time']
            staged_count_col = columns['Staged Count']

            if task_id_col == -1:
                # With index -1 the row match would silently compare against
                # the last cell of every row.
                print("❌ Task ID column not found - cannot match staging results to sheet rows")
                return

            if export_status_col == -1:
                print("🔍 Trying to find Export Status with variations...")
                for name in self._EXPORT_STATUS_VARIANTS:
                    col_idx = self._find_column_index(headers, name)
                    if col_idx >= 0:
                        print(f"✅ Found Export Status as '{name}' at column {col_idx}")
                        export_status_col = col_idx
                        break

                if export_status_col == -1:
                    print("❌ Export Status column still not found. All headers:")
                    for i, header in enumerate(headers):
                        print(f"   {i}: '{header}'")
                    print("⚠️ Warning: Export Status column not found - no status updates will be made")
                    return

            # One pass over the sheet instead of a scan per staging result.
            row_by_task_id = self._index_rows_by_task_id(values, task_id_col)

            batch_updates = []
            current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            successful_updates = 0

            for outcome in staging_results:
                task_id = outcome['task_id']
                row_index = row_by_task_id.get(task_id)

                if row_index is None:
                    print(f"⚠️ Could not find row for task ID: {task_id}")
                    continue

                new_status = 'staged' if outcome['success'] else 'staging_failed'
                batch_updates.append(self._cell_update(export_status_col, row_index, new_status))

                if export_batch_col >= 0:
                    batch_updates.append(self._cell_update(export_batch_col, row_index, export_batch_id))

                if export_time_col >= 0:
                    batch_updates.append(self._cell_update(export_time_col, row_index, current_time))

                # The file count is only meaningful for successful tasks.
                if staged_count_col >= 0 and outcome['success']:
                    file_count = outcome.get('file_count', 0)
                    batch_updates.append(self._cell_update(staged_count_col, row_index, str(file_count)))

                successful_updates += 1

            if batch_updates:
                body = {
                    'valueInputOption': 'RAW',
                    'data': batch_updates
                }

                sheets_service.spreadsheets().values().batchUpdate(
                    spreadsheetId=self.sheet_id,
                    body=body
                ).execute()

                print(f"📝 Updated {len(batch_updates)} sheet cells for {successful_updates} tasks")
                print(f"✅ Export status tracking completed successfully")
            else:
                print("⚠️ No updates to make - check task IDs match between staging results and sheet")

        except Exception as e:
            print(f"❌ Failed to update sheet statuses: {str(e)}")
            print(f"🔍 Error details: {e}")
            # Don't raise - staging succeeded even if status update failed

    def _index_rows_by_task_id(self, values, task_id_col):
        """Map each task ID to its 1-based sheet row (first occurrence wins).

        ``values[0]`` is the header row, so data rows start at sheet row 2.
        Rows too short to contain the Task ID cell are skipped.
        """
        row_by_task_id = {}
        for row_number, row in enumerate(values[1:], start=2):
            if len(row) > task_id_col:
                row_by_task_id.setdefault(row[task_id_col], row_number)
        return row_by_task_id

    def _cell_update(self, col_index, row_index, value):
        """Build one batchUpdate entry addressing a single cell of ``Tasks``."""
        cell = self._get_column_letter(col_index) + str(row_index)
        return {
            'range': f'Tasks!{cell}',
            'values': [[value]]
        }

    def _find_column_index(self, headers, column_name):
        """Find column index by name (case-insensitive); -1 when not found.

        Header cells are stripped before comparison; empty or None headers
        never match.
        """
        wanted = column_name.lower()
        for i, header in enumerate(headers):
            if header and header.strip().lower() == wanted:
                return i
        return -1

    def _get_column_letter(self, col_index):
        """Convert a 0-based column index to spreadsheet letters.

        0->A, 25->Z, 26->AA, 701->ZZ, 702->AAA.

        Raises:
            ValueError: if ``col_index`` is negative.
        """
        if col_index < 0:
            raise ValueError(f"Column index must be non-negative, got {col_index}")

        letters = ''
        remaining = col_index + 1
        while remaining > 0:
            # Column letters are bijective base 26: there is no zero digit.
            remaining, offset = divmod(remaining - 1, 26)
            letters = chr(65 + offset) + letters
        return letters

# Printed when the cell runs, i.e. once the StatusUpdater class above is defined.
print("📊 StatusUpdater ready with improved column detection and debugging.")

In [None]:
# Cell 6: CLI Interface - Main Staging Workflow with Concurrency Toggle

def _choose_concurrent_mode(use_concurrent):
    """Return True when staging should run concurrently.

    A falsy ``use_concurrent`` forces sequential mode without prompting.
    Otherwise the user is prompted, and only an answer of ``C``
    (case-insensitive, surrounding whitespace ignored) selects concurrent
    mode; any other answer selects sequential mode.
    """
    if not use_concurrent:
        # Skip user prompt if mode is pre-determined
        return False

    print("\n🚀 Choose processing mode:")
    print("   [C] Concurrent - Faster but may have SSL connection issues")
    print("   [S] Sequential - Slower but maximum stability (RECOMMENDED)")
    mode_choice = input("   Choose mode [C/S]: ").strip().upper()
    return mode_choice == 'C'


def _print_staging_summary(batch_folder, staging_results, mode_name, concurrent_mode):
    """Print the end-of-run report, with a closing line that reflects failures.

    Reads ``name``/``url`` from ``batch_folder`` and ``success`` (plus
    ``folder_name``/``error`` for failed entries) from each staging result.
    """
    successful_tasks = [r for r in staging_results if r['success']]
    failed_tasks = [r for r in staging_results if not r['success']]

    print("\n" + "="*60)
    print("🎉 STAGING COMPLETE")
    print("="*60)
    print(f"📁 Batch Folder: {batch_folder['name']}")
    print(f"🔗 Folder URL: {batch_folder['url']}")
    print(f"⚙️ Processing Mode: {mode_name}")
    print(f"✅ Successful: {len(successful_tasks)}")
    print(f"❌ Failed: {len(failed_tasks)}")

    if failed_tasks:
        print("\n❌ Failed Tasks:")
        for task in failed_tasks:
            print(f"   • {task['folder_name']}: {task['error']}")

    if failed_tasks and concurrent_mode:
        print("\n💡 TIP: If you see SSL errors, try running again with sequential mode:")
        print("   run_staging_workflow(use_concurrent=False)")

    # Only claim success when nothing failed; otherwise say so explicitly so
    # the last line of output is not misleading.
    if failed_tasks:
        print(f"\n⚠️ Staging operation completed with {len(failed_tasks)} failed task(s) - see errors above")
    else:
        print("\n🏁 Staging operation completed successfully!")


def run_staging_workflow(use_concurrent=True):
    """Main CLI-driven staging workflow with concurrency options.

    Interactively walks through: loading configuration, prompting for
    filters, reading stageable tasks, previewing them, choosing a processing
    mode, confirming, creating the batch folder, staging the tasks, updating
    the sheet statuses and printing a summary.

    Args:
        use_concurrent: When falsy, sequential mode is used without asking.
            When truthy (the default), the user is prompted to choose
            between concurrent and sequential mode.

    Returns:
        None. The function also returns early (after printing a message)
        when the sheet ID is still the placeholder, when no tasks match, or
        when the user does not confirm with ``y``.

    Raises:
        Exception: Any exception raised during the workflow is reported with
            a "Staging failed" message and then re-raised unchanged.
    """

    print("\n" + "="*60)
    print("🎯 3D DATA MANAGEMENT - STAGING OPERATIONS")
    print("="*60)

    try:
        # Step 1: Use hardcoded Sheet ID
        sheet_id = SHEET_ID
        if sheet_id == "your_sheet_id_here":
            print("❌ Please update SHEET_ID in Cell 1 with your actual Google Sheet ID")
            return

        print(f"📋 Using Sheet ID: {sheet_id}")

        # Step 2: Load Configuration - use thread-safe service
        print("\n⚙️ Loading configuration...")
        config_manager = ConfigManager(None, sheet_id)  # Pass None, it will use thread-safe getter
        config = config_manager.load_config()

        # Step 3: Get CLI Parameters (blank answers mean "no filter")
        print("\n🔍 Staging filters:")
        batch_id_filter = input("   Batch ID (optional): ").strip() or None
        agent_filter = input("   Agent email contains (optional): ").strip() or None

        # Step 4: Load and Filter Tasks - use thread-safe service
        print("\n📖 Reading tasks from sheet...")
        task_reader = TaskReader(None, sheet_id)  # Pass None, it will use thread-safe getter
        tasks = task_reader.get_stageable_tasks(batch_id_filter, agent_filter)

        if not tasks:
            print("\n❌ No tasks found matching criteria")
            print("   Make sure tasks have status='complete' and review_status='passed'")
            return

        # Step 5: Preview Tasks (first five only, then a count of the rest)
        print(f"\n📋 Found {len(tasks)} tasks ready for staging:")
        for i, task in enumerate(tasks[:5], 1):
            print(f"   {i}. {task['folder_name']} (Group: {task.get('group', 'N/A')})")
        if len(tasks) > 5:
            print(f"   ... and {len(tasks) - 5} more")

        # Step 6: Choose Processing Mode
        concurrent_mode = _choose_concurrent_mode(use_concurrent)
        mode_name = "CONCURRENT" if concurrent_mode else "SEQUENTIAL"
        print(f"   Selected: {mode_name} mode")

        # Step 7: Confirm Staging (anything other than 'y' cancels)
        confirm = input(f"\n🚀 Stage {len(tasks)} tasks in {mode_name} mode? [y/N]: ").strip().lower()
        if confirm != 'y':
            print("❌ Staging cancelled")
            return

        # Step 8: Generate Export Batch ID (timestamp-based when left blank)
        export_batch_id = input("\n🏷️ Enter export batch ID (or press Enter for auto-generated): ").strip()
        if not export_batch_id:
            export_batch_id = f"staging_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        print(f"   Using batch ID: {export_batch_id}")

        # Step 9: Create Batch Folder
        print("\n📁 Creating batch folder...")
        drive_ops = DriveOperations()
        batch_folder = drive_ops.create_batch_folder(
            config['STAGING_FOLDER_ID'],
            export_batch_id
        )

        # Step 10: Stage Tasks with Selected Mode
        print(f"\n🔥 Starting {mode_name} staging...")

        if concurrent_mode:
            # Concurrent mode with very conservative settings
            max_workers = min(2, len(tasks))  # Maximum 2 workers
            staging_results = drive_ops.stage_tasks_concurrent(
                tasks,
                batch_folder['id'],
                max_workers=max_workers
            )
        else:
            # Sequential mode - maximum stability
            staging_results = drive_ops.stage_tasks_sequential(
                tasks,
                batch_folder['id']
            )

        # Step 11: Update Sheet Statuses - use thread-safe service
        print("\n📊 Updating sheet statuses...")
        status_updater = StatusUpdater(None, sheet_id)  # Pass None, it will use thread-safe getter
        status_updater.update_export_statuses(staging_results, export_batch_id)

        # Step 12: Final Summary
        _print_staging_summary(batch_folder, staging_results, mode_name, concurrent_mode)

    except Exception as e:
        # Report the failure for the notebook user, then let it propagate.
        print(f"\n💥 Staging failed: {str(e)}")
        raise

# Ready to run: these banners execute when the cell is evaluated and only
# advertise the two ways to call run_staging_workflow(); nothing is staged here.
print("\n🎯 Enhanced CLI Staging Workflow ready!")
print("📞 Run: run_staging_workflow()  # Interactive mode selection")
print("📞 Or: run_staging_workflow(use_concurrent=False)  # Force sequential mode")

In [None]:
# Cell 7: Execute Staging with Configuration

# CONFIGURATION - the one switch for this cell; edit it before running.
#
#   False -> Sequential: tasks are processed one by one. Slower but extremely
#            stable; pick this if you hit SSL connection errors or need
#            maximum reliability.
#
#   True  -> Concurrent: multiple tasks are processed simultaneously with
#            2 workers max. Faster, but may encounter SSL connection issues
#            in unstable network environments. Uses thread-safe service
#            instances to minimize conflicts.
USE_CONCURRENT = False

# Echo the effective configuration so it is visible in the cell output.
print("🎯 STAGING CONFIGURATION:")
print(f"   Mode: {'CONCURRENT' if USE_CONCURRENT else 'SEQUENTIAL'}")
print(f"   SSL Protection: Thread-safe services enabled")

if not USE_CONCURRENT:
    print("   🛡️  Using sequential mode - maximum stability")
else:
    print("   ⚠️  Using concurrent mode - monitor for SSL errors")
    print("   💡 If you see SSL errors, change USE_CONCURRENT = False above")

print("\n" + "="*50)

# Hand off to the workflow with the mode configured above.
run_staging_workflow(use_concurrent=USE_CONCURRENT)