In [None]:
# ============================================================================
# Wetland Classification with Cloud Run - Complete Workflow
# ============================================================================
# Run each cell in order. Your config is already set!

# CELL 1: Configuration (PRE-FILLED FOR YOU)
# ============================================================================

# Deployment settings read by every later cell.
PROJECT_ID = "wetmaps-476922"
BUCKET_NAME = "wetmaps"
REGION = "northamerica-northeast2"  #(Toronto region)
SERVICE_NAME = "wetland-classifier"

# Echo the active settings so they can be checked before anything is created.
for summary_line in (
    f"‚úì Project ID: {PROJECT_ID}",
    f"‚úì Bucket: {BUCKET_NAME}",
    f"‚úì Region: {REGION}",
):
    print(summary_line)

In [None]:
# CELL 2: Install Dependencies
# ============================================================================

!pip install -q google-cloud-storage earthengine-api

print("‚úì Dependencies installed")

In [None]:
# CELL 3: Authenticate with Google Cloud
# ============================================================================

# Colab-only helper module; this import fails outside Google Colab.
from google.colab import auth
# Presumably starts the interactive Google sign-in so later gcloud / client
# library calls in this runtime have credentials — confirm against Colab docs.
auth.authenticate_user()

# Reached only if authenticate_user() returned without raising.
print("‚úì Authenticated with Google Cloud")

In [None]:
# CELL 4: Set Default Project
# ============================================================================

# Shell out to gcloud to set the default project; IPython substitutes the
# Python variable PROJECT_ID into the command line before running it.
!gcloud config set project {PROJECT_ID}

# NOTE: printed unconditionally — the exit status of the gcloud command above
# is not checked here.
print(f"‚úì Using project: {PROJECT_ID}")

In [None]:
# CELL 5: Verify/Create Cloud Storage Bucket
# ============================================================================

from google.cloud import storage

storage_client = storage.Client(project=PROJECT_ID)

# lookup_bucket() returns None only when the bucket does not exist. Any other
# failure (missing permissions, network trouble, bad credentials) propagates
# instead of being mistaken for "missing" and triggering a create call.
bucket = storage_client.lookup_bucket(BUCKET_NAME)
if bucket is not None:
    print(f"‚úì Bucket exists: gs://{BUCKET_NAME}")
else:
    print(f"Creating bucket: gs://{BUCKET_NAME}")
    bucket = storage_client.create_bucket(BUCKET_NAME, location=REGION)
    print(f"‚úì Created bucket: gs://{BUCKET_NAME}")

# Create folder structure. Cloud Storage has no real directories, so an empty
# '.keep' object is written under each prefix; existing markers are left alone.
folders = ['training_data/', 'inference_data/', 'models/', 'results/']
for folder in folders:
    blob = bucket.blob(folder + '.keep')
    if not blob.exists():
        blob.upload_from_string('')

print(f"‚úì Folder structure ready")

In [None]:
# CELL 6: Create Cloud Run Deployment Files
# ============================================================================

import os

# Directory that `gcloud run deploy --source .` is later run from.
DEPLOY_DIR = '/content/cloud_run_deploy'

# Create deployment directory in pure Python: no shell is involved and a
# failure raises at this line. exist_ok=True keeps the cell re-runnable.
os.makedirs(DEPLOY_DIR, exist_ok=True)
# Kept so relative paths in later cells still resolve inside DEPLOY_DIR.
os.chdir(DEPLOY_DIR)

# Write Dockerfile
dockerfile = """FROM python:3.10-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \\
    gdal-bin \\
    libgdal-dev \\
    gcc \\
    g++ \\
    && rm -rf /var/lib/apt/lists/*

ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
ENV C_INCLUDE_PATH=/usr/include/gdal

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

ENV PORT=8080

CMD exec gunicorn --bind :$PORT --workers 1 --threads 4 --timeout 3600 main:app
"""

# Explicit paths: the files land in DEPLOY_DIR regardless of the current
# working directory.
with open(os.path.join(DEPLOY_DIR, 'Dockerfile'), 'w') as f:
    f.write(dockerfile)

# Write requirements.txt (versions pinned for the container image)
requirements = """flask==3.0.0
gunicorn==21.2.0
numpy==1.24.3
scikit-learn==1.3.2
google-cloud-storage==2.10.0
GDAL==3.6.2
rasterio==1.3.9
pandas==2.0.3
joblib==1.3.2
"""

with open(os.path.join(DEPLOY_DIR, 'requirements.txt'), 'w') as f:
    f.write(requirements)

print("‚úì Created Dockerfile and requirements.txt")
print("\n" + "="*70)
print("‚ö†Ô∏è  NEXT STEP: Upload main.py")
print("="*70)
print("1. Click the folder icon on the left sidebar")
print("2. Navigate to: /content/cloud_run_deploy/")
print("3. Click the upload button (up arrow)")
print("4. Upload the main.py file")
print("5. Then run the next cell to verify")


In [None]:
# CELL 7: Verify main.py Upload
# ============================================================================

import os

# Location the user is asked to upload main.py to.
main_py_path = '/content/cloud_run_deploy/main.py'

# Stop the notebook early when the upload is missing.
if not os.path.exists(main_py_path):
    print("‚úó main.py NOT FOUND!")
    print("Please upload main.py before continuing")
    raise FileNotFoundError("main.py is required")

size = os.path.getsize(main_py_path)
print(f"‚úì main.py found!")
print(f"  Size: {size:,} bytes")

# Quick validation: look for two substrings the expected service file contains.
with open(main_py_path, 'r') as f:
    content = f.read()
    looks_valid = 'RandomForestClassifier' in content and 'flask' in content
    print(f"‚úì File looks correct!" if looks_valid else f"‚ö†Ô∏è  Warning: File may be incorrect")

In [None]:
# CELL 8: Deploy to Cloud Run
# ============================================================================

os.chdir('/content/cloud_run_deploy')

print("="*70)
print("üöÄ DEPLOYING TO CLOUD RUN")
print("="*70)
print("This will take 5-10 minutes...")
print("You'll see a lot of output (building Docker container)")
print()

!gcloud run deploy {SERVICE_NAME} \
  --source . \
  --platform managed \
  --region {REGION} \
  --memory 4Gi \
  --cpu 2 \
  --timeout 3600 \
  --set-env-vars BUCKET_NAME={BUCKET_NAME} \
  --allow-unauthenticated \
  --project {PROJECT_ID}

print("\n‚úì Deployment complete!")

In [None]:
# CELL 9: Get Service URL and Test
# ============================================================================

import subprocess
import requests

# Get service URL. An argument list (no shell=True) avoids shell quoting of the
# interpolated names; --project matches the deploy command in the previous cell.
result = subprocess.run(
    [
        "gcloud", "run", "services", "describe", SERVICE_NAME,
        "--region", REGION,
        "--project", PROJECT_ID,
        "--format", "value(status.url)",
    ],
    capture_output=True,
    text=True,
)

SERVICE_URL = result.stdout.strip()

# Fail loudly when the lookup did not work: an empty SERVICE_URL would
# otherwise be reported as a successful deployment and break every later
# request with a confusing URL error.
if result.returncode != 0 or not SERVICE_URL:
    raise RuntimeError(
        f"Could not get URL for Cloud Run service '{SERVICE_NAME}' "
        f"(gcloud exit code {result.returncode}): {result.stderr.strip()}"
    )

print("="*70)
print(f"‚úì Service deployed!")
print("="*70)
print(f"URL: {SERVICE_URL}")
print()

# Test health endpoint. Best-effort: a failed check is reported but does not
# stop the notebook.
print("Testing service health...")
try:
    response = requests.get(f"{SERVICE_URL}/health", timeout=10)
    if response.status_code == 200:
        print("‚úì Service is healthy and responding!")
    else:
        print(f"‚ö†Ô∏è  Service responded with status: {response.status_code}")
except requests.RequestException as e:
    # Only network/HTTP-level failures are expected here.
    print(f"‚úó Health check failed: {e}")

In [None]:
# CELL 10: Earth Engine Export Instructions
# ============================================================================

# Manual-step instructions for the Earth Engine export, printed in groups.
rule = "=" * 70

print(
    rule,
    "STEP: EXPORT DATA FROM EARTH ENGINE",
    rule,
    "",
    "‚ö†Ô∏è  MANUAL STEP - Do this now:",
    "",
    "1. Open Earth Engine Code Editor:",
    "   https://code.earthengine.google.com/",
    "",
    "2. Copy the script: 1_earth_engine_export.js",
    "",
    "3. Lines 8-9 are ALREADY CORRECT:",
    sep="\n",
)
# The two lines the user should see in the script, filled in from this
# notebook's configuration.
print(f"   var PROJECT_ID = '{PROJECT_ID}';")
print(
    f"   var BUCKET_NAME = '{BUCKET_NAME}';",
    "",
    "4. Click RUN",
    "",
    "5. Click Tasks tab (orange icon, right side)",
    "",
    "6. You'll see 2 tasks - click RUN on BOTH:",
    "   - wetland_training_samples_calgary",
    "   - alphaearth_calgary_full",
    "",
    "7. Wait for BOTH to show 'Completed ‚úì' status",
    "   (Training CSV: ~5-10 min, GeoTIFF: ~10-15 min)",
    "",
    "8. Then run the next cell",
    rule,
    sep="\n",
)

In [None]:
# CELL 11: Wait for Earth Engine Exports
# ============================================================================

import time
from google.cloud import storage

# Banner shown before polling Cloud Storage for the export files.
rule = "=" * 70
print(
    rule,
    "WAITING FOR EARTH ENGINE EXPORTS",
    rule,
    "Checking Cloud Storage every 30 seconds...",
    "Press Ctrl+C to skip if you know exports are done",
    "",
    sep="\n",
)

def check_exports():
    """Report whether the training CSV and the inference GeoTIFF are in the bucket.

    Lists every object in the module-level ``bucket`` and matches on object
    names only.

    Returns:
        tuple[bool, bool]: ``(training_ready, inference_ready)``. The first is
        True when some object name contains 'calgary_samples' and ends with
        '.csv'; the second when some name contains 'calgary_alphaearth' and
        ends with '.tif'.
    """
    object_names = [blob.name for blob in bucket.list_blobs()]

    def _has_export(marker, suffix):
        # True as soon as one object name matches both the marker and suffix.
        for object_name in object_names:
            if marker in object_name and object_name.endswith(suffix):
                return True
        return False

    return _has_export('calgary_samples', '.csv'), _has_export('calgary_alphaearth', '.tif')

# Check exports: poll Cloud Storage until both files are present, the time
# budget is used up, or the user interrupts the cell.
attempts = 0
max_attempts = 120  # 60 minutes max
poll_seconds = 30   # seconds between checks (the banner text says 30 seconds)

try:
    while attempts < max_attempts:
        training_ready, inference_ready = check_exports()

        status_training = "‚úì Training CSV" if training_ready else "‚è≥ Training CSV"
        status_inference = "‚úì GeoTIFF" if inference_ready else "‚è≥ GeoTIFF"

        elapsed_min = attempts * poll_seconds // 60
        # '\r' rewrites the same status line; flush so it shows up during the sleep.
        print(f"[{elapsed_min:2d} min] {status_training:20s} | {status_inference:20s}", end='\r', flush=True)

        if training_ready and inference_ready:
            print(f"\n\n{'='*70}")
            print("‚úì BOTH EXPORTS COMPLETE!")
            print("="*70)
            break

        time.sleep(poll_seconds)
        attempts += 1
    else:
        # Loop ended without `break`: the attempt budget ran out.
        print("\n\n‚úó Timeout waiting for exports")
        print("Check Earth Engine Tasks tab manually")
        print("If exports are complete, continue anyway")
except KeyboardInterrupt:
    # The banner advertises Ctrl+C / "interrupt execution" as a way to skip the
    # wait, so end the cell cleanly instead of with a traceback.
    print("\n\nStopped waiting (interrupted)")
    print("If exports are complete, continue with the next cell")

In [None]:
# CELL 12: Train Model via Cloud Run
# ============================================================================

import requests
import json

banner = "=" * 70
print(banner)
print("TRAINING RANDOM FOREST MODEL")
print(banner)
print()

# Bucket-relative paths sent to the service's /train endpoint.
train_request = {
    'training_csv': 'training_data/calgary_samples.csv',
    'model_output': 'models/wetland_rf_model.joblib'
}

train_url = f'{SERVICE_URL}/train'

print(f"Sending request to: {SERVICE_URL}/train")
print("This will take 5-10 minutes...")
print("Watch for progress updates below:")
print()

try:
    response = requests.post(train_url, json=train_request, timeout=1800)

    if response.status_code != 200:
        # Non-200: show what the service returned and carry on.
        print(f"\n‚úó Training failed!")
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")

    else:
        results = response.json()

        print("\n" + banner)
        print("‚úì TRAINING COMPLETE!")
        print(banner)
        print()

        metrics = results['results']
        print(f"Validation Accuracy: {metrics['val_accuracy']*100:.1f}%")
        print(f"Test Accuracy:       {metrics['test_accuracy']*100:.1f}%")
        print()
        print(f"Model saved to: {results['model_path']}")
        print()

        # Confusion matrix, one row per actual class.
        conf = metrics['confusion_matrix']
        classes = ['Marsh', 'SW', 'Swamp', 'Fen']

        print("Confusion Matrix:")
        print("                    Predicted")
        header_cells = "".join(f"{c:>8}" for c in classes)
        print(f"{'':>12} " + header_cells)
        for i, row in enumerate(conf):
            print(f"Actual {classes[i]:>6} " + "".join(f"{v:>8}" for v in row))
        print()

        # Compare test accuracy with the 80% target.
        test_acc = metrics['test_accuracy']
        if test_acc >= 0.80:
            print(f"‚úì TARGET MET: {test_acc*100:.1f}% ‚â• 80%")
        else:
            print(f"‚ö†Ô∏è  Below 80% target: {test_acc*100:.1f}%")
            print("   (Will improve with more training data)")

except Exception as e:
    # Any failure (request error, unexpected response shape) is reported
    # without stopping the notebook.
    print(f"\n‚úó Error: {e}")

In [None]:
# CELL 13: Classify Full Image
# ============================================================================

import requests

print("="*70)
print("CLASSIFYING FULL CALGARY REGION")
print("="*70)
print()

# Bucket-relative paths sent to the service's /classify endpoint.
classify_request = {
    'model_path': 'models/wetland_rf_model.joblib',
    'input_tif': 'inference_data/calgary_alphaearth.tif',
    'output_tif': 'results/calgary_classified.tif'
}

print(f"Sending request to: {SERVICE_URL}/classify")
print("This will take 10-20 minutes...")
print("Processing entire Calgary region...")
print()

try:
    response = requests.post(
        f'{SERVICE_URL}/classify',
        json=classify_request,
        timeout=3600
    )

    if response.status_code == 200:
        results = response.json()

        print("\n" + "="*70)
        print("‚úì CLASSIFICATION COMPLETE!")
        print("="*70)
        print()
        print(f"Output: {results['output_path']}")
        print()
        print("Class Distribution:")

        total = sum(results['class_distribution'].values())
        for class_name, count in results['class_distribution'].items():
            # A zero pixel total (every class count is 0) would otherwise raise
            # ZeroDivisionError and be reported as a generic error right after
            # the "complete" banner; show 0.0% instead.
            pct = count / total * 100 if total else 0.0
            print(f"  {class_name:>20}: {count:>10,} pixels ({pct:>5.1f}%)")

        print()
        print(f"Total classified: {total:,} pixels")

    else:
        # Non-200: show what the service returned and carry on.
        print(f"\n‚úó Classification failed!")
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")

except Exception as e:
    # Any failure (request error, unexpected response shape) is reported
    # without stopping the notebook.
    print(f"\n‚úó Error: {e}")

In [None]:
# CELL 14: Download Classified GeoTIFF
# ============================================================================

# `os` is used below for the file size; import it here so this cell does not
# depend on an earlier cell having imported it in the same kernel session.
import os

from google.cloud import storage
from google.colab import files

print("="*70)
print("DOWNLOADING RESULT")
print("="*70)
print()

# Download from Cloud Storage into the Colab runtime.
# `bucket` is the bucket object created in the bucket-setup cell.
print("Downloading from Cloud Storage...")
blob = bucket.blob('results/calgary_classified.tif')
blob.download_to_filename('/content/calgary_classified.tif')

size_mb = os.path.getsize('/content/calgary_classified.tif') / 1024 / 1024

print(f"‚úì Downloaded to: /content/calgary_classified.tif")
print(f"  Size: {size_mb:.2f} MB")
print()

# Download to your computer (hands the file to the browser via Colab).
print("Downloading to your computer...")
files.download('/content/calgary_classified.tif')

print("‚úì File downloaded!")

In [None]:
# CELL 15: Upload to Earth Engine Instructions
# ============================================================================

# Manual-step instructions for uploading the classified GeoTIFF as an Earth
# Engine asset, printed as one block.
rule = "=" * 70
print(
    rule,
    "UPLOAD TO EARTH ENGINE",
    rule,
    "",
    "‚ö†Ô∏è  MANUAL STEP - Do this now:",
    "",
    "1. Go to Earth Engine Code Editor:",
    "   https://code.earthengine.google.com/",
    "",
    "2. Click 'Assets' tab (left sidebar)",
    "",
    "3. Click 'NEW' ‚Üí 'Image upload' ‚Üí 'GeoTIFF files'",
    "",
    "4. Click 'SELECT' and choose:",
    "   calgary_classified.tif",
    "   (Already downloaded to your computer)",
    "",
    "5. Asset ID:",
    "   users/YOUR_USERNAME/calgary_wetlands_classified",
    "   (Replace YOUR_USERNAME with your EE username)",
    "",
    "6. Click 'UPLOAD'",
    "",
    "7. Wait 5-10 minutes for upload",
    "",
    "8. Then run the visualization code in next cell",
    rule,
    sep="\n",
)

In [None]:
# CELL 16: Earth Engine Visualization Code
# ============================================================================

# JavaScript for the Earth Engine Code Editor. It is only printed here for the
# user to copy; nothing in this notebook executes it.
viz_code = """// =====================================================================
// Visualize Cloud Run Classification Results  
// =====================================================================

// Load classified image (UPDATE YOUR_USERNAME!)
var classified = ee.Image('users/YOUR_USERNAME/calgary_wetlands_classified');

// Define visualization
var palette = ['blue', 'cyan', 'green', 'yellow'];
var classNames = ['Marsh', 'Shallow Open Water', 'Swamp', 'Fen'];

// Center on Calgary
Map.setCenter(-114.0, 51.05, 11);

// Add classification layer
Map.addLayer(
  classified,
  {min: 0, max: 3, palette: palette},
  'Wetland Classification (Cloud Run)'
);

// Create legend
var legend = ui.Panel({
  style: {
    position: 'bottom-left',
    padding: '8px 15px',
    backgroundColor: 'white'
  }
});

var title = ui.Label({
  value: 'Calgary Wetlands',
  style: {fontWeight: 'bold', fontSize: '16px', margin: '0 0 8px 0'}
});
legend.add(title);

// Add color boxes
for (var i = 0; i < classNames.length; i++) {
  var colorBox = ui.Label({
    style: {
      backgroundColor: palette[i],
      padding: '8px',
      margin: '2px 8px 2px 0'
    }
  });
  
  var label = ui.Label({
    value: classNames[i],
    style: {margin: '2px 0'}
  });
  
  var row = ui.Panel({
    widgets: [colorBox, label],
    layout: ui.Panel.Layout.Flow('horizontal')
  });
  
  legend.add(row);
}

// Add info
legend.add(ui.Label({
  value: 'Cloud Run Random Forest',
  style: {margin: '8px 0 0 0', fontSize: '11px', fontStyle: 'italic'}
}));

Map.add(legend);

print('‚úì Classification loaded');
print('‚úì 4-class wetland map');
"""

# Header, the snippet itself, and a closing rule, emitted in one call.
rule = "=" * 70
dashes = "-" * 70
print(
    rule,
    "EARTH ENGINE VISUALIZATION",
    rule,
    "",
    "Copy this code into Earth Engine Code Editor:",
    "",
    dashes,
    viz_code,
    dashes,
    sep="\n",
)

In [None]:
# CELL 17: Summary and Costs
# ============================================================================

# Closing summary: what was produced, rough cost figures, and how to scale up.
rule = "=" * 70

print(rule, "WORKFLOW COMPLETE!", rule, "", sep="\n")
print(
    f"‚úì Cloud Run service: {SERVICE_URL}",
    f"‚úì Model trained and saved",
    f"‚úì Calgary region classified",
    f"‚úì Results ready for visualization",
    "",
    sep="\n",
)

# Cost figures are the notebook author's estimates, printed as-is.
print(
    rule,
    "ESTIMATED COSTS",
    rule,
    "",
    "Cloud Run (4GB RAM, 2 CPU):",
    "  Training:       5-10 min  ‚Üí  ~$0.25-0.50",
    "  Classification: 10-20 min  ‚Üí  ~$0.50-1.00",
    "",
    "Cloud Storage:",
    "  ~2-3 GB data  ‚Üí  ~$0.05-0.10/month",
    "",
    "TOTAL: ~$0.80-1.60 USD",
    "",
    "Note: Free tier likely covers this!",
    "  - 360,000 GB-seconds/month",
    "  - 2 million requests/month",
    "",
    sep="\n",
)

print(
    rule,
    "NEXT STEPS",
    rule,
    "",
    "To scale to Alberta:",
    "1. Update bounds in 1_earth_engine_export.js",
    "2. Re-run EE exports",
    "3. Use SAME Cloud Run service",
    "4. Just run cells 12-13 again",
    "",
    "No redeployment needed! üéâ",
    sep="\n",
)

In [None]:
# CELL 18: Clean Up (Optional)
# ============================================================================

# Prints the clean-up commands as text only; nothing is deleted by this cell.
rule = "=" * 70

print(
    rule,
    "CLEAN UP RESOURCES (Optional)",
    rule,
    "",
    "‚ö†Ô∏è  WARNING: This will DELETE everything!",
    "",
    "Only run this when completely done.",
    "",
    "Uncomment and run to delete:",
    "",
    f"# Delete Cloud Run service:",
    sep="\n",
)
# Commands are filled in from this notebook's configuration values.
print(
    f"# !gcloud run services delete {SERVICE_NAME} --region {REGION} --quiet",
    "",
    f"# Delete Cloud Storage bucket:",
    sep="\n",
)
print(
    f"# !gsutil -m rm -r gs://{BUCKET_NAME}",
    "",
    "# In Earth Engine: Assets ‚Üí Right-click ‚Üí Delete",
    "",
    rule,
    sep="\n",
)