## 1️⃣ Clone & Setup

In [None]:
# Clone repository
!git clone https://github.com/Usernamenisiya/thesis-cloud-rl.git
%cd thesis-cloud-rl

# Verify
!pwd
!ls -la | head -15

In [None]:
# Install dependencies
!pip install -r requirements.txt
!pip install gymnasium  # Updated from deprecated gym

import torch
import stable_baselines3
import rasterio

print("‚úÖ Dependencies installed")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

In [None]:
# Check GPU
!nvidia-smi

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n‚úÖ Using device: {device}")

## 2️⃣ Set Up Data from Google Drive

**Create in Google Drive first:**
- Folder: `Colab_Data/thesis_cloud_rl/`
- Files: `sentinel2_image.tif`, `ground_truth.tif`

In [None]:
# Mount Google Drive
from google.colab import drive
import os
from pathlib import Path

drive.mount('/content/drive')

# Create data directory
Path('data').mkdir(exist_ok=True)

# Copy from Google Drive
gdrive_path = '/content/drive/MyDrive/Colab_Data/thesis_cloud_rl'

if os.path.exists(gdrive_path):
    !cp {gdrive_path}/sentinel2_image.tif data/ 2>/dev/null || true
    !cp {gdrive_path}/ground_truth.tif data/ 2>/dev/null || true
    print("‚úÖ Files copied from Google Drive")
else:
    print(f"‚ùå Path not found: {gdrive_path}")
    print("Please create folder structure in Google Drive first")

!ls -lh data/ 2>/dev/null || echo "No data files yet"

In [None]:
# Verify data files
import os
from pathlib import Path

# Both rasters are read by the CNN baseline cell further down.
required_files = ['data/sentinel2_image.tif', 'data/ground_truth.tif']
missing_files = [path for path in required_files if not os.path.exists(path)]
files_ok = not missing_files

if files_ok:
    print("✅ All data files present!")
    print("Ready to proceed.")
else:
    print("❌ Missing data files")
    # Name the exact files so the user knows what still has to be uploaded.
    for path in missing_files:
        print(f"   - {path}")
    print("\n📂 Create in Google Drive:")
    print("   MyDrive/Colab_Data/thesis_cloud_rl/")
    print("\nUpload files:")
    print("   - sentinel2_image.tif")
    print("   - ground_truth.tif")

## 3️⃣ Check CNN Baseline

In [None]:
# Load and test CNN baseline
from cnn_inference import load_sentinel2_image, get_cloud_mask
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import rasterio
import numpy as np

# Load data
image = load_sentinel2_image('data/sentinel2_image.tif')
cnn_prob = get_cloud_mask(image)

with rasterio.open('data/ground_truth.tif') as src:
    ground_truth = src.read(1)  # first band only

# Binary conversion: any non-zero ground-truth value counts as cloud,
# and the CNN output is thresholded at 0.5.
gt_binary = (ground_truth > 0).astype(np.uint8)
cnn_binary = (cnn_prob > 0.5).astype(np.uint8)

# Fail early with a readable message if the two masks cannot be compared
# pixel by pixel.
if gt_binary.size != cnn_binary.size:
    raise ValueError(
        f"Ground truth {gt_binary.shape} and CNN mask {cnn_binary.shape} "
        "contain a different number of pixels"
    )

# Flatten once and reuse for every metric instead of copying per call.
y_true = gt_binary.ravel()
y_pred = cnn_binary.ravel()

# Metrics (zero_division=0 avoids warnings when a class is never predicted)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)

print("🧠 CNN Baseline:")
print(f"  Accuracy:  {accuracy:.4f}")
print(f"  Precision: {precision:.4f}")
print(f"  Recall:    {recall:.4f}")
print(f"  F1-Score:  {f1:.4f}")
print(f"\n📊 Ground truth: {gt_binary.sum():,} cloud pixels")
print(f"📊 CNN predicted: {cnn_binary.sum():,} cloud pixels")

## 4️⃣ Pull Latest Code & Train PPO

In [None]:
# Get latest code with PPO improvements
!git pull origin master
print("‚úÖ Repository updated")

In [None]:
# Run PPO training (main step - takes 1-2 hours)
print("üöÄ Starting PPO training...")
print("This will take 1-2 hours with GPU")
print("="*60)

!python train_ppo.py

## 5️⃣ Results & Download

In [None]:
# Display training results
import json
from pathlib import Path

results_file = Path('results/ppo_training_results.json')

if results_file.exists():
    with open(results_file, encoding='utf-8') as f:
        results = json.load(f)

    cnn = results['cnn_baseline']
    ppo = results['ppo_refined']
    imp = results['improvements']

    print("\n" + "="*60)
    print("📈 PPO TRAINING RESULTS")
    print("="*60)

    # Both metric dictionaries are read with the same four keys, so one loop
    # prints the baseline and the refined scores.
    for title, metrics in (("🧠 CNN Baseline:", cnn), ("🤖 PPO Refined:", ppo)):
        print(f"\n{title}")
        print(f"  Accuracy:  {metrics['accuracy']:.4f}")
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall:    {metrics['recall']:.4f}")
        print(f"  F1-Score:  {metrics['f1_score']:.4f}")

    # F1/accuracy are printed as percentages, precision/recall as deltas,
    # following the key names stored in the results file.
    print("\n🎯 Improvements:")
    print(f"  F1-Score:  {imp['f1_score_percent']:+.2f}%")
    print(f"  Accuracy:  {imp['accuracy_percent']:+.2f}%")
    print(f"  Precision: {imp['precision_delta']:+.4f}")
    print(f"  Recall:    {imp['recall_delta']:+.4f}")
    print("\n" + "="*60)
else:
    print("❌ Results file not found")
    print("Make sure PPO training completed successfully")

In [None]:
# Save to Google Drive
import glob
import shutil
from pathlib import Path

gdrive_results = '/content/drive/MyDrive/Colab_Data/thesis_results'
Path(gdrive_results).mkdir(parents=True, exist_ok=True)

# Copy results (best effort: report the reason instead of hiding it)
try:
    shutil.copy('results/ppo_training_results.json', f'{gdrive_results}/ppo_results.json')
    print("✅ Results saved to Google Drive")
except OSError as exc:
    print(f"⚠️  Could not save results to Google Drive: {exc}")

# Copy model
try:
    model_files = glob.glob('models/ppo_cloud_refinement_model*')
    for model_file in model_files:
        shutil.copy(model_file, f'{gdrive_results}/{Path(model_file).name}')
    # Only claim success when at least one model file was actually copied.
    if model_files:
        print("✅ Model saved to Google Drive")
    else:
        print("⚠️  Could not save model: no files match models/ppo_cloud_refinement_model*")
except OSError as exc:
    print(f"⚠️  Could not save model: {exc}")

print(f"\n📂 Results at: {gdrive_results}")

## ✅ Summary

**Done!** Your PPO agent has been trained.

**What happened:**
1. ✅ Loaded CNN baseline performance
2. ✅ Trained PPO with balanced reward structure
3. ✅ Evaluated on test data
4. ✅ Saved results and model

**Key improvements in PPO:**
- Better exploration with entropy coefficient
- Policy gradient approach handles reward shaping better
- Larger patch size (64×64) for better context
- 100k timesteps for better convergence

**Next steps:**
1. Download results from Google Drive
2. Analyze the refined cloud mask
3. Consider hyperparameter tuning if needed

**For thesis writing:**
- See `thesis_recommendations.md` for advanced techniques
- Check `results/ppo_training_results.json` for detailed metrics