# Section III
Validate your installations, in a notebook by executing:

!pip install earthengine-api geemap folium matplotlib pandas geopandas rasterio rasterstats scipy scikit-learn seaborn statsmodels
!earthengine authenticate

In [3]:
# Replace by your project numbber:
my_project = 'my-project-00000000000'

In [11]:
import ee
import geemap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import folium
from IPython.display import Image, display

# Initialize Earth Engine
try:
    ee.Initialize(project=my_project)
    print("Earth Engine initialized successfully!")
except Exception as e:
    print(f"Error initializing Earth Engine: {e}")
    print("Please authenticate using: ee.Authenticate()")


Earth Engine initialized successfully!


In [13]:
def mask_landsat8_clouds(image):
    """
    Applies cloud and cloud shadow masking to Landsat 8 images using QA_PIXEL band.
    
    Args:
        image (ee.Image): Landsat 8 surface reflectance image
        
    Returns:
        ee.Image: Cloud-masked image
    """
    # QA_PIXEL bit meanings for Landsat 8
    qa = image.select('QA_PIXEL')
    
    # Bits for cloud, cloud confidence, cloud shadow, and snow
    cloud_bit = 1 << 3  # Bit 3: Cloud
    cloud_confidence_bit = 1 << 8  # Bit 8: Cloud confidence (high)
    cloud_shadow_bit = 1 << 4  # Bit 4: Cloud shadow
    snow_bit = 1 << 5  # Bit 5: Snow
    
    # Create masks
    cloud_mask = qa.bitwiseAnd(cloud_bit).eq(0)
    cloud_confidence_mask = qa.bitwiseAnd(cloud_confidence_bit).eq(0)
    cloud_shadow_mask = qa.bitwiseAnd(cloud_shadow_bit).eq(0)
    snow_mask = qa.bitwiseAnd(snow_bit).eq(0)
    
    # Combine all masks
    clear_mask = cloud_mask.And(cloud_confidence_mask).And(cloud_shadow_mask).And(snow_mask)
    
    return image.updateMask(clear_mask)

def mask_landsat7_clouds(image):
    """
    Applies cloud masking to Landsat 7 images using QA_PIXEL band.
    
    Args:
        image (ee.Image): Landsat 7 surface reflectance image
        
    Returns:
        ee.Image: Cloud-masked image
    """
    qa = image.select('QA_PIXEL')
    
    # Similar bit positions for Landsat 7
    cloud_bit = 1 << 3
    cloud_shadow_bit = 1 << 4
    snow_bit = 1 << 5
    
    clear_mask = qa.bitwiseAnd(cloud_bit).eq(0).And(
                 qa.bitwiseAnd(cloud_shadow_bit).eq(0)).And(
                 qa.bitwiseAnd(snow_bit).eq(0))
    
    return image.updateMask(clear_mask)

In [6]:
def calculate_ndvi_landsat8(image):
    """
    Calculates NDVI for Landsat 8 images.
    
    Args:
        image (ee.Image): Landsat 8 surface reflectance image
        
    Returns:
        ee.Image: Image with NDVI band added
    """
    ndvi = image.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')
    return image.addBands(ndvi)

def calculate_ndvi_landsat7(image):
    """
    Calculates NDVI for Landsat 7 images.
    
    Args:
        image (ee.Image): Landsat 7 surface reflectance image
        
    Returns:
        ee.Image: Image with NDVI band added
    """
    ndvi = image.normalizedDifference(['SR_B4', 'SR_B3']).rename('NDVI')
    return image.addBands(ndvi)

In [65]:
class DeforestationAnalyzer:
    """
    A comprehensive class for deforestation analysis using Google Earth Engine.
    """
    
    def __init__(self, study_area, start_year, end_year):
        """
        Initialize the deforestation analyzer.
        
        Args:
            study_area (ee.Geometry): Area of interest for analysis
            start_year (int): Baseline year for comparison
            end_year (int): End year for change detection
        """
        self.study_area = study_area
        self.start_year = start_year
        self.end_year = end_year
        self.start_composite = None
        self.end_composite = None
        self.change_image = None
        self.deforestation_mask = None
        
    def create_annual_composite(self, year, cloud_coverage_threshold=20):
        """
        Creates cloud-free annual NDVI composite for a given year.
        
        Args:
            year (int): Year for composite creation
            cloud_coverage_threshold (int): Maximum cloud coverage percentage
            
        Returns:
            ee.Image: Annual NDVI composite
        """
        # Date range for the year
        start_date = f'{year}-01-01'
        end_date = f'{year}-12-31'
        
        # Landsat 8 collection
        l8_collection = (ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
                        .filterBounds(self.study_area)
                        .filterDate(start_date, end_date)
                        .filter(ee.Filter.lt('CLOUD_COVER', cloud_coverage_threshold))
                        .map(mask_landsat8_clouds)
                        .map(calculate_ndvi_landsat8))
        
        # Landsat 7 collection (for gap-filling)
        l7_collection = (ee.ImageCollection('LANDSAT/LE07/C02/T1_L2')
                        .filterBounds(self.study_area)
                        .filterDate(start_date, end_date)
                        .filter(ee.Filter.lt('CLOUD_COVER', cloud_coverage_threshold))
                        .map(mask_landsat7_clouds)
                        .map(calculate_ndvi_landsat7))
        
        # Merge collections
        merged_collection = l8_collection.merge(l7_collection)
        
        # Create median composite
        composite = merged_collection.select('NDVI').median()
        
        # Add year as a property
        composite = composite.set('year', year)
        
        return composite.clip(self.study_area)
    
    def generate_composites(self):
        """
        Generates NDVI composites for start and end years.
        """
        print(f"Creating {self.start_year} composite...")
        self.start_composite = self.create_annual_composite(self.start_year)
        
        print(f"Creating {self.end_year} composite...")
        self.end_composite = self.create_annual_composite(self.end_year)
        
        print("Composites generated successfully!")
    
    def detect_forest_change(self, ndvi_threshold=-0.2):
        """
        Performs change detection between start and end year composites.
        
        Args:
            ndvi_threshold (float): NDVI difference threshold for deforestation detection
        """
        if self.start_composite is None or self.end_composite is None:
            raise ValueError("Composites must be generated first using generate_composites()")
        
        # Calculate NDVI difference (end - start)
        self.change_image = self.end_composite.subtract(self.start_composite)
        
        # Apply threshold for deforestation detection
        # Negative change below threshold indicates potential deforestation
        self.deforestation_mask = self.change_image.lt(ndvi_threshold)
        
        print(f"Change detection completed with threshold: {ndvi_threshold}")
    
    def apply_forest_mask(self, min_forest_cover=30):
        """
        Applies forest mask to focus analysis on forested areas using Hansen data.
        
        Args:
            min_forest_cover (int): Minimum forest cover percentage for baseline year
        """
        # Load Hansen Global Forest Change dataset
        hansen = ee.Image('UMD/hansen/global_forest_change_2024_v1_12')
        # UMD/hansen/global_forest_change_2022_v1_10
        
        # Create forest mask for baseline year
        forest_cover_2000 = hansen.select('treecover2000')
        forest_mask = forest_cover_2000.gte(min_forest_cover)
        
        # Get loss year data
        loss_year = hansen.select('lossyear')
        
        # Create mask for areas that were forested in start year
        # (areas with forest cover in 2000 and no loss before start year)
        years_since_2000 = self.start_year - 2000
        no_prior_loss = loss_year.eq(0).Or(loss_year.gt(years_since_2000))
        
        baseline_forest_mask = forest_mask.And(no_prior_loss)
        
        # Apply forest mask to deforestation results
        if self.deforestation_mask is not None:
            self.deforestation_mask = self.deforestation_mask.And(baseline_forest_mask)
        
        print("Forest mask applied successfully!")
    
    def calculate_statistics(self, upscale=90):
        """
        Calculates deforestation statistics for the study area.
        
        Returns:
            dict: Dictionary containing area statistics
        """
        if self.deforestation_mask is None:
            raise ValueError("Change detection must be performed first")
        
        # Calculate area of deforestation
        pixel_area = ee.Image.pixelArea()
        deforested_area = self.deforestation_mask.multiply(pixel_area).divide(10000)  # Convert to hectares
        
        # Sum total deforested area
        stats = deforested_area.reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=self.study_area,
            scale=upscale,
            maxPixels=1e13
        )
        info = stats.getInfo()
        print("info", info)
        total_area_ha = info.get('NDVI', 0)
        total_area_km2 = total_area_ha / 100  # Convert to km²
        
        return {
            'total_deforested_hectares': total_area_ha,
            'total_deforested_km2': total_area_km2,
            'analysis_period': f'{self.start_year}-{self.end_year}'
        }


In [29]:
def create_interactive_map(analyzer, center_lat=-10.5, center_lon=-63.0, zoom=8):
    """
    Creates an interactive map showing deforestation results.
    
    Args:
        analyzer (DeforestationAnalyzer): Initialized analyzer with results
        center_lat (float): Map center latitude
        center_lon (float): Map center longitude
        zoom (int): Initial zoom level
        
    Returns:
        geemap.Map: Interactive map object
    """
    # Initialize map
    Map = geemap.Map(center=[center_lat, center_lon], zoom=zoom)
    
    # Add study area boundary
    Map.addLayer(analyzer.study_area, {'color': 'yellow'}, 'Study Area', opacity=0.8)
    
    # Visualization parameters for NDVI
    ndvi_vis = {
        'min': 0,
        'max': 1,
        'palette': ['red', 'yellow', 'green']
    }
    
    # Add NDVI composites
    if analyzer.start_composite is not None:
        Map.addLayer(analyzer.start_composite, ndvi_vis, 
                    f'NDVI {analyzer.start_year}', shown=False)
    
    if analyzer.end_composite is not None:
        Map.addLayer(analyzer.end_composite, ndvi_vis, 
                    f'NDVI {analyzer.end_year}', shown=False)
    
    # Add change detection results
    if analyzer.change_image is not None:
        change_vis = {
            'min': -0.5,
            'max': 0.5,
            'palette': ['red', 'white', 'green']
        }
        Map.addLayer(analyzer.change_image, change_vis, 'NDVI Change', shown=False)
    
    # Add deforestation mask
    if analyzer.deforestation_mask is not None:
        deforestation_vis = {
            'min': 0,
            'max': 1,
            'palette': ['green', 'red']
        }
        Map.addLayer(analyzer.deforestation_mask, deforestation_vis, 
                    'Deforestation Hotspots', shown=True)
    
    return Map

def plot_time_series(study_area, start_year, end_year):
    """
    Creates a time series plot of NDVI values for the study area.
    
    Args:
        study_area (ee.Geometry): Area of interest
        start_year (int): Start year for time series
        end_year (int): End year for time series
    """
    years = list(range(start_year, end_year + 1))
    ndvi_values = []
    
    for year in years:
        # Create temporary analyzer for each year
        temp_analyzer = DeforestationAnalyzer(study_area, year, year)
        composite = temp_analyzer.create_annual_composite(year)
        
        # Calculate mean NDVI for the area
        mean_ndvi = composite.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=study_area,
            scale=30,
            maxPixels=1e8
        ).getInfo()['NDVI']
        
        ndvi_values.append(mean_ndvi)
    
    # Create plot
    plt.figure(figsize=(10, 6))
    plt.plot(years, ndvi_values, marker='o', linewidth=2, markersize=8)
    plt.xlabel('Year')
    plt.ylabel('Mean NDVI')
    plt.title('NDVI Time Series Analysis')
    plt.grid(True, alpha=0.3)
    plt.xticks(years, rotation=45)
    plt.tight_layout()
    plt.show()

In [85]:
def run_deforestation_analysis():
    """
    Runs the complete deforestation analysis workflow.
    """
    # Define study area (around Rondônia, Brazil)
    study_area = ee.Geometry.Rectangle([-65.5, -13.5, -59.5, -7.5])
    study_area = ee.Geometry.Rectangle([-74, -15, -44, 5])
    # Initialize analyzer
    print("Initializing Deforestation Analyzer...")
    analyzer = DeforestationAnalyzer(study_area, 2020, 2023)
    
    # Generate NDVI composites
    print("Generating NDVI composites...")
    analyzer.generate_composites()
    
    # Perform change detection 
    # Typical deforestation may lower NDVI by 0.1–0.15
    print("Performing change detection...")
    analyzer.detect_forest_change(ndvi_threshold=-0.15)
    
    
    # Apply forest mask to focus on area with forest in 2000 following Hansen
    print("Applying forest mask...")
    analyzer.apply_forest_mask(min_forest_cover=30)
    
    # Calculate statistics
    print("Calculating statistics...")
    stats = analyzer.calculate_statistics(upscale=500)
    
    # Display results
    print("\n" + "="*50)
    print("DEFORESTATION ANALYSIS RESULTS")
    print("="*50)
    print(f"Analysis Period: {stats['analysis_period']}")
    print(f"Total Deforested Area: {stats['total_deforested_hectares']:.2f} hectares")
    print(f"Total Deforested Area: {stats['total_deforested_km2']:.2f} km²")
    print("="*50)
    
    # Create interactive map
    print("\nCreating interactive map...")
    map_viz = create_interactive_map(analyzer, center_lat=-10.5, center_lon=-63.0, zoom=8)
    
    return analyzer, map_viz, stats



In [86]:
analyzer, interactive_map, results = run_deforestation_analysis()

# Display the map
display(interactive_map)

Initializing Deforestation Analyzer...
Generating NDVI composites...
Creating 2020 composite...
Creating 2023 composite...
Composites generated successfully!
Performing change detection...
Change detection completed with threshold: -0.15
Applying forest mask...
Forest mask applied successfully!
Calculating statistics...
info {'NDVI': 352114.2427969918}

DEFORESTATION ANALYSIS RESULTS
Analysis Period: 2020-2023
Total Deforested Area: 352114.24 hectares
Total Deforested Area: 3521.14 km²

Creating interactive map...


Map(center=[-10.5, -63.0], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright'…

In [91]:
def validate_against_hansen(analyzer, hansen_threshold_year=20, upscale=30):
    """
    Validates deforestation results against Hansen Global Forest Change dataset.
    
    Args:
        analyzer (DeforestationAnalyzer): Analyzer with deforestation results
        hansen_threshold_year (int): Hansen loss year threshold (years since 2000)
        
    Returns:
        dict: Validation statistics
    """
    # Load Hansen dataset
    hansen = ee.Image('UMD/hansen/global_forest_change_2024_v1_12') \
                     .clip(analyzer.study_area)
    
    loss_year = hansen.select('lossyear')
    
    # Create Hansen loss mask for analysis period
    start_hansen_year = analyzer.start_year - 2000
    end_hansen_year = analyzer.end_year - 2000
    
    hansen_loss = loss_year.gte(start_hansen_year).And(loss_year.lte(end_hansen_year))
    
    # Compare with our results
    our_results = analyzer.deforestation_mask
    
    # Calculate confusion matrix components
    true_positives = hansen_loss.And(our_results)
    false_positives = our_results.And(hansen_loss.Not())
    false_negatives = hansen_loss.And(our_results.Not())
    true_negatives = hansen_loss.Not().And(our_results.Not())
    
    # Calculate areas
    pixel_area = ee.Image.pixelArea().divide(10000)  # Convert to hectares

    ourloss_stats = our_results.multiply(pixel_area).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=analyzer.study_area,
        scale=upscale,
        maxPixels=1e12
    )
    info = ourloss_stats.getInfo()
    print("info ourloss_stats", info)
    ourloss = info.get('NDVI', 0)
    
    tp_stats = true_positives.multiply(pixel_area).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=analyzer.study_area,
        scale=upscale,
        maxPixels=1e12
    )
    info = tp_stats.getInfo()
    print("info tp_stats", info)
    tp_area = info.get('lossyear', 0)

    
    fp_stats = false_positives.multiply(pixel_area).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=analyzer.study_area,
        scale=upscale,
        maxPixels=1e12
    )
    info = fp_stats.getInfo()
    print("info fp_area", info)
    fp_area = info.get('NDVI', 0)
    
    fn_stats = false_negatives.multiply(pixel_area).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=analyzer.study_area,
        scale=upscale,
        maxPixels=1e12
    )
    info = fn_stats.getInfo()
    print("info fn_stats", info)
    fn_area = info.get('lossyear', 0)

    # Calculate accuracy metrics
    precision = tp_area / (tp_area + fp_area) if (tp_area + fp_area) > 0 else 0
    recall = tp_area / (tp_area + fn_area) if (tp_area + fn_area) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return {
        'our_loss' : ourloss,
        'true_positives_ha': tp_area,
        'false_positives_ha': fp_area,
        'false_negatives_ha': fn_area,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score
    }

def plot_validation_results(validation_stats):
    """
    Creates visualization of validation results.
    
    Args:
        validation_stats (dict): Results from validate_against_hansen()
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    
    # Confusion matrix visualization
    confusion_data = [
        ['True Positive', validation_stats['true_positives_ha']],
        ['False Positive', validation_stats['false_positives_ha']],
        ['False Negative', validation_stats['false_negatives_ha']]
    ]
    
    categories = [item[0] for item in confusion_data]
    values = [item[1] for item in confusion_data]
    colors = ['green', 'orange', 'red']
    
    ax1.bar(categories, values, color=colors, alpha=0.7)
    ax1.set_ylabel('Area (hectares)')
    ax1.set_title('Validation Results: Area by Category')
    ax1.tick_params(axis='x', rotation=45)
    
    # Accuracy metrics
    metrics = ['Precision', 'Recall', 'F1-Score']
    metric_values = [
        validation_stats['precision'],
        validation_stats['recall'],
        validation_stats['f1_score']
    ]
    
    bars = ax2.bar(metrics, metric_values, color='skyblue', alpha=0.7)
    ax2.set_ylabel('Score')
    ax2.set_title('Accuracy Metrics')
    ax2.set_ylim(0, 1)
    
    # Add value labels on bars
    for bar, value in zip(bars, metric_values):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{value:.3f}', ha='center', va='bottom')
    
    plt.tight_layout()
    plt.show()

In [47]:
class GlobalDeforestationProcessor:
    """
    Processes global deforestation analysis using a tiling strategy.
    """
    
    def __init__(self, tile_size=5.0):
        """
        Initialize global processor with tile configuration.
        
        Args:
            tile_size (float): Size of each tile in degrees
        """
        self.tile_size = tile_size
        self.tiles = []
        
    def generate_global_tiles(self, bounds=None):
        """
        Generates tiles for global processing.
        
        Args:
            bounds (list): [west, south, east, north] bounds for analysis
                         If None, uses global extent
        """
        if bounds is None:
            # Global extent
            west, south, east, north = -180, -60, 180, 60
        else:
            west, south, east, north = bounds
        
        tiles = []
        
        lon = west
        while lon < east:
            lat = south
            while lat < north:
                tile_bounds = [
                    lon, lat,
                    min(lon + self.tile_size, east),
                    min(lat + self.tile_size, north)
                ]
                
                tile_geom = ee.Geometry.Rectangle(tile_bounds)
                tiles.append({
                    'id': f'tile_{len(tiles)}',
                    'bounds': tile_bounds,
                    'geometry': tile_geom
                })
                
                lat += self.tile_size
            lon += self.tile_size
        
        self.tiles = tiles
        print(f"Generated {len(tiles)} tiles for processing")
        
    def process_tile(self, tile, start_year, end_year, upscale=90):
        """
        Processes a single tile for deforestation analysis.
        
        Args:
            tile (dict): Tile information dictionary
            start_year (int): Start year for analysis
            end_year (int): End year for analysis
            
        Returns:
            dict: Processing results for the tile
        """
        try:
            analyzer = DeforestationAnalyzer(tile['geometry'], start_year, end_year)
            analyzer.generate_composites()
            analyzer.detect_forest_change()
            analyzer.apply_forest_mask()
            
            stats = analyzer.calculate_statistics(upscale=upscale)
            stats['tile_id'] = tile['id']
            stats['bounds'] = tile['bounds']
            
            return stats
            
        except Exception as e:
            print(f"Error processing tile {tile['id']}: {str(e)}")
            return {
                'tile_id': tile['id'],
                'bounds': tile['bounds'],
                'error': str(e),
                'total_deforested_hectares': 0,
                'total_deforested_km2': 0
            }
    
    def export_tile_results(self, tile_results, output_collection_id):
        """
        Exports tile results to Earth Engine Asset.
        
        Args:
            tile_results (list): List of tile processing results
            output_collection_id (str): Earth Engine Asset ID for output
        """
        # This would typically involve exporting each tile's deforestation mask
        # to an Earth Engine Asset collection for later mosaicking
        print(f"Exporting {len(tile_results)} tiles to {output_collection_id}")
        
        # Example export task (would need to be implemented per tile)
        for result in tile_results:
            if 'error' not in result:
                print(f"Tile {result['tile_id']}: {result['total_deforested_km2']:.2f} km² deforested")

# Example usage for regional analysis
def run_regional_analysis(region_bounds, start_year=2020, end_year=2023, tile_size=1.0, upscale=90):
    """
    Runs deforestation analysis for a large region using tiling.
    
    Args:
        region_bounds (list): [west, south, east, north] bounds
        start_year (int): Start year for analysis
        end_year (int): End year for analysis
    """
    processor = GlobalDeforestationProcessor(tile_size=tile_size)
    processor.generate_global_tiles(region_bounds)
    
    results = []
    for i, tile in enumerate(processor.tiles):
        print(f"Processing tile {i+1}/{len(processor.tiles)}: {tile['id']}")
        result = processor.process_tile(tile, start_year, end_year, upscale=upscale)
        results.append(result)
    
    # Aggregate results
    total_deforestation = sum([r.get('total_deforested_km2', 0) for r in results])
    print(f"\nTotal deforestation across region: {total_deforestation:.2f} km²")
    
    return results

In [48]:
def analyze_deforestation_trends(study_area, years_list):
    """
    Analyzes deforestation trends across multiple years.
    
    Args:
        study_area (ee.Geometry): Area of interest
        years_list (list): List of years for trend analysis
        
    Returns:
        dict: Trend analysis results
    """
    annual_loss = []
    
    for i in range(len(years_list) - 1):
        start_year = years_list[i]
        end_year = years_list[i + 1]
        
        analyzer = DeforestationAnalyzer(study_area, start_year, end_year)
        analyzer.generate_composites()
        analyzer.detect_forest_change()
        analyzer.apply_forest_mask()
        
        stats = analyzer.calculate_statistics(upscale=90)
        annual_loss.append({
            'period': f'{start_year}-{end_year}',
            'loss_km2': stats['total_deforested_km2']
        })
    
    # Calculate trend
    loss_values = [item['loss_km2'] for item in annual_loss]
    trend = np.polyfit(range(len(loss_values)), loss_values, 1)[0]
    
    return {
        'annual_loss': annual_loss,
        'trend_km2_per_year': trend,
        'total_loss': sum(loss_values)
    }

In [98]:
def identify_deforestation_hotspots(analyzer, hotspot_threshold_km2=1.0, cluster_distance=1000, upscale=90):
    """
    Identifies and ranks deforestation hotspots.
    
    Args:
        analyzer (DeforestationAnalyzer): Analyzer with results
        hotspot_threshold_km2 (float): Minimum area for hotspot classification
        cluster_distance (int): Distance for clustering hotspots (meters)
        
    Returns:
        ee.FeatureCollection: Hotspot locations with attributes
    """
    if analyzer.deforestation_mask is None:
        raise ValueError("Deforestation analysis must be completed first")
    
    # Convert deforestation mask to vectors
    vectors = analyzer.deforestation_mask.selfMask().reduceToVectors(
        geometry=analyzer.study_area,
        scale=upscale,
        bestEffort=True,
        geometryType='polygon',
        eightConnected=False,
        maxPixels=1e12
    )
    
    # Calculate area for each polygon
    vectors_with_area = vectors.map(
        lambda f: f.set({
            'area_m2': f.geometry(maxError=1).area(maxError=1),
            'area_km2': f.geometry(maxError=1).area(maxError=1).divide(1e6)
        })
    )
    
    # Filter by minimum area
    hotspots = vectors_with_area.filter(
        ee.Filter.gte('area_km2', hotspot_threshold_km2)
    )
    
    return hotspots

def create_hotspot_report(hotspots, study_area_name="Study Area"):
    """
    Creates a comprehensive report of deforestation hotspots.
    
    Args:
        hotspots (ee.FeatureCollection): Hotspot features from identify_deforestation_hotspots
        study_area_name (str): Name of the study area for reporting
        
    Returns:
        dict: Hotspot analysis report
    """
    # Get hotspot statistics
    hotspot_count = hotspots.size().getInfo()
    
    if hotspot_count == 0:
        return {
            'study_area': study_area_name,
            'total_hotspots': 0,
            'total_hotspot_area_km2': 0,
            'largest_hotspot_km2': 0,
            'average_hotspot_km2': 0,
            'hotspots': []
        }
    
    # Calculate statistics
    total_area = hotspots.aggregate_sum('area_km2').getInfo()
    max_area = hotspots.aggregate_max('area_km2').getInfo()
    mean_area = hotspots.aggregate_mean('area_km2').getInfo()
    
    # Get top 10 largest hotspots
    top_hotspots = hotspots.sort('area_km2', False).limit(10)
    
    # Extract hotspot details
    hotspot_list = []
    top_features = top_hotspots.getInfo()['features']
    
    for i, feature in enumerate(top_features):
        props = feature['properties']
        geometry = feature['geometry']
        
        # Calculate centroid for location reference
        # centroid = ee.Geometry(geometry).centroid().coordinates().getInfo()
        geom = ee.Geometry(geometry).simplify(10)         # simplify to 10 m
        centroid = geom.centroid(maxError=1).coordinates().getInfo()
        
        hotspot_list.append({
            'rank': i + 1,
            'area_km2': props['area_km2'],
            'location_lon': centroid[0],
            'location_lat': centroid[1],
            'geometry_type': geometry['type']
        })
    
    return {
        'study_area': study_area_name,
        'total_hotspots': hotspot_count,
        'total_hotspot_area_km2': total_area,
        'largest_hotspot_km2': max_area,
        'average_hotspot_km2': mean_area,
        'top_hotspots': hotspot_list
    }

In [45]:
class DeforestationAlertSystem:
    """
    Automated alert system for near real-time deforestation monitoring.
    """
    
    def __init__(self, monitoring_areas, alert_threshold_ha=10):
        """
        Initialize alert system.
        
        Args:
            monitoring_areas (dict): Dictionary of named monitoring areas
            alert_threshold_ha (float): Minimum deforestation area to trigger alert
        """
        self.monitoring_areas = monitoring_areas
        self.alert_threshold_ha = alert_threshold_ha
        self.alert_history = []
        
    def check_recent_deforestation(self, days_back=30):
        """
        Checks for recent deforestation in all monitoring areas.
        
        Args:
            days_back (int): Number of days to look back for recent imagery
            
        Returns:
            list: List of alerts generated
        """
        alerts = []
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days_back)
        
        for area_name, geometry in self.monitoring_areas.items():
            try:
                # Get recent imagery
                recent_collection = (ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
                                   .filterBounds(geometry)
                                   .filterDate(start_date.strftime('%Y-%m-%d'), 
                                             end_date.strftime('%Y-%m-%d'))
                                   .filter(ee.Filter.lt('CLOUD_COVER', 30)))
                
                if recent_collection.size().getInfo() > 0:
                    # Create recent composite
                    recent_composite = (recent_collection
                                      .map(mask_landsat8_clouds)
                                      .map(calculate_ndvi_landsat8)
                                      .select('NDVI')
                                      .median()
                                      .clip(geometry))
                    
                    # Get baseline (30 days earlier)
                    baseline_start = start_date - timedelta(days=30)
                    baseline_collection = (ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
                                         .filterBounds(geometry)
                                         .filterDate(baseline_start.strftime('%Y-%m-%d'),
                                                   start_date.strftime('%Y-%m-%d'))
                                         .filter(ee.Filter.lt('CLOUD_COVER', 30)))
                    
                    if baseline_collection.size().getInfo() > 0:
                        baseline_composite = (baseline_collection
                                            .map(mask_landsat8_clouds)
                                            .map(calculate_ndvi_landsat8)
                                            .select('NDVI')
                                            .median()
                                            .clip(geometry))
                        
                        # Detect changes
                        change = recent_composite.subtract(baseline_composite)
                        deforestation = change.lt(-0.2)
                        
                        # Calculate area
                        area_image = deforestation.multiply(ee.Image.pixelArea()).divide(10000)
                        total_area = area_image.reduceRegion(
                            reducer=ee.Reducer.sum(),
                            geometry=geometry,
                            scale=30,
                            maxPixels=1e8
                        ).getInfo()['NDVI']
                        
                        if total_area > self.alert_threshold_ha:
                            alert = {
                                'area_name': area_name,
                                'date': end_date.strftime('%Y-%m-%d'),
                                'deforested_area_ha': total_area,
                                'alert_level': self._calculate_alert_level(total_area),
                                'geometry': geometry
                            }
                            alerts.append(alert)
                            self.alert_history.append(alert)
            
            except Exception as e:
                print(f"Error checking area {area_name}: {str(e)}")
        
        return alerts
    
    def _calculate_alert_level(self, area_ha):
        """
        Calculates alert level based on deforested area.
        
        Args:
            area_ha (float): Deforested area in hectares
            
        Returns:
            str: Alert level
        """
        if area_ha < 50:
            return "LOW"
        elif area_ha < 200:
            return "MEDIUM"
        elif area_ha < 500:
            return "HIGH"
        else:
            return "CRITICAL"
    
    def generate_alert_report(self, alerts):
        """
        Generates formatted alert report.
        
        Args:
            alerts (list): List of alerts from check_recent_deforestation
            
        Returns:
            str: Formatted alert report
        """
        if not alerts:
            return "No deforestation alerts detected."
        
        report = "DEFORESTATION ALERT REPORT\n"
        report += "=" * 50 + "\n"
        report += f"Report Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
        report += f"Total Alerts: {len(alerts)}\n\n"
        
        # Sort alerts by area (largest first)
        sorted_alerts = sorted(alerts, key=lambda x: x['deforested_area_ha'], reverse=True)
        
        for i, alert in enumerate(sorted_alerts, 1):
            report += f"Alert {i}: {alert['area_name']}\n"
            report += f"  - Area Affected: {alert['deforested_area_ha']:.2f} hectares\n"
            report += f"  - Alert Level: {alert['alert_level']}\n"
            report += f"  - Detection Date: {alert['date']}\n"
            report += "-" * 30 + "\n"
        
        return report

In [44]:
def export_deforestation_results(analyzer, output_filename, scale=30):
    """
    Exports deforestation results to Google Drive.
    
    Args:
        analyzer (DeforestationAnalyzer): Analyzer with results
        output_filename (str): Output filename (without extension)
        scale (int): Export resolution in meters
    """
    if analyzer.deforestation_mask is None:
        raise ValueError("Deforestation analysis must be completed first")
    
    # Create export image with multiple bands
    export_image = ee.Image.cat([
        analyzer.start_composite.rename('NDVI_start'),
        analyzer.end_composite.rename('NDVI_end'),
        analyzer.change_image.rename('NDVI_change'),
        analyzer.deforestation_mask.rename('deforestation')
    ])
    
    # Export to Google Drive
    task = ee.batch.Export.image.toDrive(
        image=export_image,
        description=f'{output_filename}_deforestation',
        folder='GEE_Exports',
        fileNamePrefix=output_filename,
        scale=scale,
        region=analyzer.study_area,
        maxPixels=1e9
    )
    
    task.start()
    print(f"Export task started: {output_filename}")
    print(f"Task ID: {task.id}")
    
    return task

def export_hotspots_to_shapefile(hotspots, output_filename):
    """
    Exports hotspot polygons as shapefile to Google Drive.
    
    Args:
        hotspots (ee.FeatureCollection): Hotspot features
        output_filename (str): Output filename (without extension)
    """
    task = ee.batch.Export.table.toDrive(
        collection=hotspots,
        description=f'{output_filename}_hotspots',
        folder='GEE_Exports',
        fileNamePrefix=output_filename,
        fileFormat='SHP'
    )
    
    task.start()
    print(f"Hotspots export task started: {output_filename}")
    print(f"Task ID: {task.id}")
    
    return task

In [100]:
def generate_comprehensive_report(analyzer, validation_stats=None, hotspot_report=None, upscale=120):
    """
    Generates a comprehensive analysis report.
    
    Args:
        analyzer (DeforestationAnalyzer): Completed analysis
        validation_stats (dict, optional): Validation results
        hotspot_report (dict, optional): Hotspot analysis results
        
    Returns:
        str: Comprehensive formatted report
    """
    # Get basic statistics
    stats = analyzer.calculate_statistics(upscale=upscale)
    
    # Start building report
    report = []
    report.append("COMPREHENSIVE DEFORESTATION ANALYSIS REPORT")
    report.append("=" * 60)
    report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    report.append("")
    
    # Analysis Overview
    report.append("ANALYSIS OVERVIEW")
    report.append("-" * 30)
    report.append(f"Analysis Period: {stats['analysis_period']}")
    report.append(f"Study Area: Custom defined region")
    report.append(f"Methodology: NDVI-based change detection using Landsat imagery")
    report.append(f"Change Threshold: NDVI decrease > 0.2")
    report.append("")
    
    # Key Findings
    report.append("KEY FINDINGS")
    report.append("-" * 30)
    report.append(f"Total Deforested Area: {stats['total_deforested_hectares']:.2f} hectares")
    report.append(f"Total Deforested Area: {stats['total_deforested_km2']:.2f} km²")
    
    # Calculate annual rate if multi-year analysis
    years_diff = analyzer.end_year - analyzer.start_year
    if years_diff > 1:
        annual_rate = stats['total_deforested_km2'] / years_diff
        report.append(f"Average Annual Deforestation Rate: {annual_rate:.2f} km²/year")
    
    report.append("")
    
    # Validation Results
    if validation_stats:
        report.append("VALIDATION RESULTS")
        report.append("-" * 30)
        report.append(f"Validation Method: Comparison with Hansen Global Forest Change")
        report.append(f"Precision: {validation_stats['precision']:.3f}")
        report.append(f"Recall: {validation_stats['recall']:.3f}")
        report.append(f"F1-Score: {validation_stats['f1_score']:.3f}")
        report.append(f"True Positives: {validation_stats['true_positives_ha']:.2f} ha")
        report.append(f"False Positives: {validation_stats['false_positives_ha']:.2f} ha")
        report.append(f"False Negatives: {validation_stats['false_negatives_ha']:.2f} ha")
        report.append("")
    
    # Hotspot Analysis
    if hotspot_report and hotspot_report['total_hotspots'] > 0:
        report.append("HOTSPOT ANALYSIS")
        report.append("-" * 30)
        report.append(f"Total Hotspots Identified: {hotspot_report['total_hotspots']}")
        report.append(f"Total Hotspot Area: {hotspot_report['total_hotspot_area_km2']:.2f} km²")
        report.append(f"Largest Hotspot: {hotspot_report['largest_hotspot_km2']:.2f} km²")
        report.append(f"Average Hotspot Size: {hotspot_report['average_hotspot_km2']:.2f} km²")
        report.append("")
        
        if hotspot_report['top_hotspots']:
            report.append("TOP 5 LARGEST HOTSPOTS:")
            for i, hotspot in enumerate(hotspot_report['top_hotspots'][:5]):
                report.append(f"{i+1}. Area: {hotspot['area_km2']:.2f} km² "
                            f"(Lat: {hotspot['location_lat']:.4f}, "
                            f"Lon: {hotspot['location_lon']:.4f})")
        report.append("")
    
    # Methodology Details
    report.append("METHODOLOGY")
    report.append("-" * 30)
    report.append("1. Data Sources:")
    report.append("   - Landsat 8 Collection 2 Surface Reflectance")
    report.append("   - Landsat 7 Collection 2 Surface Reflectance (gap-filling)")
    report.append("   - Hansen Global Forest Change (forest mask)")
    report.append("")
    report.append("2. Processing Steps:")
    report.append("   - Cloud masking using QA_PIXEL band")
    report.append("   - NDVI calculation and annual compositing")
    report.append("   - Change detection via image differencing")
    report.append("   - Forest mask application")
    report.append("   - Threshold-based deforestation classification")
    report.append("")
    report.append("3. Quality Control:")
    report.append("   - Cloud coverage < 20% for input images")
    report.append("   - Minimum 30% forest cover for baseline areas")
    report.append("   - 30m spatial resolution analysis")
    report.append("")
    
    # Limitations and Caveats
    report.append("LIMITATIONS AND CAVEATS")
    report.append("-" * 30)
    report.append("• Results depend on cloud-free imagery availability")
    report.append("• NDVI-based detection may miss subtle forest degradation")
    report.append("• Natural forest loss (fires, storms) may be included")
    report.append("• 30m resolution may miss small-scale clearing")
    report.append("• Temporal compositing may introduce timing uncertainty")
    report.append("")
    
    # Recommendations
    report.append("RECOMMENDATIONS")
    report.append("-" * 30)
    report.append("• Validate results with field observations or high-resolution imagery")
    report.append("• Implement regular monitoring for trend analysis")
    report.append("• Consider integrating SAR data for cloud-prone areas")
    report.append("• Develop automated alert systems for rapid response")
    report.append("• Coordinate with local enforcement agencies")
    report.append("")
    
    report.append("=" * 60)
    
    return "\n".join(report)

def save_report_to_file(report_text, filename):
    """
    Saves report to a text file (for local environments).
    
    Args:
        report_text (str): Report content
        filename (str): Output filename
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(report_text)
    print(f"Report saved to: {filename}")

In [94]:
def complete_deforestation_analysis(study_area, start_year, end_year, 
                                  output_prefix="deforestation_analysis",
                                  export_results=True, run_validation=True,
                                  ndvi_threshold=-0.15, upscale = 500):
    """
    Runs complete deforestation analysis workflow with all components.
    
    Args:
        study_area (ee.Geometry): Area of interest
        start_year (int): Baseline year
        end_year (int): Comparison year
        output_prefix (str): Prefix for output files
        export_results (bool): Whether to export results
        run_validation (bool): Whether to run validation
        
    Returns:
        dict: Complete analysis results
    """
    print("Starting Complete Deforestation Analysis Workflow")
    print("=" * 60)
    
    # Step 1: Initialize and run basic analysis
    print("Step 1: Running deforestation analysis...")
    analyzer = DeforestationAnalyzer(study_area, start_year, end_year)
    analyzer.generate_composites()
    analyzer.detect_forest_change(ndvi_threshold=ndvi_threshold)
    analyzer.apply_forest_mask()
    
    basic_stats = analyzer.calculate_statistics(upscale = upscale)
    print(f"✓ Detected {basic_stats['total_deforested_km2']:.2f} km² of deforestation")
    
    # Step 2: Validation (optional)
    validation_stats = None
    if run_validation:
        print("Step 2: Running validation against Hansen dataset...")
        validation_stats = validate_against_hansen(analyzer, upscale=upscale)
        print(f"✓ Validation complete - F1 Score: {validation_stats['f1_score']:.3f}")
    
    # Step 3: Hotspot analysis
    print("Step 3: Identifying deforestation hotspots...")
    hotspots = identify_deforestation_hotspots(analyzer, hotspot_threshold_km2=2.0, upscale=upscale)
    hotspot_report = create_hotspot_report(hotspots, "Analysis Region")
    print(f"✓ Identified {hotspot_report['total_hotspots']} deforestation hotspots")
    
    # Step 4: Generate comprehensive report
    print("Step 4: Generating comprehensive report...")
    report = generate_comprehensive_report(analyzer, validation_stats, hotspot_report, upscale=upscale)
    
    # Step 5: Create visualizations
    print("Step 5: Creating interactive visualization...")
    interactive_map = create_interactive_map(analyzer)
    
    # Step 6: Export results (optional)
    export_tasks = []
    if export_results:
        print("Step 6: Exporting results...")
        
        # Export main results
        task1 = export_deforestation_results(analyzer, f"{output_prefix}_main")
        export_tasks.append(task1)
        
        # Export hotspots if any found
        if hotspot_report['total_hotspots'] > 0:
            task2 = export_hotspots_to_shapefile(hotspots, f"{output_prefix}_hotspots")
            export_tasks.append(task2)
        
        print(f"✓ Started {len(export_tasks)} export tasks")
    
    print("\nAnalysis Complete!")
    print("=" * 60)
    
    # Display summary
    print("\nSUMMARY:")
    print(f"Period: {start_year}-{end_year}")
    print(f"Total Deforestation: {basic_stats['total_deforested_km2']:.2f} km²")
    if validation_stats:
        print(f"Validation F1-Score: {validation_stats['f1_score']:.3f}")
    print(f"Hotspots Found: {hotspot_report['total_hotspots']}")
    
    return {
        'analyzer': analyzer,
        'basic_stats': basic_stats,
        'validation_stats': validation_stats,
        'hotspot_report': hotspot_report,
        'hotspots': hotspots,
        'interactive_map': interactive_map,
        'report': report,
        'export_tasks': export_tasks
    }

# Example usage for different regions
def analyze_amazon_deforestation():
    """Example: Amazon rainforest analysis"""
    amazon_region = ee.Geometry.Rectangle([-74, -15, -44, 5])  # Amazon basin
    return complete_deforestation_analysis(
        study_area=amazon_region,
        start_year=2010,
        end_year=2023,
        output_prefix="amazon_deforestation_2020_2023",
        upscale = 250, 
        ndvi_threshold = -0.15
        
    )

def analyze_congo_deforestation():
    """Example: Congo Basin analysis"""
    congo_region = ee.Geometry.Rectangle([11, -5, 30, 7])  # Congo basin
    return complete_deforestation_analysis(
        study_area=congo_region,
        start_year=2010,
        end_year=2023,
        output_prefix="congo_deforestation_2020_2023"
    )

def analyze_indonesia_deforestation():
    """Example: Indonesian forests analysis"""
    indonesia_region = ee.Geometry.Rectangle([95, -11, 141, 6])  # Indonesia
    return complete_deforestation_analysis(
        study_area=indonesia_region,
        start_year=2010,
        end_year=2023,
        output_prefix="indonesia_deforestation_2020_2023"
    )

In [101]:
analyze_amazon_deforestation()

Starting Complete Deforestation Analysis Workflow
Step 1: Running deforestation analysis...
Creating 2010 composite...
Creating 2023 composite...
Composites generated successfully!
Change detection completed with threshold: -0.15
Forest mask applied successfully!
info {'NDVI': 2306037.0368022914}
✓ Detected 23060.37 km² of deforestation
Step 2: Running validation against Hansen dataset...
info ourloss_stats {'NDVI': 2306037.036802292}
info tp_stats {'lossyear': 2268216.1167788906}
info fp_area {'NDVI': 37820.92002340072}
info fn_stats {'lossyear': 35692546.589055784}
✓ Validation complete - F1 Score: 0.113
Step 3: Identifying deforestation hotspots...
✓ Identified 1884 deforestation hotspots
Step 4: Generating comprehensive report...
info {'NDVI': 2858783.1172786797}
Step 5: Creating interactive visualization...
Step 6: Exporting results...
Export task started: amazon_deforestation_2020_2023_main
Task ID: G3MWF563BFAXM7SOUJFGJ237
Hotspots export task started: amazon_deforestation_2020_

{'analyzer': <__main__.DeforestationAnalyzer at 0x290f72b8e60>,
 'basic_stats': {'total_deforested_hectares': 2306037.0368022914,
  'total_deforested_km2': 23060.370368022915,
  'analysis_period': '2010-2023'},
 'validation_stats': {'our_loss': 2306037.036802292,
  'true_positives_ha': 2268216.1167788906,
  'false_positives_ha': 37820.92002340072,
  'false_negatives_ha': 35692546.589055784,
  'precision': 0.9835991706031548,
  'recall': 0.05975159493911485,
  'f1_score': 0.11265936857540551},
 'hotspot_report': {'study_area': 'Analysis Region',
  'total_hotspots': 1884,
  'total_hotspot_area_km2': 12187.278568920765,
  'largest_hotspot_km2': 391.18399273679614,
  'average_hotspot_km2': 6.468831512166011,
  'top_hotspots': [{'rank': 1,
    'area_km2': 391.18399273679614,
    'location_lon': -68.25363627104689,
    'location_lat': -15.188192772317844,
    'geometry_type': 'Polygon'},
   {'rank': 2,
    'area_km2': 205.2769848884119,
    'location_lon': -44.09463633196677,
    'location_l

In [None]:
analyze_congo_deforestation()

In [None]:
analyze_indonesia_deforestation()