# Approach 1 : LULC using hand annotated data & L8 

Data is labelled using hand annotated polygons. 

Author: Morgane Magnier (morgane.magnier@vattenfall.com)

Copyright © 2024 Magnier Morgane 

This notebook is part of a thesis project. The copyright of the thesis itself belongs to the student Morgane Magnier.  

**Rights and Intellectual Property**:  
- Vattenfall has the right to use the findings, methods, and conclusions of this thesis in its operations.  
- Any material generated within the framework of this thesis that is subject to intellectual property protection (e.g., source code, computer program, design, or invention) belongs to Vattenfall, unless otherwise agreed in writing.  

Permission is granted to view, copy, and share this notebook for **educational or personal purposes only**, provided that this notice is included in all copies.  

---

In [1]:
import ee, eemont, geemap

ee.Authenticate()
ee.Initialize()

import sys
sys.path.append('../preprocessing/clouds')
import landsat_preprocessing
sys.path.append('../wetlands_detection')
import wetlands_unsupervised_clustering

# Get collection

In [3]:
roi = ee.Geometry.Polygon([[[17.204933,60.402663],[17.204933,60.455525],[17.2645,60.455525],[17.2645,60.402663],[17.204933,60.402663]]])
indices = ['NDVI', 'MNDWI', 'NDMI', 'EVI','SAVI', 'ARVI']
thresh = 0.6

l8 = landsat_preprocessing.get_l8_cloud_free_col(roi, indices, thresh)
l7 = landsat_preprocessing.get_l7_cloud_free_col(roi, indices, thresh)
l9 = landsat_preprocessing.get_l9_cloud_free_col(roi, indices, thresh)

In [5]:
def create_annual_composites_may_to_september(collection):
    def add_year_month(image):
        date = ee.Date(image.get('system:time_start'))
        year = date.get('year')
        month = date.get('month')
        return image.set('year', year).set('month', month)
    
    # Ajouter les propriétés 'year' et 'month' à chaque image
    collection = collection.map(add_year_month)
    
    # Obtenir la liste des années uniques
    years = ee.List(collection.aggregate_array('year')).distinct().sort()
    
    # Fonction pour créer les composites annuels en utilisant uniquement les mois de mai à septembre
    def composite_year(year):
        year = ee.Number(year)
        filtered = collection.filter(ee.Filter.calendarRange(year, year, 'year'))\
                             .filter(ee.Filter.calendarRange(5, 9, 'month'))
        return filtered.median().set('year', year).set('system:time_start', ee.Date.fromYMD(year, 5, 1))

    # Créer les composites pour chaque année
    composites = years.map(lambda year: composite_year(year))

    # Retourner la collection de composites
    return ee.ImageCollection(composites)

def filter_non_empty_bands(collection):
    def has_bands(image):
        # Vérifier si l'image a des bandes en comptant les noms des bandes
        band_count = image.bandNames().size()
        return image.set('band_count', band_count)
    
    # Ajouter la propriété 'has_bands' à chaque image
    collection_with_band_info = collection.map(has_bands)
    
    # Filtrer les images qui contiennent au moins une bande
    filtered_collection = collection_with_band_info.filter(ee.Filter.gt('band_count', 0))
    
    return filtered_collection


l8 = create_annual_composites_may_to_september(l8)
l7 = create_annual_composites_may_to_september(l7)
l8 = create_annual_composites_may_to_september(l9)

l8 = filter_non_empty_bands(l8)
l7 = filter_non_empty_bands(l7)
l9 = filter_non_empty_bands(l9)
display(l8)

# Preprocessing

In [6]:
def add_elev_slope(image): 

    jax_dsm = ee.ImageCollection('JAXA/ALOS/AW3D30/V3_2')
    jax_elevation = jax_dsm.select('DSM')

    proj = jax_elevation.first().select(0).projection()
    slopeReprojected =  (jax_elevation.mosaic() \
                                .setDefaultProjection(proj)).resample('bicubic')

    # Reduce the collection with a median reducer.
    elevation = slopeReprojected.reduce(ee.Reducer.mean()).rename('elev')

    slope = ee.Terrain.slope(elevation).rename('slope')

    image = image.addBands(elevation).addBands(slope)

    return image

def normalize(image, roi):
    bandNames = image.bandNames()
  # Compute min and max of the image
    minDict = image.reduceRegion(
        reducer = ee.Reducer.min(),
        geometry = roi,
        scale = 10,
        maxPixels=1e9,
        bestEffort=True,
        tileScale= 16)
    
    maxDict = image.reduceRegion(
        reducer= ee.Reducer.max(),
        geometry= roi,
        scale= 10,
        maxPixels= 1e9,
        bestEffort= True,
        tileScale= 16)
    
    mins = ee.Image.constant(minDict.values(bandNames))
    maxs = ee.Image.constant(maxDict.values(bandNames))

    normalized = image.subtract(mins).divide(maxs.subtract(mins))
    return normalized

# Labelling

In [7]:
svamangar = ee.Geometry.MultiPolygon([
    [
        [17.220554, 60.431477],
        [17.222099, 60.429359],
        [17.223301, 60.427664],
        [17.221069, 60.427241],
        [17.220554, 60.431477]
    ],
    [
        [17.213345, 60.427664],
        [17.209568, 60.426055],
        [17.213345, 60.424614],
        [17.213345, 60.427664]
    ]
])

trees = ee.Geometry.MultiPolygon([[[17.231197, 60.415039],
      [17.238579, 60.417073],
      [17.239265, 60.415378],
      [17.238579, 60.412242],
      [17.237892, 60.410716],
      [17.229481, 60.410716],
      [17.231197, 60.415039]],
      [[17.205791, 60.413259],
      [17.215233, 60.413852],
      [17.215233, 60.410547],
      [17.208881, 60.410377],
      [17.205791, 60.413259]],
      [[17.231026, 60.434949],
      [17.222443, 60.433255],
      [17.226563, 60.429359],
      [17.231026, 60.434949]]])

water = ee.Geometry.MultiPolygon([[[17.215748, 60.428935],
      [17.219181, 60.425462],
      [17.217464, 60.421734],
      [17.215233, 60.425631],
      [17.215748, 60.428935]],[[17.25214, 60.450192],
      [17.256775, 60.447229],
      [17.24905, 60.444858],
      [17.246304, 60.446636],
      [17.25214, 60.450192]]])

built = ee.Geometry.MultiPolygon([[[17.226262, 60.449176],
      [17.226605, 60.448308],
      [17.22609, 60.447419],
      [17.227163, 60.446361],
      [17.226691, 60.44598],
      [17.225833, 60.446615],
      [17.22609, 60.447737],
      [17.225318, 60.448202],
      [17.226262, 60.449176]],[[17.228837, 60.441682],
      [17.230253, 60.441259],
      [17.23021, 60.441809],
      [17.228837, 60.441682]],[[17.22403, 60.440963],
      [17.224803, 60.440963],
      [17.224331, 60.440433],
      [17.22343, 60.44037],
      [17.22403, 60.440963]]])

bare =  ee.Geometry.Polygon([[[17.21755, 60.450954],
      [17.220383, 60.450954],
      [17.220469, 60.449557],
      [17.224159, 60.448837],
      [17.220984, 60.447229],
      [17.218752, 60.448626],
      [17.215919, 60.448753],
      [17.217808, 60.449557],
      [17.21755, 60.450954]]])

# Create feature collection for training

In [9]:
polygons = ee.FeatureCollection([
  ee.Feature(svamangar, {'class': 0}),
  ee.Feature(water, {'class': 1}),
  ee.Feature(trees, {'class': 2}),
  ee.Feature(built, {'class': 3}),
  ee.Feature(crops, {'class': 4})
])

In [25]:
img = l8.median()

palette = ['#91cf60', '#4575b4', '#1a9850','#d73027','#fee08b']

def apply_scale_factors(image):
  optical_bands = image.select('SR_B.').multiply(0.0000275).add(-0.2)
  thermal_bands = image.select('ST_B.*').multiply(0.00341802).add(149.0)
  return image.addBands(optical_bands, None, True).addBands(
      thermal_bands, None, True
  )

def add_color(feature):
    class_value = feature.get('class')
    color = ee.List(palette).get(class_value)
    return feature.set({'style': {'color': color, 'width': 2}})

styled_polygons = polygons.map(add_color)

polygons_image = styled_polygons.style(**{
    'styleProperty': 'style',
    'neighborhood': 8,
})

m = geemap.Map()
m.centerObject(roi, 13)
m.addLayer(apply_scale_factors(img).clip(roi), {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.1}, 'Sentinel-2 Image')
m.addLayer(polygons_image, {}, 'Polygons')

# Ajouter une légende à la carte
legend_dict = {
    'Northern Alluvial Meadows': '#91cf60',
    'Water': '#4575b4',
    'Trees': '#1a9850',
    'Built': '#d73027',
    'Crops': '#fee08b'
}

m.add_legend(title="Legend", legend_dict=legend_dict)

# Afficher la carte
m

Map(center=[60.42909015971474, 17.23471649999347], controls=(WidgetControl(options=['position', 'transparent_b…

In [49]:
def Random_Forest(composite, polygons): 

        # Get the values for all pixels in each polygon in the training.
    training_data = composite.sampleRegions(**{
    # Get the sample from the polygons FeatureCollection.
    'collection': polygons,
    # Keep this list of properties from the polygons.
    'properties': ['class'],
    # Set the scale to get Landsat pixels in the polygons.
    'scale': 10
    })

    # Training & validation splitting
    sample = training_data.randomColumn()
    train_sample = sample.filter('random <= 0.8')
    val_sample = sample.filter('random > 0.8')

    clsfr_rf = ee.Classifier.smileRandomForest(numberOfTrees = 500, variablesPerSplit = 2, minLeafPopulation = 1, bagFraction = 0.5, maxNodes = 7, seed = 11)

    trained_clsfr_rf = clsfr_rf.train(
        features=train_sample,
        classProperty='class',
        inputProperties=composite.bandNames(),)

    # Get information about the trained classifier.
    #display('Results of trained classifier RF', trained_clsfr_rf.explain())
 
    # Get a confusion matrix and overall accuracy for the training sample.
    train_accuracy_rf = trained_clsfr_rf.confusionMatrix()
    # Get a confusion matrix and overall accuracy for the validation sample.
    val_sample_rf = val_sample.classify(trained_clsfr_rf)

    #val_accuracy_rf = val_sample_rf.errorMatrix('class', 'classification')
    val_accuracy_rf = val_sample_rf.errorMatrix('class', 'classification')
    
    img_classified = composite.classify(trained_clsfr_rf) 

    return img_classified

In [60]:
def Random_Forest(composites, polygons): 

    trained_clsfr_rf = ee.Classifier.smileRandomForest(numberOfTrees = 500, variablesPerSplit = 2, minLeafPopulation = 1, bagFraction = 0.5, maxNodes = 7, seed = 11)
    val_samples = ee.FeatureCollection([])

    for composite in composites :
        
        composite_img = ee.Image(composite)
            # Get the values for all pixels in each polygon in the training.
        training_data = composite_img.sampleRegions(**{
        # Get the sample from the polygons FeatureCollection.
        'collection': polygons,
        # Keep this list of properties from the polygons.
        'properties': ['class'],
        # Set the scale to get Landsat pixels in the polygons.
        'scale': 30
        })

        # Training & validation splitting
        sample = training_data.randomColumn()
        train_sample = sample.filter('random <= 0.8')
        val_sample = sample.filter('random > 0.8')

        val_samples = val_samples.merge(val_sample)

        trained_clsfr_rf = trained_clsfr_rf.train(
            features=train_sample,
            classProperty='class',
            inputProperties=composite_img.bandNames(),)

    # Get information about the trained classifier.
    #display('Results of trained classifier RF', trained_clsfr_rf.explain())
 
    # Get a confusion matrix and overall accuracy for the training sample.
    train_accuracy_rf = trained_clsfr_rf.confusionMatrix()
    # Get a confusion matrix and overall accuracy for the validation sample.
    val_sample_rf = val_samples.classify(trained_clsfr_rf)
    val_accuracy_rf = val_sample_rf.errorMatrix('class', 'classification')

    display(val_accuracy_rf)
    display(val_accuracy_rf.accuracy())
    

    return trained_clsfr_rf

In [10]:
def Random_Forest(composite, polygons): 

        # Get the values for all pixels in each polygon in the training.
    training_data = composite.sampleRegions(**{
    # Get the sample from the polygons FeatureCollection.
    'collection': polygons,
    # Keep this list of properties from the polygons.
    'properties': ['class'],
    # Set the scale to get Landsat pixels in the polygons.
    'scale': 10
    })

    # Training & validation splitting
    sample = training_data.randomColumn()
    train_sample = sample.filter('random <= 0.8')
    val_sample = sample.filter('random > 0.8')

    clsfr_rf = ee.Classifier.smileRandomForest(numberOfTrees = 500, variablesPerSplit = 2, minLeafPopulation = 1, bagFraction = 0.5, maxNodes = 7, seed = 11)

    trained_clsfr_rf = clsfr_rf.train(
        features=train_sample,
        classProperty='class',
        inputProperties=composite.bandNames(),)

    # Get information about the trained classifier.
    #display('Results of trained classifier RF', trained_clsfr_rf.explain())
 
    # Get a confusion matrix and overall accuracy for the training sample.
    train_accuracy_rf = trained_clsfr_rf.confusionMatrix()
    # Get a confusion matrix and overall accuracy for the validation sample.
    val_sample_rf = val_sample.classify(trained_clsfr_rf)

    #val_accuracy_rf = val_sample_rf.errorMatrix('class', 'classification')
    val_accuracy_rf = val_sample_rf.errorMatrix('class', 'classification')
    
    #img_classified = composite.classify(trained_clsfr_rf) 

    return ee.Feature(None, ee.Number(val_accuracy_rf.accuracy()))

In [11]:
validation_accuracy = l8.map(lambda image : Random_Forest(image,polygons))
display(validation_accuracy)

Feature, argument 'metadata': Invalid type.
Expected type: Dictionary<Object>.
Actual type: Float.
Actual value: 0.9959204487506375'. Falling back to string repr.
  warn(f"Getting info failed with: '{e}'. Falling back to string repr.")


In [1]:
display(l8, polygons)

NameError: name 'l8' is not defined

In [28]:
l8_classified = l8.map(lambda image : Random_Forest(image,polygons))
display(ee.ImageCollection(l8_classified))

In [38]:
img = l8_classified.first()

legend_dict = {
    'Northern Alluvial Meadows': '#91cf60',
    'Water': '#4575b4',
    'Trees': '#1a9850',
    'Built': '#d73027',
    'Crops': '#fee08b'
}

mb = geemap.Map()

left_layer = geemap.ee_tile_layer(img.select('classification').clip(roi), {'min': 0, 'max': 4, 'palette':  ['#91cf60', '#4575b4', '#1a9850','#d73027','#fee08b']}, 'RF classification')
right_layer = geemap.ee_tile_layer(apply_scale_factors(l8.first()).clip(roi), {'min': 0, 'max': 0.1, 'bands': ['SR_B4', 'SR_B3', 'SR_B2']}, 'RGB color')
mb.split_map(left_layer, right_layer)
mb.centerObject(roi, 14)
mb.add_legend(legend_title="Classification", legend_dict=legend_dict)
mb

Map(center=[60.42909015971474, 17.23471649999347], controls=(ZoomControl(options=['position', 'zoom_in_text', …

In [30]:
class_labels = [
    'svamangar', 'bare', 'water', 'trees', 'built'
]
def calculate_pixel_counts(image):
    pixel_count_stats = image.reduceRegion(
        reducer=ee.Reducer.frequencyHistogram().unweighted(),
        geometry=roi,
        scale=30,
        maxPixels=1e10
    )
    pixel_counts = ee.Dictionary(pixel_count_stats.get('classification'))
    return ee.Feature(None,pixel_counts)

# Appliquer la fonction à chaque image de la collection
#pixel_counts_fc = ee.FeatureCollection(l8Classified.map(calculate_pixel_counts))
pixel_counts_fc = l8_classified.map(calculate_pixel_counts)
display(pixel_counts_fc)

In [31]:
classes_surfaces_df_raw = ee.data.computeFeatures({
    'expression': pixel_counts_fc,
    'fileFormat': 'PANDAS_DATAFRAME'
})

In [32]:
classes_surfaces_df_raw.head()

classes_surfaces_df = classes_surfaces_df_raw

In [None]:
import pandas as pd
import plotly.express as px

# Création d'un dictionnaire inverse pour la légende
reverse_legend_dict = {
    '0': 'Northern Alluvial Meadows',
    '1': 'Water',
    '2': 'Trees',
    '3': 'Built',
    '4': 'Crops',
    'null': 'Unknown'
}

# Remplacement des noms des classes par les étiquettes
classes_surfaces_df.rename(columns=reverse_legend_dict, inplace=True)

# Remplacement des surfaces en pixels par des hectares (en supposant 1 pixel = 1 hectare pour simplifier)
classes_surfaces_df= classes_surfaces_df.apply(pd.to_numeric, errors='ignore') * 0.09

#classes_surfaces_df.drop(columns=['geo'], inplace=True)

# Ajout de la colonne 'year'
years = list(range(2023, 2023 - len(classes_surfaces_df), -1))
classes_surfaces_df['year'] = years

# Conversion de la dataframe pour une meilleure visualisation
df_melted = classes_surfaces_df.melt(id_vars=['year'], var_name='Class', value_name='Surface (hectares)')

# Création du graphique
fig = px.line(df_melted, x='year', y='Surface (hectares)', color='Class',
              title='Surface of different classes in Breforsen area derived from Landsat data yearly summer composite',
              labels={'year': 'Year', 'Surface (hectares)': 'Surface in Hectares'}, markers= True)

# Affichage du graphique
fig.show()