## **Landscape Data Preparation**

### **Import Dependencies**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ee
import geemap
from tqdm.auto import tqdm
import os

import warnings
warnings.filterwarnings('ignore')

# Make sure the output directory used by the rest of the notebook is present
if not os.path.exists('datasets'):
    os.makedirs('datasets')
    print("'datasets' folder has been successfully created.")
else:
    print("'datasets' folder is already existed.")

'datasets' folder is already existed.


### **Initialize a Map Object**

In [59]:
# Run the Earth Engine authentication flow, then initialize the client
# against the high-volume endpoint URL.
ee.Authenticate()
ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com")

# Create the geemap map (Esri.WorldImagery basemap) that later cells add layers to.
# Leaving `Map` as the last expression renders it as the cell output.
Map = geemap.Map(basemap="Esri.WorldImagery")
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

### **Import World Administrative Boundary Layer**

In [60]:
# Load the world administrative boundaries asset
world = ee.FeatureCollection('projects/ee-geonextgis/assets/world_administrative_boundaries')

# Start from every feature whose 'continent' is 'Europe' and fetch the names client-side
europe = world.filter(ee.Filter.eq('continent', 'Europe'))
europe_country_names = europe.aggregate_array('name').getInfo()

# Countries excluded from the study area
countries_to_remove = {
    'Russian Federation',
    'Belarus',
    'Ukraine',
    'Moldova, Republic of',
    'Svalbard and Jan Mayen Islands',
}

# Drop the excluded names, add Turkey, and re-select the boundaries by name
europe_country_names = [*(set(europe_country_names) - countries_to_remove), 'Turkey']
europe = world.filter(ee.Filter.inList('name', europe_country_names))

# Draw the selected countries as red outlines with a fully transparent fill
style = dict(fillColor='00000000', color='red', width=1)
Map.addLayer(europe.style(**style), {}, 'Europe')

### **Calculate the Area and Number of Samples per Country**

In [61]:
# Total number of sample points to distribute across all countries
total_n_samples = 100_000

# Build one geometry-less feature per country holding its name and area.
# A separate name keeps the Earth Engine collection distinct from the DataFrame below.
country_area_fc = europe.map(lambda f: ee.Feature(None, {'country': f.get('name'), 'area': f.area()}))

area = (
    geemap.ee_to_df(country_area_fc)[['country', 'area']]
    # Convert the area to whole square kilometres (Earth Engine reports square metres)
    .assign(area=lambda df: (df['area'] // 1000000).astype('int'))
    # Calculate the area proportion of each country within the study area
    .assign(area_proportion=lambda df: (df['area'] / df['area'].sum()).round(4))
    # Calculate the number of samples per country. Round before casting:
    # the float product can land just below the intended integer
    # (e.g. 28.999999999999996), and a bare astype('int') would truncate it.
    .assign(n_of_smaples=lambda df: (df['area_proportion'] * total_n_samples).round().astype('int'))
    # Drop the rows (countries) where the number of samples is 0
    .loc[lambda df: df['n_of_smaples'] > 0]
    # Largest allocations first; returns a new frame instead of sorting a slice in place
    .sort_values(by='n_of_smaples', ascending=False)
)

print(area.shape)
area.head()

(41, 4)


Unnamed: 0,country,area,area_proportion,n_of_smaples
0,Turkey,779333,0.1345,13450
32,France,546646,0.0943,9430
30,Spain,505656,0.0873,8730
17,Sweden,443345,0.0765,7650
38,Germany,355924,0.0614,6140


### **Import the CORINE LULC Product for 2018**

In [62]:
# Load the CORINE 2018 land-cover image and keep only its 'landcover' band
corine_2018 = ee.Image('COPERNICUS/CORINE/V20/100m/2018').select('landcover')

# Re-centre the map and add the land-cover layer
Map.setCenter(16.436, 39.825, 6)
Map.addLayer(corine_2018, {}, 'CORINE Land Cover')

In [63]:
# Fetch the class values and the matching palette from the image properties
class_values, class_palette = (
    corine_2018.get(property_name).getInfo()
    for property_name in ('landcover_class_values', 'landcover_class_palette')
)

print('Number of classes in the CORINE LULC:', len(class_values))

Number of classes in the CORINE LULC: 44


### **Generate Equal Number of Samples from Each Class**

In [157]:
# Function to generate sample points
def generateSamplePoints(image, country_name, band_name, scale=100, seed=42, save=False, output_folder=None):
    """Draw an equal number of stratified sample points per class for one country.

    The country's sample budget is read from the module-level ``area``
    DataFrame (column ``n_of_smaples``) and its geometry from the module-level
    ``europe`` FeatureCollection. The budget is split evenly (integer division)
    across the classes found in the country's frequency histogram.

    Parameters
    ----------
    image : ee.Image
        Classified image to sample from.
    country_name : str
        Value of the ``name`` property in ``europe`` / the ``country`` column in ``area``.
    band_name : str
        Name of the class band in ``image``.
    scale : int, default 100
        Scale passed to both the histogram reduction and the sampling.
    seed : int, default 42
        Seed passed to ``stratifiedSample``.
    save : bool, default False
        When truthy, write the samples to ``<output_folder>/<country_name>_samples.shp``.
    output_folder : str, optional
        Destination folder; required when ``save`` is truthy. Created if missing.

    Returns
    -------
    The object returned by ``geemap.ee_to_gdf`` for the sampled points
    (requested with ``geometries=True``).

    Raises
    ------
    ValueError
        If ``country_name`` is not in ``area``, if no class values are found
        for the country, or if ``save`` is truthy without ``output_folder``.
    """
    country_geom = europe.filter(ee.Filter.eq('name', country_name)).first().geometry()

    # Look the sample budget up by column name rather than by position
    country_rows = area.loc[area['country'] == country_name, 'n_of_smaples']
    if country_rows.empty:
        raise ValueError(f"Country '{country_name}' was not found in the 'area' table.")
    # Cast to a plain Python int before it is sent to Earth Engine
    country_n_samples = int(country_rows.iloc[0])

    country_lulc = image.clip(country_geom)

    # Calculate the frequency histogram at the requested scale
    # (bestEffort lets Earth Engine fall back to a coarser scale if needed)
    freq_hist = country_lulc.reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=country_geom,
        scale=scale,
        bestEffort=True,
        maxPixels=1e10,
        tileScale=8
    )

    # Extract the class values (the histogram keys arrive as strings)
    class_histogram = freq_hist.getInfo().get(band_name)
    if not class_histogram:
        raise ValueError(f"No '{band_name}' class values were found for '{country_name}'.")
    country_class_values = [int(class_value) for class_value in class_histogram]

    # Split the country's sample budget evenly across its classes
    sample_per_class = country_n_samples // len(country_class_values)

    # Generate samples for each class
    lulc_samples = country_lulc.stratifiedSample(
        numPoints=sample_per_class,
        classBand=band_name,
        region=country_geom,
        scale=scale,
        seed=seed,
        classValues=country_class_values,
        classPoints=[sample_per_class] * len(country_class_values),
        dropNulls=True,
        tileScale=8,
        geometries=True
    )

    lulc_samples_gdf = geemap.ee_to_gdf(lulc_samples)

    if save:
        if output_folder is None:
            raise ValueError("'output_folder' must be provided when save=True.")
        os.makedirs(output_folder, exist_ok=True)

        file_name = f'{country_name}_samples.shp'
        out_file_path = os.path.join(output_folder, file_name)
        lulc_samples_gdf.to_file(out_file_path, driver='ESRI Shapefile')
        print(f'{file_name} saved successfully at {out_file_path}.')

    return lulc_samples_gdf

In [159]:
# Generate and save the stratified samples for a single country (Germany).
# The output path is built with os.path.join so it also works where the
# path separator is not a backslash.
germ_samples = generateSamplePoints(
    image=corine_2018,
    country_name='Germany',
    band_name='landcover',
    scale=100,
    seed=42,
    save=True,
    output_folder=os.path.join('datasets', 'shapefiles'))

Germany_samples.shp saved successfully at datasets\shapefiles\Germany_samples.shp.
