In [1]:
import ee
import geemap

# Run as service account
service_account = 'ee-account@airy-galaxy-398310.iam.gserviceaccount.com'
credentials = ee.ServiceAccountCredentials(service_account, '../.private-key.json')

ee.Initialize(credentials)

import json
import os
import pandas as pd
import geopandas as gpd

#----------------> We initialize some constants for standard use
Map = geemap.Map(center=[43.7196, 10.4250], zoom=5)

countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")
Italy = countries.filter(ee.Filter.eq('country_na', 'Italy'))

In [2]:
lucas = ee.FeatureCollection('JRC/LUCAS_HARMO/THLOC/V1') \
  .filterBounds(Italy.geometry()) \
  .select(['point_id', 'lu1', 'lu1_label'])
  
randomLucas = lucas.randomColumn('random')
lucas_fc = randomLucas.sort('random').limit(5000)

# df_filter = df[(df['var'] == 'lu1') | (df['var'] == 'lu1_type')]
# df_code = ee.List(df_filter['code'].tolist())
# Map.addLayer(lucas_fc, {}, 'Points')
# Map

In [3]:
label = 'landuse'

gdf = geemap.ee_to_geopandas(lucas_fc)

gdf[label] = gdf['lu1'].copy().apply(lambda x: x[:2] if len(x) >= 2 else '')

# print(sorted(gdf[label].unique()))
# from_list_3 = ['', 'U111', 'U112', 'U113', 'U120', 'U130', 'U140', 'U150', 'U210', 'U221', 'U222', 'U223', 'U224', 'U225', 'U226', 'U227', 'U228', 'U311', 'U312', 'U313', 'U314', 'U315', 'U316', 'U317', 'U318', 'U319', 'U321', 'U322', 'U330', 'U341', 'U342', 'U350', 'U361', 'U362', 'U370', 'U411', 'U412', 'U413', 'U414', 'U415', 'U420']
# from_list_2 = ['', 'U11', 'U12', 'U13', 'U14', 'U15', 'U21', 'U22', 'U31', 'U32', 'U33', 'U34', 'U35', 'U36', 'U37', 'U41', 'U42']
from_list = ['', 'U1', 'U2', 'U3', 'U4']

print(len(from_list))
to_list = [i for i in range(len(from_list))]

gdf[label] = gdf[label].replace(from_list, to_list)
gdf[label] = pd.to_numeric(gdf[label], errors='coerce')
gdf = gdf.dropna(subset=[label])

gdf.crs = 'WGS84' # Watch out for projection

points = geemap.geopandas_to_ee(gdf)

print(points.getInfo())

{'type': 'FeatureCollection', 'columns': {'landuse': 'Integer', 'lu1': 'String', 'lu1_label': 'String', 'point_id': 'Integer', 'random': 'Float', 'system:index': 'String'}, 'features': [{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [15.191185205060457, 41.47095077275638]}, 'id': '0', 'properties': {'landuse': 1, 'lu1': 'U111', 'lu1_label': 'Agriculture (excluding fallow land and kitchen gardens)', 'point_id': 47562056, 'random': 1.1906835458930232e-05}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [9.262868322817855, 40.282667228528865]}, 'id': '1', 'properties': {'landuse': 1, 'lu1': 'U111', 'lu1_label': 'Agriculture (excluding fallow land and kitchen gardens)', 'point_id': 42581910, 'random': 1.9336872961073226e-05}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [9.610901160039305, 44.51213795502543]}, 'id': '2', 'properties': {'landuse': 2, 'lu1': 'U120', 'lu1_label': 'Forestry', 'point_id': 42902378, 'random': 2.7134666523

In [4]:
bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

# Reference: https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C02_T1#bands
landsat8 = ee.ImageCollection('LANDSAT/LC08/C02/T1')

image = ee.Algorithms.Landsat.simpleComposite(**{
    'collection': landsat8\
        .filterBounds(Italy)\
        .filterDate('2018-01-01', '2018-12-31'),
    'asFloat': True
})

In [6]:
training = image.select(bands).sampleRegions(
    collection=ee.FeatureCollection(points),
    properties=[label],
    scale=30
    )

task = ee.batch.Export.table.toDrive(
    collection=training,
    description='1sample_3-3.csv',
    folder='output',
    fileFormat='CSV',
)

task.start()

import time

while task.active():
  print(task.status())
  time.sleep(10)