## Java is more complicated: there are many more potential tea areas that are not captured by the RA polygons, and the island is more difficult to survey manually

### Instead, we manually drew some polygons where we were certain there was no tea, which is an easier task than drawing polygons around all the tea plantations



In [5]:
import geopandas as gpd
# Load the hand-drawn polygons that mark areas known to contain no tea.
java_negative = gpd.read_file("../java_negative_sampling.shp")

# Dissolve every polygon into a single (multi)polygon AOI.
# A GeoDataFrame built from a bare shapely geometry carries no CRS, so the
# CRS of the source shapefile is passed through explicitly; otherwise the
# exported file loses its spatial reference.
java_negative_union = gpd.GeoDataFrame(
    geometry=[java_negative.unary_union],
    crs=java_negative.crs,
)

# Export the dissolved AOI as GeoJSON to the shared bucket.
java_negative_union.to_file('gs://demeter-labs/tea/geometries/java_manual_negative_aoi.geojson')

In [2]:
import ee
# Authenticate to Earth Engine with a service account (account e-mail plus
# the path to its JSON key file), then initialise the client against the
# 'earthindex' project.
_ee_service_account = '242968905260-compute@developer.gserviceaccount.com'
_ee_key_file = '/home/christopher.x.ren/.config/earthengine/earthindex-7d2c9b94c507.json'
credentials = ee.ServiceAccountCredentials(_ee_service_account, _ee_key_file)
ee.Initialize(project='earthindex', credentials=credentials)
import geemap
import geopandas as gpd

# Positive sample locations (presumably RA tea points for Java, judging by
# the file name -- confirm against the dataset).
pos_gdf = gpd.read_parquet("/home/christopher.x.ren/datasets/ra_tea/pos_gdf_v1_java_2024-11-08.parquet")

# Interactive map used for inspection and for drawing the AOI by hand.
m = geemap.Map(center=[-0.7893, 113.9213], zoom=5)

# Google hybrid (imagery + labels) tiles as the basemap.
url = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'
m.add_tile_layer(url, name='Google Hybrid', attribution='Google')

# Legend label / palette colour for each land-cover class. Position i in
# each list is used for class value i + 1 of the 1..9 remapped raster.
_lulc_legend_entries = [
    ('Water', '#1A5BAB'),
    ('Trees', '#358221'),
    ('Flooded Vegetation', '#87D19E'),
    ('Crops', '#FFDB5C'),
    ('Built Area', '#ED022A'),
    ('Bare Ground', '#EDE9E4'),
    ('Snow/Ice', '#F2FAFF'),
    ('Clouds', '#C8C8C8'),
    ('Rangeland', '#800080'),
]
lulc_viz = {
    'names': [label for label, _ in _lulc_legend_entries],
    'colors': [hex_colour for _, hex_colour in _lulc_legend_entries],
}

def remap_lulc(image):
    """Renumber the land-cover codes of *image* to the contiguous range 1..9.

    The source codes 1, 2, 4, 5, 7, 8, 9, 10, 11 are mapped, in that order,
    onto 1 through 9 by calling ``image.remap``; the result of that call is
    returned unchanged.
    """
    source_codes = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    contiguous_codes = list(range(1, len(source_codes) + 1))
    return image.remap(source_codes, contiguous_codes)

# ESRI global 10 m land-use/land-cover time series (community catalog asset).
esri_lulc = ee.ImageCollection("projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS")

# Keep the images whose start date falls inside the window, mosaic them into
# one image and renumber the classes to 1..9.
# NOTE: filterDate treats the end date as exclusive.
_lulc_images_2023 = esri_lulc.filterDate('2023-01-01', '2023-12-31')
lulc2023 = _lulc_images_2023.mosaic()
lulc2023_remapped = remap_lulc(lulc2023)

# Draw the land-cover raster with the class palette, then overlay the
# positive points.
_lulc_vis_params = {'min': 1, 'max': 9, 'palette': lulc_viz['colors']}
m.addLayer(lulc2023_remapped, _lulc_vis_params, 'ESRI LULC 2023')
m.addLayer(geemap.gdf_to_ee(pos_gdf), {}, 'Positive Points')

# Legend: class name -> colour.
legend_dict = {
    label: hex_colour
    for label, hex_colour in zip(lulc_viz['names'], lulc_viz['colors'])
}
m.add_legend(title="Land Use/Land Cover", legend_dict=legend_dict)

# Last expression of the cell: renders the map widget.
m

Map(center=[-0.7893, 113.9213], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchD…

In [7]:
import geopandas as gpd
import shapely
import os
# Point Google client libraries at the service-account key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/home/christopher.x.ren/.config/earthengine/earthindex-7d2c9b94c507.json'

# Fetch the most recently drawn feature from the map as a GeoJSON-style
# mapping and convert it into a shapely geometry.
_drawn_geojson = m.draw_last_feature.geometry().getInfo()
_drawn_shape = shapely.geometry.shape(_drawn_geojson)

# Wrap it in a one-row GeoDataFrame (EPSG:4326) and export it.
# NOTE(review): this destination is the same path that the unioned
# shapefile polygons are written to earlier in this notebook, so whichever
# cell runs last wins -- confirm which geometry is meant to live there.
java_geometry = gpd.GeoDataFrame(geometry=[_drawn_shape], crs="EPSG:4326")
java_geometry.to_file('gs://demeter-labs/tea/geometries/java_manual_negative_aoi.geojson')


In [2]:
import shapely
# Sampling region: the last feature drawn on the map.
ee_java_aoi = ee.Geometry(m.draw_last_feature.geometry())

# NOTE(review): not referenced by the sampling call below; the per-class
# counts actually used are the ones in _points_per_class.
num_samples_per_class = 100

# Number of points requested for each remapped class value
# (1 Water, 2 Trees, 4 Crops, 5 Built Area, 9 Rangeland in the 1..9 scheme).
_points_per_class = {1: 200, 2: 3000, 4: 3000, 5: 3000, 9: 1000}

# numPoints=0 sets the default per-class count to zero, so only the classes
# listed explicitly are sampled. geometries=True keeps the point location
# on each sample.
lulc_samples = lulc2023_remapped.stratifiedSample(
    numPoints=0,
    classValues=list(_points_per_class.keys()),
    classPoints=list(_points_per_class.values()),
    region=ee_java_aoi,
    scale=320,
    seed=0,
    geometries=True,
)

m.addLayer(lulc_samples, {}, 'LULC Sampled Points')

In [15]:
# Pull the sampled points out of Earth Engine into a local GeoDataFrame.
lulc_samples_gdf = geemap.ee_to_gdf(lulc_samples)


In [19]:
print(f"Original number of samples: {len(lulc_samples_gdf)}")

# Discard candidate negatives that lie within 320 m of any positive point.
# Buffers must be built in metres, so both layers are first projected to a
# UTM CRS derived from the first positive point.
# Assumes pos_gdf holds point geometries in lon/lat degrees -- TODO confirm.
_first_pos = pos_gdf.geometry.iloc[0]
utm_zone = int(((_first_pos.x + 180) / 6) + 1)
# EPSG 326xx is the UTM north series and 327xx the UTM south series; pick
# the series from the latitude sign instead of always assuming north.
_utm_epsg_base = 32600 if _first_pos.y >= 0 else 32700
utm_crs = f'EPSG:{_utm_epsg_base + utm_zone}'

# Project both dataframes to UTM
pos_gdf_utm = pos_gdf.to_crs(utm_crs)
lulc_samples_utm = lulc_samples_gdf.to_crs(utm_crs)

# Buffer every positive point by 320 m (the same value as the sampling
# scale) and merge the buffers into one geometry for a single intersects test.
pos_gdf_buffered = pos_gdf_utm.copy()
pos_gdf_buffered.geometry = pos_gdf_utm.geometry.buffer(320)
pos_union = pos_gdf_buffered.geometry.unary_union

# Remove samples that intersect with buffered positive points
lulc_samples_utm = lulc_samples_utm[~lulc_samples_utm.geometry.intersects(pos_union)]

# Convert back to EPSG:4326
lulc_samples_gdf_filtered = lulc_samples_utm.to_crs('EPSG:4326')

# Human-readable label for each sampled class value. The values are the
# remapped 1..9 codes, where 4 is Crops and 5 is Built Area; 5 was
# previously mislabelled as 'Crops'.
class_mapping = {
    1: 'Water',
    2: 'Trees',
    4: 'Crops',
    5: 'Built Area',
    9: 'Rangeland',
}

# Add class column
lulc_samples_gdf_filtered['class'] = lulc_samples_gdf_filtered['remapped'].map(class_mapping)





Original number of samples: 10200


In [20]:
# Persist the filtered negative samples as GeoParquet.
# NOTE(review): the "10090" in the file name is presumably the row count
# after filtering -- confirm before reusing the name for a new run.
_filtered_samples_path = '/home/christopher.x.ren/datasets/ra_tea/java_neg_water_built_tree_rangeland_samples_10090.parquet'
lulc_samples_gdf_filtered.to_parquet(_filtered_samples_path)

In [21]:
# Display the filtered samples (geometry, remapped class value, class label).
lulc_samples_gdf_filtered

Unnamed: 0,geometry,remapped,class
0,POINT (108.12985 -6.29971),1,Water
1,POINT (106.4942 -5.85702),1,Water
2,POINT (105.9624 -6.87463),1,Water
3,POINT (109.40905 -7.85774),1,Water
4,POINT (106.35334 -5.83689),1,Water
...,...,...,...
10194,POINT (105.34436 -6.66191),9,Rangeland
10195,POINT (109.32281 -7.00111),9,Rangeland
10197,POINT (110.23981 -6.99824),9,Rangeland
10198,POINT (107.42845 -6.45206),9,Rangeland
