# From image-level to pixel-level labeling: A weakly-supervised learning method for identifying aquaculture ponds using iterative anti-adversarial attacks guided by aquaculture features

Author: Boyi Li, Adu Gong*, Jiaming Zhang, Zexin Fu

Update: 2024/07/14

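This is the code used in the dataset preparation stage of the paper. It can be run on Google Colab after filling in the settings marked `todo` in the code cells (project name, working directory, GEE asset paths, output directories, and acquisition period).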

To run this code, a Google Colab account and a Google Earth Engine (GEE) account are required.

## Input

points_aqua: The file path of aquaculture sample points (FeatureCollection format) in your GEE asset.

points_water: The file path of non-aquaculture sample points (FeatureCollection format) in your GEE asset.

## Output

out_dir_aqua: A folder to store the output aquaculture imagery.

out_dir_water: A folder to store the output non-aquaculture imagery.

# Setting

In [None]:
!pip install geemap
!pip install geedim
import geemap
import sys
import os

import ee
from ee.batch import Export, Task
# Run the Earth Engine authentication flow before the client is initialized.
ee.Authenticate()
# todo: input your project name
# opt_url points the client at the "earthengine-highvolume" endpoint instead of the default one.
ee.Initialize(project='ee-XXX',opt_url='https://earthengine-highvolume.googleapis.com')

In [None]:
# Mount Google Drive at /content/drive; the working directory configured below lives under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# todo: input your working directory
workdir='/content/drive/MyDrive/Colab Notebooks/WSLM_AQ'
# The helper modules are expected under <workdir>/packages/packages_gee.
# Deriving the path from workdir keeps both in sync when workdir is edited.
packagesdir=os.path.join(workdir,'packages','packages_gee')
os.chdir(workdir)
# Put the helper directory first on sys.path so the imports below resolve to it.
sys.path.insert(0,packagesdir)

# NOTE(review): these star imports supply the helpers used by later cells
# (e.g. s2_rc_qa, nwi_col, get_cf); which module defines which is not visible here.
from basic_vector import *
from basic_raster import *
from basic import *
from s2 import *

In [None]:
# todo: input your GEE asset
asset='users/XXX/assetdir'
# todo: Input the GEE asset file path for the study area file (FeatureCollection format)
study_area_path="users/XXX/assetdir/gba_v"
# todo: Input the GEE asset file path for the ground truth image (aquaculture pond: label = 1; other labels = 0).
# Please note that the labels are for verification purposes only and are not used in model training.
aqua_path="users/XXX/assetdir/aqua"
# todo: Input the GEE asset file path for aquaculture sample points (FeatureCollection format)
points_aqua='users/XXX/assetdir/points_aqua'
# todo: Input the GEE asset file path for non-aquaculture sample points (FeatureCollection format)
points_water='users/XXX/assetdir/points_water'

#todo: The output directory for aquaculture imagery
out_dir_aqua=workdir+'/inputs/dataset_orig/1'
#todo: The output directory for non-aquaculture imagery
out_dir_water=workdir+'/inputs/dataset_orig/0'
# Create both output folders, including missing parents; nothing happens if they already exist.
os.makedirs(out_dir_aqua,exist_ok=True)
os.makedirs(out_dir_water,exist_ok=True)

#todo: Input your own image acquisition time phase
start='2022-01-01'
end='2023-01-01'

#todo: Revise the following parameters according to your situation.
# WKT definition of the projection; it is turned into an ee.Projection in the next cell.
proj_text='PROJCS["Asia_North_Albers_Equal_Area_Conic_China",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",105.0],PARAMETER["Standard_Parallel_1",25.0],PARAMETER["Standard_Parallel_2",47.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]'
# Side length of each image chip in metres (half of it is used as the buffer around each sample point).
size_meters=2560
# Pixel size in metres.
scale=10
# Chip side length in pixels (2560 / 10 = 256 with the defaults above).
size_pixels=int(size_meters/scale)

In [None]:
# Build the projection object from the WKT string defined in the settings cell.
proj=ee.Projection(proj_text)
# Load the study area and merge its features so that a single geometry can be taken from it.
study_area=ee.FeatureCollection(study_area_path)
area_feature=study_area.union().first()
area_geometry=area_feature.geometry()

# Preview the study-area geometry on an interactive map with a satellite basemap.
Map = geemap.Map()
Map.add_basemap('SATELLITE')
Map.addLayer(area_geometry,{},'area_geometry')
Map.centerObject(area_geometry, 14)
# Leaving the map object as the last expression makes the notebook display it.
Map

# Imagery generation

In [None]:
# Sentinel-2 image collection for the acquisition period, built by a helper from the star imports.
# NOTE(review): the third argument (50) is presumably a cloud/quality threshold - confirm in the helper module.
ic_s2=s2_rc_qa(start,end,50)
# Per-pixel median composite of the collection (clipping to the study area is left disabled).
s2img=ic_s2.median()#.clip(area_geometry)
bands=['B2','B3','B4','B8']
s2_img=s2img.select(bands)

# Water-index feature band derived from the same collection.
# NOTE(review): the meaning of the numeric arguments to nwi_col/get_cf is defined in the helper modules - confirm there.
ndwi_collection=nwi_col(ic_s2,7,2)
ndwi_cf=get_cf(ndwi_collection,0,'ndwi_cf')
# Ground-truth image, renamed to a single 'label' band (used for verification only, per the settings cell).
label=ee.Image(aqua_path).rename('label')

# Final image to export: the four Sentinel-2 bands, then the ndwi_cf band, then the label band.
# The label band is attached exactly once, here; `img` is not defined before this line.
img=s2_img.addBands(ndwi_cf).addBands(label)

## Download imagery

In [None]:
!pip install retry
import logging
import multiprocessing
import os
import requests
import shutil
from retry import retry

In [None]:
@retry(tries=10, delay=1, backoff=2)
def getResult(index, point):
    """Download the image chip around one sample point and save it to disk.

    The function reads two notebook-level globals: ``img`` (the image to
    export) and ``params`` (keys used here: 'buffer', 'dimensions', 'format',
    'out_dir', 'count', 'prefix'). The ``retry`` decorator calls the function
    again when it raises, using the tries/delay/backoff given above.

    Args:
        index: Position of the point in the list of points; zero-padded to the
            number of digits of ``params['count']`` and used in the file name.
        point: Mapping with a 'coordinates' entry describing the point.

    Raises:
        requests.HTTPError: If the download request returns an error status.
    """
    point = ee.Geometry.Point(point['coordinates'])
    # Bounding box of the buffered point, used as the export region.
    region = point.buffer(params['buffer']).bounds()

    fmt = params['format']
    request = {
        'region': region,
        'dimensions': params['dimensions'],
        'format': fmt,
    }
    # png/jpg go through the thumbnail endpoint; every other format through the download endpoint.
    if fmt in ['png', 'jpg']:
        url = img.getThumbURL(request)
    else:
        url = img.getDownloadURL(request)

    ext = 'tif' if fmt == "GEO_TIFF" else fmt

    out_dir = os.path.abspath(params['out_dir'])
    basename = str(index).zfill(len(str(params['count'])))
    filename = f"{out_dir}/{params['prefix']}{basename}.{ext}"

    # The context manager releases the connection even when the download fails.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(filename, 'wb') as out_file:
            # iter_content applies any transfer content-decoding, unlike copying r.raw directly.
            for chunk in r.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)
    print("Done: ", basename)

In [None]:
# generating aquaculture imagery

%%time

params = {
    # 'count': num_aqua,  # How many image chips to export
    'count': 100,  # How many image chips to export
    'start_count':10,
    'buffer': size_meters/2,  # The buffer distance (m) around each point
    'scale': scale,  # The scale to do stratified sampling
    'seed': 1,  # A randomization seed to use for subsampling.
    'dimensions': str(size_pixels)+('x')+(str(size_pixels)),  # '256x256'The dimension of each image chip
    'format': "GEO_TIFF",  # The output image format, can be png, jpg, ZIPPED_GEO_TIFF, GEO_TIFF, NPY
    'prefix': 'aqua_',  # The filename prefix
    'processes': 25,  # How many processes to used for parallel processing
    'out_dir': out_dir_aqua,  # The output directory. Default to the current working directly
}

logging.basicConfig()
points=ee.FeatureCollection(points_aqua)
items=points.aggregate_array('.geo').getInfo()
pool = multiprocessing.Pool(params['processes'])
pool.starmap(getResult, enumerate(items))

pool.close()

In [None]:
# generating non-aquaculture imagery

%%time

params = {
    # 'count': num_aqua,  # How many image chips to export
    'count': 100,  # How many image chips to export
    'start_count':10,
    'buffer': size_meters/2,  # The buffer distance (m) around each point
    'scale': scale,  # The scale to do stratified sampling
    'seed': 1,  # A randomization seed to use for subsampling.
    'dimensions': str(size_pixels)+('x')+(str(size_pixels)),  # '256x256'The dimension of each image chip
    'format': "GEO_TIFF",  # The output image format, can be png, jpg, ZIPPED_GEO_TIFF, GEO_TIFF, NPY
    'prefix': 'water_',  # The filename prefix
    'processes': 25,  # How many processes to used for parallel processing
    'out_dir': out_dir_water,  # The output directory. Default to the current working directly
}

logging.basicConfig()
points=ee.FeatureCollection(points_water)
items=points.aggregate_array('.geo').getInfo()
pool = multiprocessing.Pool(params['processes'])
pool.starmap(getResult, enumerate(items))

pool.close()