# Setup


In [None]:
import ee
import urllib.request
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import PIL
from PIL import Image, ImageDraw
from osgeo import gdal
from osgeo import osr
import numpy as np
import os, sys
import random
import shutil
import pandas as pd

# Authenticate with Earth Engine and initialise the client library; this has to
# run before any other ee.* call in the cells below.
ee.Authenticate()
ee.Initialize()

# Sentinel 2 CSV List Export


In [None]:
# Load the table of locations to export. Replace 'file_name.csv' with the path
# to your own tab-separated file. The columns are renamed so the rest of the
# notebook can rely on 'X', 'Y' and 'id'.
_location_column_names = {'x': 'X', 'y': 'Y', 'Unnamed: 0':'id'}
locations = pd.read_csv('file_name.csv', sep='\t').rename(columns=_location_column_names)
print(locations)


In [None]:
# Read the ids of the locations that should be exported. The ids are taken
# from the header row (the column names) of 'a_and_large_mw.csv' and converted
# to integers.
_large_mw_header = pd.read_csv('a_and_large_mw.csv', sep=',').columns
a_large_mw_locations = list(map(int, _large_mw_header))
a_large_mw_locations

In [55]:
# Export settings read by the export loop below.
# Folder that receives the preview, mask and GeoTIFF mask files.
export_path = "a_large_sentinel_export"
# When True, only ids listed in a_large_mw_locations are exported.
only_export_large_locations = True

In [None]:
# Export loop: for every selected location this cell
#   1. picks the least cloudy 2018 Sentinel-2 SR image covering the point,
#   2. downloads an RGB preview thumbnail,
#   3. starts a Drive export of the bands B2/B3/B4/B8/B11/B12,
#   4. rasterises the location polygons that fall into the export window into
#      a binary mask and writes it as a JPEG preview and as a GeoTIFF.
#
# Reads from earlier cells: locations, a_large_mw_locations,
# only_export_large_locations, export_path.

# Variety of configurations
vis_min = 0  # Visualization settings for the thumbnail
vis_max = 1024  # Visualization settings for the thumbnail
vis_bands = ['B4', 'B3', 'B2']  # Includes the bands for RGB
imageDimensions = '512x512'  # Set thumbnail image size (can't be too big, or you run into problems)
nir_bands = ['B8']  # Includes the bands for nir
swirB11_bands = ['B11']  # Includes the bands for swir B11
swirB12_bands = ['B12']  # Includes the bands for swir B12

# Mask size in pixels; has to agree with imageDimensions above.
width = 512
height = 512

# Rows with an id below startLocation are skipped (raise it to resume a run).
startLocation = 4429
last_id = -1

# Build the lookups once instead of scanning the list / DataFrame per location.
# The id set is only needed (and a_large_mw_locations only required to exist)
# when the large-location filter is switched on.
large_location_ids = set(a_large_mw_locations) if only_export_large_locations else set()
# id -> list of (X, Y) vertices in file order. Keys keep the dtype of the 'id'
# column; integer lookups still hit integral float keys because they hash and
# compare equal.
vertices_by_id = {
    group_id: list(group[["X", "Y"]].itertuples(index=False, name=None))
    for group_id, group in locations.groupby('id', sort=False)
}

# Make sure the output folders exist before anything is written into them.
for _subdir in ('preview', 'masks', 'tif_masks'):
  os.makedirs('{}/{}'.format(export_path, _subdir), exist_ok=True)

for _, row in locations.iterrows():
  row_id = int(row['id'])
  # Skip rows before the start id, further rows of the location that was just
  # handled (one location spans several rows, one per polygon vertex), and
  # locations outside the "large" selection when that filter is on.
  if row_id < startLocation or row_id == last_id:
    continue
  if only_export_large_locations and row_id not in large_location_ids:
    continue
  last_id = row_id
  print(last_id)

  # Choose your location to request an image from
  longitude = row['X']
  latitude = row['Y']
  # Kept un-cast so the generated file names are formatted exactly as before.
  location_id = row['id']

  center = ee.Geometry.Point(longitude, latitude)
  # Import Sentinel dataset: least cloudy image over the point
  s2 = (ee.ImageCollection("COPERNICUS/S2_SR")
    .filterBounds(center)
    .sort('CLOUDY_PIXEL_PERCENTAGE')
    .filterDate('2018-01-01', '2018-12-30')
    .first()
  )

  # getInfo() is a blocking round trip to the server, so fetch the image
  # metadata exactly once.
  try:
    footprint_coords = s2.getInfo().get('properties').get('system:footprint').get('coordinates')
    sentinel_footprint = ee.Geometry.Polygon(footprint_coords)
  except Exception:
    print("No Footprint found")
    # NOTE(review): this stops the whole export, as before (the original had an
    # unreachable `continue` right after this `break`). Confirm whether
    # skipping just this location is what is actually wanted.
    break
  footprint = sentinel_footprint

  sentinel_centroid = center
  # Create a rectangular export area around the point
  exportAreaSmall = sentinel_centroid.buffer(2530).bounds()

  # Only export when the window lies completely inside the image footprint.
  if not sentinel_footprint.contains(exportAreaSmall, 1).getInfo():
    print("No fit")
    continue
  exportArea = exportAreaSmall

  # save rgb
  s2_crs = s2.select('B4').projection()
  s2Vis = {
      'region': exportArea,
      'crs': s2_crs,
      'dimensions': imageDimensions,
      'format': 'jpg',
      'min': vis_min,
      'max': vis_max,
      'bands': vis_bands
  }
  s2_url = s2.getThumbURL(s2Vis)

  # Change the location where the images are saved by editing export_path
  s2_name = "{}/preview/S2_{}_{}_{}.jpg".format(export_path, location_id, longitude, latitude)
  urllib.request.urlretrieve(s2_url, s2_name)

  # Bounding box of the export window, taken from its outer ring (one server
  # round trip instead of four).
  export_ring = exportArea.getInfo().get('coordinates')[0]
  ring_xs = [point[0] for point in export_ring]
  ring_ys = [point[1] for point in export_ring]
  x_min, x_max = min(ring_xs), max(ring_xs)
  y_min, y_max = min(ring_ys), max(ring_ys)

  # generate a tif file with all bands!
  s2_tif_name = "S2_tif_{}_{}_{}_merged".format(location_id, longitude, latitude)

  _region = ee.Geometry.BBox(x_min, y_min, x_max, y_max)
  task = ee.batch.Export.image.toDrive(
      image=s2.select('B2', 'B3', 'B4', 'B8', 'B11', 'B12'),
      description=s2_tif_name,
      folder='large_tif_files',
      region=_region,
      dimensions=imageDimensions,
      crs=s2_crs
  )
  task.start()

  # generate mask: white polygons on a black image
  img = Image.new("RGB", (width, height), "black")
  draw = ImageDraw.Draw(img)

  # Size of one mask pixel in coordinate units of the export window.
  one_x_px_in_degree = (x_max - x_min) / width
  one_y_px_in_degree = (y_max - y_min) / height

  # Polygon of the location itself. It is drawn once, after all vertices have
  # been collected, so that partial rings are never filled.
  export_coords = [
      ((x - x_min) / one_x_px_in_degree, (y - y_min) / one_y_px_in_degree)
      for x, y in vertices_by_id.get(last_id, [])
  ]
  if len(export_coords) > 2:
    draw.polygon(export_coords, fill="white")

  # Check the neighbouring ids before and after to see if there are more solar
  # panels in the picture. The lower end is clamped at id 0, so locations with
  # an id below the lookback distance still get their predecessors checked.
  lookback = min(700, last_id)
  lookforward = 700
  neighbour_ranges = (
      range(last_id - lookback, last_id),
      range(last_id + 1, last_id + lookforward),
  )
  for id_range in neighbour_ranges:
    for near_locations in id_range:
      # Keep only the vertices that fall strictly inside the export window.
      near_coords = [
          ((x - x_min) / one_x_px_in_degree, (y - y_min) / one_y_px_in_degree)
          for x, y in vertices_by_id.get(near_locations, [])
          if x_min < x < x_max and y_min < y < y_max
      ]
      if len(near_coords) > 3:
        draw.polygon(near_coords, fill="white")

  # Row 0 of the drawn image corresponds to y_min, but image row 0 is the top
  # edge, so flip vertically. Flipping the PIL image directly keeps the mask at
  # exactly width x height and free of resampling / JPEG artefacts before it is
  # binarised.
  mask_img = img.transpose(Image.Transpose.FLIP_TOP_BOTTOM)
  mask_img.save('{}/masks/S2_{}_{}_{}.jpg'.format(export_path, location_id, longitude, latitude))

  # since we only created mask preview files so far, lets also create the
  # matching geotif files: 1 where a polygon was drawn, 0 elsewhere
  mask_pixels = (np.array(mask_img)[:, :, 0] != 0).astype(np.uint8)

  # Geotransform: (x origin, pixel width, 0, y origin, 0, -pixel height), with
  # the origin at the top-left corner, i.e. (x_min, y_max).
  xres = (x_max - x_min) / float(width)
  yres = (y_max - y_min) / float(height)
  geotransform = (x_min, xres, 0, y_max, 0, -yres)

  # create the raster file
  dst_ds = gdal.GetDriverByName('GTiff').Create(
      '{}/tif_masks/S2_{}_{}_{}.tif'.format(export_path, location_id, longitude, latitude),
      width, height, 1, gdal.GDT_Byte)

  dst_ds.SetGeoTransform(geotransform)    # specify coords
  srs = osr.SpatialReference()            # establish encoding
  # The bounds come from the GeoJSON returned by getInfo() and are in degrees,
  # so the matching CRS is geographic WGS84 rather than a metric UTM zone.
  srs.ImportFromEPSG(4326)                # WGS84 lat/long
  dst_ds.SetProjection(srs.ExportToWkt()) # export coords to file
  dst_ds.GetRasterBand(1).WriteArray(mask_pixels)   # write the mask band to the raster
  dst_ds.FlushCache()                     # write to disk
  dst_ds = None                           # release the dataset handle

In [None]:
# Show the CRS identifier of band B4 of `s2`, the image left over from the
# last iteration of the export loop.
b4_projection_info = s2.select('B4').projection().getInfo()
b4_projection_info["crs"]

In [None]:
# This is for updating the mask to different values!
# Re-binarises an existing mask GeoTIFF in place: values below 1 become 0 and
# values of 1 or more become 1. Geotransform and projection are carried over.
vis_min = 0  # Visualization settings for the thumbnail
vis_max = 1024  # Visualization settings for the thumbnail
vis_bands = ['B4', 'B3', 'B2']  # Includes the bands for RGB
imageDimensions = '512x512'  # Set thumbnail image size (can't be too big, or you run into problems)
nir_bands = ['B8']  # Includes the bands for nir
swirB11_bands = ['B11']  # Includes the bands for swir B11
swirB12_bands = ['B12']  # Includes the bands for swir B12

startLocation = 0
last_id = -1
for _, row in locations.iterrows():
  row_id = int(row['id'])
  # Skip rows before the start id and further rows of the id handled last.
  if row_id < startLocation or row_id == last_id:
    continue

  # Prints the id handled previously (-1 on the first pass).
  print(last_id)
  last_id = row_id
  # Rebuild the file name exactly the way the export loop wrote it.
  longitude = row['X']
  latitude = row['Y']
  location_id = row['id']

  mask_path = '{}/tif_masks/S2_{}_{}_{}.tif'.format(export_path, location_id, longitude, latitude)
  # Locations that were never exported have no mask; skip them quietly.
  if not os.path.exists(mask_path):
    continue
  ds = gdal.Open(mask_path)
  if not ds:
    continue

  # Read everything that is needed from the source, then release the handle
  # so the file is not still open while it is re-created below.
  arr = ds.GetRasterBand(1).ReadAsArray()
  rows, cols = arr.shape
  geotransform = ds.GetGeoTransform()
  projection = ds.GetProjection()
  ds = None

  arr_out = (arr >= 1).astype(np.uint8)

  driver = gdal.GetDriverByName("GTiff")
  outdata = driver.Create(mask_path, cols, rows, 1, gdal.GDT_Byte)
  outdata.SetGeoTransform(geotransform)  # same geotransform as the input
  outdata.SetProjection(projection)      # same projection as the input
  print(arr_out.shape)
  outdata.GetRasterBand(1).WriteArray(arr_out)
  outdata.FlushCache()  # saves to disk
  outdata = None
  # NOTE(review): the loop stops after the first mask that was rewritten, as
  # before. Remove this `break` if every mask is meant to be updated.
  break

-1
(512, 512)


In [None]:
# Create the training / validation folders for the model: shuffle the exported
# GeoTIFFs, split them 70/30 and copy every scene together with its mask.
import random
import shutil

# get list of available data
data = [entry.name for entry in os.scandir('{}/tif_files'.format(export_path)) if entry.is_file()]
mask = [entry.name for entry in os.scandir('{}/tif_masks'.format(export_path)) if entry.is_file()]
print(data)
print(mask)

# generate test/train from available data
# NOTE(review): the split is taken over all scenes, including those without a
# mask (which are skipped below), so the copied sets may deviate from 70/30.
random.shuffle(data)
length = int(len(data)*0.7)
print(len(data))
train_data = data[:length]
test_data = data[length:]

# Start from empty output folders. Only remove a folder that is actually
# there, so the very first run does not fail.
for split_dir in ('validation', 'training'):
  if os.path.isdir(split_dir):
    shutil.rmtree(split_dir)
  os.mkdir(split_dir)

# build test and train data folders
# copy both tif files to training or validation!
mask_names = set(mask)
for split_dir, split_files in (('training', train_data), ('validation', test_data)):
  for tif_name in split_files:
    # 'S2_tif_<...>_merged.tif' -> 'S2_<...>.tif', the name of the mask file.
    mask_name = tif_name.replace("_merged","").replace("_tif","")
    if mask_name not in mask_names:
      continue
    mask_target = tif_name.replace("_merged","_mask")
    # data
    shutil.copyfile(f'{export_path}/tif_files/{tif_name}', f'{split_dir}/{tif_name}')
    # mask
    shutil.copyfile(f'{export_path}/tif_masks/{mask_name}', f'{split_dir}/{mask_target}')



['S2_tif_1418.0_114.95250182147028_38.36774811216274_merged.tif', 'S2_tif_1420.0_117.95800926831882_42.35713797011496_merged.tif', 'S2_tif_1419.0_114.00274727360178_38.47051202685896_merged.tif', 'S2_tif_1424.0_113.68342051520985_36.62399679424529_merged.tif', 'S2_tif_1422.0_114.99849355330652_37.63657912825696_merged.tif', 'S2_tif_1426.0_117.77288760517298_41.22757048698065_merged.tif', 'S2_tif_1425.0_114.57282150785962_39.45643045515003_merged.tif', 'S2_tif_1428.0_115.29664957871492_36.87837358858305_merged.tif', 'S2_tif_1427.0_114.84658721048233_40.29872820100133_merged.tif', 'S2_tif_1430.0_113.72901934440593_36.68145271896969_merged.tif', 'S2_tif_1429.0_117.44807620131624_38.188592344073314_merged.tif', 'S2_tif_1432.0_118.7647410803597_41.22232815062798_merged.tif', 'S2_tif_1431.0_114.95702152201645_38.36832339233854_merged.tif', 'S2_tif_1435.0_115.0056847393625_37.34141665762544_merged.tif', 'S2_tif_1434.0_117.96646636450964_42.358242274825216_merged.tif', 'S2_tif_1433.0_115.28757

In [None]:
# Shell commands (IPython `!` escape): pack /content/training and
# /content/validation recursively into one zip archive each.
!zip -r /content/training.zip /content/training
!zip -r /content/validation.zip /content/validation