In [58]:
# Importing Packages
import os
import ee
import geemap
import numpy as np
import matplotlib.pyplot as plt
import skimage
import skimage.transform
import pandas as pd
import random
import numpy as np
import pickle

### Earth Engine Authentication

In [88]:
ee.Authenticate()
ee.Initialize()

Enter verification code: 4/1AX4XfWg9-FyHyZBj0gJRRzqdYNY1dAJez3gHNgwjD1IHcR0qo5vPQDoZLL0

Successfully saved authorization token.


## Key Functions

### 1. Perturbation for image centering

In [89]:
def perturb():
    _epsilon_long = [0.005, 0.004, 0.002, 0.001, 0, -0.001, -0.002, -0.004, -0.005]
    _epsilon_lat = [0.005, 0.004, 0.002, 0.001, 0, -0.001, -0.002, -0.004, -0.005]
    return np.random.choice(_epsilon_long, 1, replace=False)[0], np.random.choice(_epsilon_lat, 1, replace=False)[0]

### 2. Sampling negative cases

In [90]:
def sample_negative(long, lat):
    """
    long = float (incoming longitute of landfill)
    lat = float (incoming lattitude of landfill)
    
    return list of tuples of size 4 (long, lat) coordinates, nearby the landfill
    """
    _epsilon = 0.4
    
    _tup_1 = (long + _epsilon , lat + _epsilon)
    _tup_2 = (long + _epsilon , lat - _epsilon)
    _tup_3 = (long - _epsilon , lat + _epsilon)
    _tup_4 = (long - _epsilon , lat - _epsilon)
    
    return _tup_1, _tup_2, _tup_3, _tup_4

### 3.Get the image for a given coordinate

In [54]:
def get_ee_image_S2(long, lat, start_date = '2020-09-01', end_date = '2021-09-21', band_list = ['B4', 'B3', 'B2']):
    """
    long = float (incoming longitute of landfill)
    lat = float (incoming lattitude of landfill)
    start_date = string (start date for image sampling)
    end_date = string (end date for image sampling)
    band_list = list(string) (list of band strings to be extracted)
    
    return geemap image object given the long, lat
    """
    # Half width of geometry region
    _HW = 0.012

    point = ee.Geometry.Point(long, lat)
    aoi = ee.Geometry.Polygon([[
        [long - _HW, lat + _HW],
        [long - _HW, lat - _HW],
        [long + _HW, lat - _HW],
        [long + _HW, lat + _HW],
    ]], None, False)

    img_col = (
        ee
        .ImageCollection('COPERNICUS/S2_SR') # load from the Sentinel2 data source
        .filterBounds(point)
        .filterDate(start_date, end_date) 
        .sort('CLOUDY_PIXEL_PERCENTAGE') # sort ascending by the cloudy pixel percentage
        .select(band_list)
    )
    
    num_imgs = img_col.size().getInfo()
    if num_imgs == 0:
        return None

    img = (
        img_col
        .first() # get the lest cloudy image
        .clipToBoundsAndScale(
                geometry=aoi,
                width=512,
                height=512,
            )
    )

    return img

### 4.Reading the csv with locations

In [80]:
# the csv needs to at least contain two columns with the column names 
# "Latitude" and "Longitude" indicating the positions of the landfills

def get_landfills(filename, hashdict_filename = 'landfill_hashdict.pkl', path='D:/MIDS/CAPSTONE/UCB-Capstone-Methane-Tracker'):
    """
    filename: string (name of the csv file containing the lattitide and longitude information)
    hashdict_filename: string (name of the file where existing hashmap of landfill locations exist)
    path: string (path of the directory where both files are saved)
    
    return dictionary (landfill locations with values as (long,lat) tuple and key as their hashmap)
    """
    
    filepath = os.path.join(path, filename) # path of the location data file
    df = pd.read_csv(filepath) # loading on a data frame
    
    try:
        with open(os.path.join(path, hashdict_filename), "rb") as hashed_l: # check if the hashed location file exists  
            hashed_locations = pickle.load(hashed_l) # load dictionary object
    
    except FileNotFoundError:
        hashed_locations = {} # if file is not available, instnatiate wit a blank dictionary
    
    for index in range(len(df)):
        lat = df.loc[index, "Latitude"] # "Lattitude column name is hard coded"
        long = df.loc[index, "Longitude"] # "Longitude column name is hard coded"
        
        if (not lat) or (not long):
            continue
        
        _tup = (long, lat)
        hashmap = hash(_tup) # Create the hash of the tuple (long, lat)
        
        if hashmap in hashed_locations: 
            continue  # If the hash exists in the distionary then move on to avoid double counting 
        else: 
            hashed_locations[hashmap] = _tup # otherwise add the location to the dictionary
    
    with open(os.path.join(path, hashdict_filename), "wb") as fw:
        pickle.dump(hashed_locations, fw, protocol=pickle.HIGHEST_PROTOCOL) # rewrite the most updated file to disk 
    
    return hashed_locations

### 5.Save the image

In [48]:
def save_img(img, path, img_filename):
    
    filepath = os.path.join(path, img_filename)
    
    try: 
        img_tensor = geemap.ee_to_numpy(img)
        img_tensor = img_tensor.clip(0, 3000) / 3000
    
    except:
        print("Skipping: image not found")
        return
    
    plt.imsave(f'{filepath}.png', img_tensor)
    return 

### 6.Check if a given location is already in the training set

In [53]:
def check_availability(image_folder, hashmap):
    """
    image_folder: str (path to the folder where all training images are saved)
    hashmap: int (the hashed value of the (long, lat) tuple of landfill location)
    
    return boolean (True if the base file exists)
    """
    base_filename = 'BASE_'+str(hashmap)+'.png'
    pathToFile = os.path.join(image_folder, base_filename)
    
    return os.path.exists(pathToFile)

## Full processing pipeline

In [56]:
# key variables
landfil_locations_filename = 'consolidated_us_landfill_lat_long.csv'
hashed_location_filename = 'landfill_hashdict.txt'
main_directory_path = 'D:/MIDS/CAPSTONE/UCB-Capstone-Methane-Tracker'
image_file_location = 'D:/MIDS/CAPSTONE/UCB-Capstone-Methane-Tracker/complete_image_dataset'

In [81]:
# Step 1: Get the Landfill Dictionary 

landfill_locations = get_landfills(filename = landfil_locations_filename, 
                                   hashdict_filename = hashed_location_filename, 
                                   path=main_directory_path)

In [85]:
# Step 2: extract the images
count = 0 
for hm, tup in landfill_locations.items():
    
    if count%1000 == 0:
        print("Total landfills completed : " + str(count))

    long, lat = tup
    if (not long) or (not lat) or (long is null) or (lat is null):
        continue 
    
    # check if the base image exists 
    if check_availability(image_folder = image_file_location, hashmap = hm):
        continue
    else: 
        perturb_long, perturb_lat = perturb() # sampling small perturbation
        long_center = long + perturb_long # adjusting the center longitide
        lat_center = lat + perturb_lat # adjusting the center lattitude
        
        n1, n2, n3, n4 = sample_negative(long=long, lat=lat) # Sampling nearby non-landfill containing regions
        
        base_image_center = get_ee_image_S2(long=long, lat=lat) # base centered image 
        base_image = get_ee_image_S2(long=long_center, lat=lat_center) # extarct off centered base image (positive lable)
        
        neg_samp1 = get_ee_image_S2(long=n1[0], lat=n1[1]) # extract negative lables from nearby region
        neg_samp2 = get_ee_image_S2(long=n2[0], lat=n2[1])
        neg_samp3 = get_ee_image_S2(long=n3[0], lat=n3[1])
        neg_samp4 = get_ee_image_S2(long=n4[0], lat=n4[1])
        
        base_centered_img_name = 'BASE_'+str(hm)+'_CENT_.png' 
        base_img_name = 'BASE_'+str(hm) +'.png' # follow similar naming convention for positive and negative classes
        neg_samp1_img_name = 'NEGSAMP_'+str(hm)+'_pp.png'
        neg_samp2_img_name = 'NEGSAMP_'+str(hm)+'_pm.png'
        neg_samp3_img_name = 'NEGSAMP_'+str(hm)+'_mp.png'
        neg_samp4_img_name = 'NEGSAMP_'+str(hm)+'_mm.png'
        
        save_img(img = base_image_center, path = image_file_location, img_filename = base_centered_img_name) 
        save_img(img = base_image, path = image_file_location, img_filename = base_img_name) 
        save_img(img = neg_samp1, path = image_file_location, img_filename = neg_samp1_img_name)
        save_img(img = neg_samp2, path = image_file_location, img_filename = neg_samp2_img_name)
        save_img(img = neg_samp3, path = image_file_location, img_filename = neg_samp3_img_name)
        save_img(img = neg_samp4, path = image_file_location, img_filename = neg_samp4_img_name)
        
        count += 1
        

Total landfills completed : 0
Image.sampleRectangle: Fully masked pixels / pixels outside of the image footprint when sampling band 'B4' with no default value set. Note that calling sampleRectangle() on an image after ee.Image.clip() may result in a sampling bounding box outside the geometry passed to clip().
Skipping: image not found
Image.sampleRectangle: Fully masked pixels / pixels outside of the image footprint when sampling band 'B3' with no default value set. Note that calling sampleRectangle() on an image after ee.Image.clip() may result in a sampling bounding box outside the geometry passed to clip().
Skipping: image not found
Image.sampleRectangle: Fully masked pixels / pixels outside of the image footprint when sampling band 'B3' with no default value set. Note that calling sampleRectangle() on an image after ee.Image.clip() may result in a sampling bounding box outside the geometry passed to clip().
Skipping: image not found
Image.sampleRectangle: Fully masked pixels / pixe

EEException: Invalid JSON payload received. Unexpected token.
nstantValue": [NaN, NaN]}}}}}}}}}}}
               ^

In [87]:
if np.Nan is null:
    print(1)
else:
    print(2)

AttributeError: module 'numpy' has no attribute 'Nan'