# About 

This notebook adds, for each point in a csv file, the average and entropy of the spectral bands (and NDVI) of the NAIP scene containing the point, calculated over windows of the given sizes centered at the point. All points in the input csv must have their coordinates in the same CRS.

**NOTEBOOK PARAMETERS:**
- `data_path` (str): filepath to the csv file containing the points for which we want to calculate the average and entropy of NAIP bands over a window
- `itemid_col` (str): name of column containing itemid of the NAIP scene containing the point
- `crs_col` (str): name of the column containing the CRS of the points (only the value in the first row is used, since all points must share the same CRS)
- `lon_label` (str): column names for longitude
- `lat_label` (str): column names for latitude
- `entropy_r` (list of int): radius of the disk(s) (in pixels) over which entropy is calculated. For each given radius r, the disk used to calculate entropy will be centered at the point with radius r (i.e. a diameter of 2r+1 pixels), and the average will be calculated over a box centered at the point with side length 2r+1 pixels.

**OUTPUT:**

The given dataframe of points augmented with columns having the average and entropy at each point calculated over the indicated windows. 

In [None]:
import os
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [None]:
# The repository is expected to sit directly inside the user's home directory;
# every path below is built relative to the repository root.
home = os.path.expanduser("~")
os.chdir(os.path.join(home,'iceplant-detection-santa-barbara'))

# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# location of the csv holding the points that will receive the
# spectral window features
data_path = os.path.join(
    os.getcwd(), 'notebooks', 'A_data_sampling', 'temp', 'iceplant_pts.csv'
)

# -------------------------------------------
# disk radii (in pixels) used for the entropy calculation;
# one set of features is produced per radius
entropy_r = [1, 2]

# -------------------------------------------
# column holding the itemid of the NAIP scene that contains each point
itemid_col = 'naip_id'
# column holding the crs of the points
crs_col = 'pts_crs'

# -------------------------------------------
# columns holding the longitude (x) and latitude (y) of each point
lon_label, lat_label = 'x', 'y'

# ***************************************************
# ***************************************************

In [None]:
def clear_screen():
    """Clear the terminal by running the shell command matching `os.name`.

    Runs `cls` when `os.name` is 'nt' or 'dos' and `clear` when it is 'posix'.
    For any other value of `os.name` nothing is executed.
    """
    shell_commands = {'nt': 'cls', 'dos': 'cls', 'posix': 'clear'}
    command = shell_commands.get(os.name)
    if command is not None:
        # the exit status of the shell command is deliberately discarded
        _ = os.system(command)
        
# ===================================================
# temporary folder for aux rasters
temp_dir = os.path.join(os.getcwd(),
                    'notebooks',
                    'A_data_sampling',
                    'temp',
                    'aux_naip_rasters')
# makedirs(exist_ok=True) also creates any missing parent folder and does not
# fail when the folder already exists (e.g. left over from an interrupted run)
os.makedirs(temp_dir, exist_ok=True)

# folder for output points
out_dir = os.path.join(os.getcwd(),
                       'notebooks',
                       'A_data_sampling',
                       'output')
os.makedirs(out_dir, exist_ok=True)

# ===================================================
# read in data
all_pts = pd.read_csv(data_path)
itemids = list(all_pts[itemid_col].unique()) # itemids with points
N = len(itemids)  # counter to finish
# All points must share one crs, so the value in the first row is used for the
# whole dataframe. `.iloc[0]` takes the first row by position, so the lookup
# does not depend on the index labels of the dataframe.
crs = CRS.from_string(all_pts[crs_col].iloc[0])

# ===================================================
# length (in pixels) of the side of the square window over which the average
# is calculated: one window of side 2r+1 per entropy radius r
box_sides = [r*2 + 1 for r in entropy_r]

# ===================================================
sampled_pts = [] # sampled pts from each scene are collected here
print('REMAINING: ', N, 'scenes', end="\r")

# ===================================================
# For every NAIP scene: open the scene, and for every point in it and every
# window size, write auxiliary average/entropy rasters of a small clip around
# the point, sample them at the point, and delete the auxiliary files.
# The per-scene results are appended to `sampled_pts`.

# band prefixes and raster-name suffixes do not change between points or
# window sizes, so they are defined once outside the loops
band_names = ['r_', 'g_', 'b_', 'nir_']
tags = ['_avgs', '_entrs']

for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    try:
        pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

        # double check there are points in that scene
        if len(pts_scene) != 0:
            # create geodataframe with pts in scene
            pts_scene_df = sr.geodataframe_from_csv(df = pts_scene,
                                                    lon_label=lon_label,
                                                    lat_label=lat_label,
                                                    crs=crs)
            # convert pts to crs of NAIP scene
            pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

            samples = []
            for pt in pts_col:
                pt_samples = []
                # single-point geodataframe used to sample every auxiliary raster
                # of this point (it does not depend on the window size)
                pt_df = gpd.GeoDataFrame({'geometry':[pt]}, crs=pts_col.crs)

                for ent_r, box_s in zip(entropy_r, box_sides):
                    # Clip the scene to the bounding box of a disk of radius ent_r
                    # (in crs units) around the point, i.e. a square of side 2*ent_r.
                    #     NOTE(review): this assumes the NAIP scene's crs is in meters
                    #     and that each pixel has a side length of ~0.6m, so that
                    #     2*ent_r meters ~ 3.3*ent_r pixels, which is enough to hold
                    #     a window of side length 2*ent_r + 1 pixels around the
                    #     central pt -- confirm for scenes with coarser resolution.
                    rast = naip_rast_r.rio.clip_box(*pt.buffer(ent_r).bounds)

                    # file paths of the auxiliary rasters and the matching names of
                    # the output columns
                    window_fps = []
                    window_cols = []

                    # save auxiliary average and entropy rasters for R,G,B,NIR bands
                    # of clipped scene (bands are numbered from 1)
                    for band_n, band_name in enumerate(band_names, start=1):
                        rast_name = band_name + itemid + '_pt'
                        sr.avg_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=temp_dir)
                        sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=ent_r, folder_path=temp_dir)

                        for tag in tags:
                            window_fps.append(os.path.join(temp_dir, rast_name + tag + '.tif'))
                            # e.g. 'r_' + '_avgs' with box_s=3 -> 'r_avg3'
                            window_cols.append(band_name.replace('_','')+tag.replace('s',str(box_s)))

                    # ------------------------------
                    # make auxiliary NDVI of clipped scene
                    ndvi = sr.ndvi_xarray(rast)
                    rast_name = 'ndvi_' + itemid + '_pt'

                    sr.avg_raster(rast_data=ndvi,
                                  crs=rast.rio.crs,
                                  transf=rast.rio.transform(),
                                  rast_name=rast_name,
                                  n=box_s,
                                  folder_path=temp_dir)

                    # adjusting to entropy input types: rescale before casting to
                    # uint8 (presumably NDVI is in [-1, 1], which maps to [0, 200])
                    ndvi = ndvi*100 +100
                    sr.entropy_raster(rast_data=ndvi.astype('uint8'),
                                      crs=rast.rio.crs,
                                      transf=rast.rio.transform(),
                                      rast_name=rast_name,
                                      n=ent_r,
                                      folder_path=temp_dir)

                    for tag in tags:
                        window_fps.append(os.path.join(temp_dir, rast_name + tag + '.tif'))
                        window_cols.append( 'ndvi'+tag.replace('s',str(box_s)))

                    # ---------------------------------------
                    # sample raster values at this point
                    for fp, col_name in zip(window_fps, window_cols):
                        # the dataset is closed before its file is deleted, so no
                        # file handle is leaked and the removal also works on
                        # platforms that refuse to delete open files
                        with rasterio.open(fp) as rast_r:
                            sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])
                        pt_samples.append(sample)
                        os.remove(fp)

                # one row per point, with one column per (feature, window size)
                samples.append(pd.concat(pt_samples, axis=1))

            # ---------------------------------------
            # Add all derived spectral data to pts dataframe
            new_features = pd.concat(samples)
            pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

            # -----------------------------
            # collect all points in the scene
            sampled_pts.append(pts)
    finally:
        # release the handle of this scene's raster before moving to the next one
        naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

clear_screen()
print('FINISHED PROCESSING')

# os.rmdir only removes empty folders; leftover files (e.g. from an earlier
# interrupted run) must not abort the notebook before the results are saved
try:
    os.rmdir(temp_dir)
except OSError as err:
    print('Could not remove temporary folder', temp_dir, '-', err)

# ---------------------------------------
# create data frame with all points
sampled_pts = pd.concat(sampled_pts).sort_index()
# the geometry column (if present) is dropped before writing the csv
if 'geometry' in sampled_pts.columns:
    sampled_pts = sampled_pts.drop(['geometry'],axis=1)

# ---------------------------------------
# save
# os.path.basename extracts the file name with the path separator of the
# current platform (splitting on '/' does not work with Windows paths)
out_fp = os.path.join(out_dir, 'spectral_windows_' + os.path.basename(data_path))
sampled_pts.to_csv(out_fp, index=False)

print(sampled_pts.columns)