# About

This notebook extracts spectral and date features from NAIP images at random points within polygons. 

The polygons used here depict verified iceplant locations within four NAIP images along the Santa Barbara County coast and are archived in the data directory of this repository. 


**NOTEBOOK VARIABLES:**

- `aois` (array): These are the areas of interest where we collected the polygons we want to sample. Must be a subset of: `['campus_lagoon','carpinteria','gaviota','capitan']`. 

- `total_pts` (array of int): the number of points to sample from each aoi

- `convert_crs` (bool): whether to convert all sampled points to the same CRS (EPSG 4326); otherwise, points keep the CRS of the NAIP image they were sampled from.


**OUTPUT:**

The output is a data frame of points with the following features:

- x, y: coordinates of point *p* 
- pts_crs: CRS of coordinates x, y
- naip_id: itemid of the NAIP image from which *p* was sampled
- polygon_id: id of the polygon from which *p* was sampled
- iceplant: whether point *p* corresponds to a confirmed iceplant location or a confirmed non-iceplant location (0 = non-iceplant, 1 = iceplant)
- r, g, b, nir: Red, Green, Blue, and NIR values of NAIP scene with naip_id at coordinates of point *p*
- ndvi: computed for each point using the Red and NIR bands
- year, month, day_in_year: year, month, and day of the year when the NAIP image was collected
- aoi: name of the area of interest where the points were sampled from


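The data frames are saved as csv files in the 'temp' folder. If `convert_crs` is `False`, one file is saved per aoi, with filenames of the form `<aoi>_iceplant_pts.csv`. If `convert_crs` is `True`, all points are saved in a single file named `iceplant_pts.csv`.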

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import geopandas as gpd
from rasterio import CRS

import sample_rasters as sr

In [None]:
### ***************************************************
# The repository is assumed to sit directly inside the user's home directory.
# Make the repository's top directory the working directory, so that the
# relative paths built later in the notebook resolve from there.
home = os.path.expanduser("~")
repo_root = os.path.join(home, 'iceplant-detection-santa-barbara')
os.chdir(repo_root)

### ***************************************************
def path_2_polys(aoi):
    """
        Builds the path to the shapefile holding the polygons collected at an aoi.

        The path is rooted at the current working directory and has the form
        data/iceplant_data/iceplant_polygons/<aoi>_iceplant_polygons/<aoi>_iceplant_polygons.shp

            Parameters:
                        aoi (str): name of the aoi, as used in the polygons' file name
            Return:
                        fp (str): the constructed file path, when something exists at
                                  that location. Otherwise a message is printed and
                                  None is returned.
    """
    # the shapefile and the folder containing it share the same base name
    base_name = aoi + '_iceplant_polygons'
    fp = os.path.join(os.getcwd(),
                      'data',
                      'iceplant_data',
                      'iceplant_polygons',
                      base_name,
                      base_name + '.shp')

    # only hand back the path when there is something at that location
    if os.path.exists(fp):
        return fp

    print('invalid filepath: no file')
    return None
    

# Specify notebook variables

In [None]:
### ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# areas of interest whose polygons will be sampled
aois = ['carpinteria','campus_lagoon','capitan','gaviota']

# number of points to sample at each aoi; paired position-by-position with `aois`
total_pts = [10,10,10,10]

# if True, all sampled points are converted to EPSG 4326 and saved in a single csv;
# if False, each aoi's points are saved in a separate csv without converting their crs
convert_crs = True

# Sample points

In [None]:
# create temp directory if needed, final sampled pts are saved here
tmp_path = os.path.join(os.getcwd(),
                        'notebooks',
                        'A_data_sampling',
                        'temp')
# makedirs also creates any missing parent folder and does not fail
# when the folder already exists
os.makedirs(tmp_path, exist_ok=True)

# zip() would silently ignore the extra entries of the longer list,
# so fail early when the two notebook variables do not line up
if len(aois) != len(total_pts):
    raise ValueError('aois and total_pts must have the same length')

# -------------------------
# sample points
all_pts = []
for aoi, total_pts_aoi in zip(aois, total_pts):
    # open polygons
    fp = path_2_polys(aoi)
    if fp is None:
        # path_2_polys returns None when the shapefile is missing; stop here
        # with a clear message instead of failing inside gpd.read_file
        raise FileNotFoundError('no polygons shapefile found for aoi: ' + str(aoi))
    polys = gpd.read_file(fp)

    # -------------------------
    # select iceplant polygons
    polys_ice = polys.loc[polys.iceplant == 1].reset_index(drop=True)

    # sample points according to parameters
    # NOTE(review): the first value of the shapefile's `aoi` column is passed
    # as the NAIP item id -- confirm this column holds the item id
    pts = sr.sample_naip_from_polys_no_warnings(polys=polys_ice,
                                                class_name='iceplant',
                                                itemid=polys.aoi[0],
                                                total_pts=total_pts_aoi)
    pts['aoi'] = aoi

    # add ndvi as feature: (nir - r) / (nir + r)
    # bands are cast to int16 before the arithmetic (presumably so that the
    # subtraction cannot wrap around on unsigned band values -- TODO confirm)
    # points where nir + r == 0 end up with NaN or inf
    nir = pts.nir.astype('int16')
    red = pts.r.astype('int16')
    pts['ndvi'] = (nir - red) / (nir + red)

    # -------------------------
    if convert_crs:
        # keep the points, their crs is matched once all aois are sampled
        all_pts.append(pts)
    else:
        # no need to match the crs of all points: save each aoi separately
        fp = os.path.join(tmp_path, aoi + '_iceplant_pts.csv')
        pts.to_csv(fp, index=False)

# -------------------------
# match crs of all sampled points to EPSG 4326
if convert_crs:
    same_crs_pts = []
    for df in all_pts:
        # -------------------------
        # find crs of points and create geodataframe
        # every row of df has the same aoi; read it by position so this
        # does not depend on the index labels of df
        aoi = df['aoi'].iloc[0]
        if aoi in ['campus_lagoon', 'carpinteria']:
            crs = 26911  # this crs is known from the NAIP scene
        else:
            crs = 26910  # this crs is known from the NAIP scene
        gdf = gpd.GeoDataFrame(df,
                               geometry=gpd.points_from_xy(df.x, df.y),
                               crs=CRS.from_epsg(crs))
        # -------------------------
        # convert to EPSG 4326 crs
        gdf = gdf.to_crs(CRS.from_epsg(4326))
        same_crs_pts.append(gdf)

    # -------------------------
    # create final dataframe of pts
    pts = pd.concat(same_crs_pts, ignore_index=True)
    # -------------------------
    # update coordinate and crs columns with the reprojected values
    pts = pts.drop(['x', 'y', 'pts_crs'], axis=1)
    pts = pts.assign(x=lambda pt: pt.geometry.x)
    pts = pts.assign(y=lambda pt: pt.geometry.y)
    pts['pts_crs'] = 'EPSG:4326'
    pts = pts.drop(['geometry'], axis=1)

    # -------------------------
    # save points
    fp = os.path.join(tmp_path, 'iceplant_pts.csv')
    pts.to_csv(fp, index=False)