In [28]:
import os 
import glob
import numpy as np
import rasterio as rio
import pandas as pd
import json
import pyproj
import matplotlib.pyplot as plt
from rasterio import plot
from rasterio.mask import raster_geometry_mask
from shapely.geometry import shape, MultiPolygon
from shapely.ops import transform
import geopandas as gpd
from geocube.api.core import make_geocube
import rioxarray as rx

## Helper functions

In [29]:
# Generate binary mask from multi-band .tif file

def binary_mask_tif(mask_fp):
    '''This function turns a multi-band raster mask into a single-band raster mask
    with unmasked pixels coded as 1s and masked pixels coded as 0s
    
    Inputs:
    mask_fp (str) : filepath to the mask .tif file
    
    Returns:
    mask_arr_3d (np array) : 3-d numpy array of 0s and 1s'''
    
    with rio.open(mask_fp) as src:
        mask_arr = src.read()
        band_ct = mask_arr.shape[0]
    
    # get unique values for binary mask band (not necessarily 0 and 1)
    # binary mask band is the last band in the image
    
    mask_band = mask_arr[(band_ct-1)]
    mask_vals = np.unique(mask_band)
    
    # make binary mask 0s and 1s
    mask_arr_binary = mask_band*(1/(mask_vals[1]))
    
    mask_arr_3d = mask_arr_binary.reshape(1,mask_arr.shape[1],mask_arr.shape[2])
    
    #checks
    #print(mask_arr_3d.shape)
    #plot.show(mask_arr_3d)
    
    return mask_arr_3d

In [30]:
# Generate binary mask from three-band .png file

def binary_mask_png(mask_fp):
    '''This function turns a .png mask file into a single-band raster mask
    with unmasked pixels coded as 1s and masked pixels coded as 0s
    
    Inputs:
    mask_fp (str) : filepath to the mask .png file
    
    Returns:
    mask_arr_3d (np array) : 3-d numpy array of 0s and 1s'''
    
    # Read the PNG file into a NumPy array
    mask_array = plt.imread(mask_fp)
    band_ct = mask_arr.shape[0]

    # get unique values for binary mask band (not necessarily 0 and 1)
    # binary mask band is the last band in the image
    
    mask_band = mask_arr[(band_ct-1)]
    mask_vals = np.unique(mask_band)
    
    # make binary mask 0s and 1s
    mask_arr_binary = mask_band*(1/(mask_vals[1]))
    
    mask_arr_3d = mask_arr_binary.reshape(1,mask_arr.shape[1],mask_arr.shape[2])
    
    #checks
    #print(mask_arr_3d.shape)
    #plot.show(mask_arr_3d)
    
    return mask_arr_3d

In [62]:
# multi-purpose binary mask (accepts .png, .jpg, or .tif mask files)

# Generate binary mask from multi-band .tif, .jpg, or .png file

def binary_mask_multi(mask_fp):
    '''This function turns a multi-band .tif, .jpg, or .png mask file into a single-band raster mask
    with unmasked pixels coded as 1s and masked pixels coded as 0s
    
    Inputs:
    mask_fp (str) : filepath to the mask .tif, .jpg, or .png file
    
    Returns:
    mask_arr_3d (np array) : 3-d numpy array of 0s and 1s'''

    if mask_fp[-3:]=='tif':
        with rio.open(mask_fp) as src:
            mask_arr = src.read()
            band_ct = mask_arr.shape[0]
    
            # get unique values for binary mask band (not necessarily 0 and 1)
            # binary mask band is the last band in the image
    
            mask_band = mask_arr[(band_ct-1)]
            mask_vals = np.unique(mask_band)
    
            # make binary mask 0s and 1s
            mask_arr_binary = mask_band*(1/(mask_vals[1]))
    
            mask_arr_3d = mask_arr_binary.reshape(1,mask_arr.shape[1],mask_arr.shape[2])
            return mask_arr_3d
            
    elif mask_fp[-3:]=='png':
        mask_arr = plt.imread(mask_fp)
        band_ct = mask_arr.shape[2]
        mask_band = mask_arr[:,:,-band_ct]
        mask_vals = np.unique(mask_band)
        mask_arr_binary = mask_band*(1/(mask_vals[-1]))
        mask_arr_binary[mask_arr_binary != 1] = 0
        mask_arr_3d = mask_arr_binary.reshape(1,mask_arr.shape[0],mask_arr.shape[1])
        return mask_arr_3d
        
    elif mask_fp[-3:]=='jpg':
        mask_arr = plt.imread(mask_fp)
        band_ct = mask_arr.shape[2]
        mask_band = mask_arr[:,:,-band_ct]
        mask_vals = np.unique(mask_band)
        mask_arr_binary = mask_band*(1/(mask_vals[-1]))
        mask_arr_binary[mask_arr_binary != 1] = 0
        mask_arr_3d = mask_arr_binary.reshape(1,mask_arr.shape[0],mask_arr.shape[1])
        return mask_arr_3d

    else:
        print ("mask file type is not supported. Supported file types are: .tif .png .jpg")


In [63]:
# Mask function

def mask_img(img_fp, mask_fp, output_dir):
    """
    This function masks a multispectral or RGB UAS image using a binary mask file
    The mask file must have the same dimensions and CRS as the UAV image.
    
    Inputs:
    img_fp (str) : filepath to the UAV image to be masked (.tif) 
    
    mask_fp (str) : filepath to the mask file (.tif, .png, .jpg)
    
    output_dir (str) : directory to store the masked .tif image (e.g. 'kathleen/Desktop/')
    
    Returns:
    
    A masked .tif file with the same dimensions and CRS as the original UAV image. 
    All masked pixels will have a value of 0 for all bands. Unmasked pixels will retain original band values. 
    """ 
    
    mask_arr = binary_mask_multi(mask_fp) ## modify with appropriate helper fxn
    
    with rio.open(img_fp) as src:
        img_arr = src.read()
        masked_img_arr = mask_arr * img_arr
        
        kwargs = src.meta
        band_ct = masked_img_arr.shape[0]
        kwargs.update(dtype=rio.float32, count=band_ct)
        
        with rio.open(output_dir+
                      'masked_'+
                      str(os.path.basename(img_fp)),
                      'w', **kwargs) as dst:
            for b in range(masked_img_arr.shape[0]):
                dst.write_band(b+1, masked_img_arr[b].astype(rio.float32))
        
        
        #checks
        #print(masked_img_arr.shape)
        #plot.show(masked_img_arr[(band_ct-1)])
    

## Mask the RGB UAS orthomosaic  

In [64]:
# Paths to RGB orthomosaic and corresponding mask file. 

ortho = '/Volumes/CAIR_LAB/UAV_Share/2022/Hemp_2022/NY 2022/RGBmaps/Hemp_22_08_03_RGBmap.tif'

mask_jpg = '/Volumes/CAIR_LAB/UAV_Share/2022/Hemp_2022/NY 2022/Processed/Maps/20220803_mask.jpg'

In [66]:
# Mask the orthomosaic with the corresponding mask file

output_dir = '/Users/kathleenkanaley/Desktop/' # modify to match your file structure
mask_img(ortho, 
         mask_jpg, 
         output_dir)

## Use a SHP metadata file to extract reflectance data for each experimental unit

In [67]:
# The metadata file in this example is a SHP file containing hemp plant bounding box coordinates
# The experimental unit is one hemp plant 

# Metadata file with panel geometries
shp_path = '/Volumes/CAIR_LAB/UAV_Share/2022/Hemp_2022/NY 2022/Processed/Maps/Shapefile/map_20220803_poly.shp'

# Read the .shp file as a geodataframe
gdf = gpd.read_file(shp_path)
gdf.head()

Unnamed: 0,plant_id,col_id,row_id,genotype,c_east,c_north,geometry
0,1,1,15,,336142.354148,4751174.0,"POLYGON ((336143.170 4751173.329, 336141.571 4..."
1,2,1,14,,336142.491947,4751172.0,"POLYGON ((336143.308 4751171.151, 336141.709 4..."
2,3,1,13,,336142.491947,4751170.0,"POLYGON ((336143.308 4751168.891, 336141.709 4..."
3,4,1,12,,336142.629746,4751167.0,"POLYGON ((336143.446 4751166.384, 336141.846 4..."
4,5,1,11,,336142.615966,4751165.0,"POLYGON ((336143.432 4751164.385, 336141.833 4..."


In [68]:
# Reset index
gdf['index'] = gdf.index
gdf

Unnamed: 0,plant_id,col_id,row_id,genotype,c_east,c_north,geometry,index
0,1,1,15,,336142.354148,4.751174e+06,"POLYGON ((336143.170 4751173.329, 336141.571 4...",0
1,2,1,14,,336142.491947,4.751172e+06,"POLYGON ((336143.308 4751171.151, 336141.709 4...",1
2,3,1,13,,336142.491947,4.751170e+06,"POLYGON ((336143.308 4751168.891, 336141.709 4...",2
3,4,1,12,,336142.629746,4.751167e+06,"POLYGON ((336143.446 4751166.384, 336141.846 4...",3
4,5,1,11,,336142.615966,4.751165e+06,"POLYGON ((336143.432 4751164.385, 336141.833 4...",4
...,...,...,...,...,...,...,...,...
535,536,36,5,,336223.324841,4.751151e+06,"POLYGON ((336224.141 4751149.999, 336222.542 4...",535
536,537,36,4,,336223.407520,4.751148e+06,"POLYGON ((336224.224 4751147.643, 336222.624 4...",536
537,538,36,3,,336223.476419,4.751146e+06,"POLYGON ((336224.293 4751145.328, 336222.693 4...",537
538,539,36,2,,336223.559099,4.751144e+06,"POLYGON ((336224.375 4751143.054, 336222.776 4...",538


In [69]:
mask_img_path = '/Users/kathleenkanaley/Desktop/masked_Hemp_22_08_03_RGBmap.tif' # path to masked image
img_data = rx.open_rasterio(img_path)#, masked=True)#.rio.clip(gdf.geometry.values, gdf.crs)
img_data

In [70]:
# # Hemp
out_grid = make_geocube(
    vector_data=gdf,
    measurements=['plant_id','col_id','row_id','genotype','index'],
    like=img_data, # ensure the data are on the same grid
)

In [71]:
out_grid

In [72]:
# This section is specific to RGB images

blue = img_data[0]
green = img_data[1]
red = img_data[2]

band_dict = {'blue':blue, 'green':green, 'red':red}

In [73]:
# merge the dfs together

for key, b in band_dict.items():
    out_grid[key] = (b.dims, b.values, b.attrs, b.encoding)

out_grid


In [74]:
# Change 255 to NAN - hemp
out_grid_nans= out_grid.where(out_grid != 255)

In [75]:
out_grid_nans

In [76]:
# Get a dataframe with per-pixel reflectance values - hemp
outgrid_df = out_grid_nans.to_dataframe()
outgrid_df.sort_values(by=['plant_id'], inplace=True)
outgrid_df.reset_index(inplace=True)
outgrid_df.dropna(subset=['plant_id'], inplace=True) # remove pixels not associated with a plant_id
outgrid_df

Unnamed: 0,y,x,plant_id,col_id,row_id,index,blue,green,red,spatial_ref
0,4.751174e+06,336142.088870,1.0,1.0,15.0,0.0,246.0,237.0,222.0,0
1,4.751173e+06,336141.844012,1.0,1.0,15.0,0.0,83.0,98.0,116.0,0
2,4.751173e+06,336141.830409,1.0,1.0,15.0,0.0,87.0,100.0,111.0,0
3,4.751173e+06,336141.816806,1.0,1.0,15.0,0.0,92.0,96.0,109.0,0
4,4.751173e+06,336141.803202,1.0,1.0,15.0,0.0,87.0,95.0,105.0,0
...,...,...,...,...,...,...,...,...,...,...
5271750,4.751142e+06,336223.055116,540.0,36.0,1.0,539.0,167.0,171.0,187.0,0
5271751,4.751142e+06,336223.041513,540.0,36.0,1.0,539.0,155.0,172.0,173.0,0
5271752,4.751142e+06,336223.027910,540.0,36.0,1.0,539.0,168.0,180.0,180.0,0
5271753,4.751142e+06,336223.218354,540.0,36.0,1.0,539.0,70.0,83.0,87.0,0


## Optionally, calculate the average reflectance for each experimental unit

In [77]:
# Calculate the average reflectance for each experimental uit (in this case, experimental unit = one panel)
groupby_plantid = out_grid_nans.drop("spatial_ref").groupby(out_grid_nans.index)

plant_means = groupby_plantid.mean()
as_df = plant_means.to_dataframe()
as_df

  groupby_plantid = out_grid_nans.drop("spatial_ref").groupby(out_grid_nans.index)


Unnamed: 0_level_0,plant_id,col_id,row_id,blue,green,red
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,1.0,1.0,15.0,112.985970,135.222916,135.394287
1.0,2.0,1.0,14.0,100.633522,116.594055,121.128365
2.0,3.0,1.0,13.0,118.291893,137.924500,146.979324
3.0,4.0,1.0,12.0,97.371437,123.654900,124.230515
4.0,5.0,1.0,11.0,121.065308,133.296036,140.946991
...,...,...,...,...,...,...
535.0,536.0,36.0,5.0,153.075607,174.423004,181.215530
536.0,537.0,36.0,4.0,136.817810,165.613037,160.489578
537.0,538.0,36.0,3.0,144.492401,177.775467,170.220093
538.0,539.0,36.0,2.0,136.143845,161.712265,157.716476


In [78]:
as_df.sort_values(by=['plant_id'], inplace=True)
as_df.reset_index(inplace=True)
as_df

Unnamed: 0,index,plant_id,col_id,row_id,blue,green,red
0,0.0,1.0,1.0,15.0,112.985970,135.222916,135.394287
1,1.0,2.0,1.0,14.0,100.633522,116.594055,121.128365
2,2.0,3.0,1.0,13.0,118.291893,137.924500,146.979324
3,3.0,4.0,1.0,12.0,97.371437,123.654900,124.230515
4,4.0,5.0,1.0,11.0,121.065308,133.296036,140.946991
...,...,...,...,...,...,...,...
464,536.0,537.0,36.0,4.0,136.817810,165.613037,160.489578
465,537.0,538.0,36.0,3.0,144.492401,177.775467,170.220093
466,538.0,539.0,36.0,2.0,136.143845,161.712265,157.716476
467,539.0,540.0,36.0,1.0,172.319504,184.853439,164.557877


In [79]:
final_df = as_df.drop(['index'], axis=1)
final_df

Unnamed: 0,plant_id,col_id,row_id,blue,green,red
0,1.0,1.0,15.0,112.985970,135.222916,135.394287
1,2.0,1.0,14.0,100.633522,116.594055,121.128365
2,3.0,1.0,13.0,118.291893,137.924500,146.979324
3,4.0,1.0,12.0,97.371437,123.654900,124.230515
4,5.0,1.0,11.0,121.065308,133.296036,140.946991
...,...,...,...,...,...,...
464,537.0,36.0,4.0,136.817810,165.613037,160.489578
465,538.0,36.0,3.0,144.492401,177.775467,170.220093
466,539.0,36.0,2.0,136.143845,161.712265,157.716476
467,540.0,36.0,1.0,172.319504,184.853439,164.557877


## Save the dataframe as a CSV

In [173]:
## Per-pixel
# outgrid_df.to_csv('/Users/kathleenkanaley/Desktop/perpixel_hemp_22_08_10.csv',index=False)

## Per-plant (experimental unit)
#final_df.to_csv('/Users/kathleenkanaley/Desktop/perplant_hemp_22_08_10.csv',index=False)