In [1]:
from pprint import pprint
from shapely.geometry import mapping
import geopandas as gpd, rasterio, os
from PIL import Image
from rasterio.mask import mask
from rasterio.plot import reshape_as_image

# --- Configuration: edit these values to point at your own data ---
orthomosaic_path = "D:/FCAT/FCAT2APPK.tif"  # orthomosaic raster that the annotation crops are cut from
output_dir = "D:/FCAT/annotation_crops"  # folder where any outputs will be dumped
input_csv = "D:/FCAT/Orthomosaic_1_training_data_v3.csv"  # csv of points representing focal objects, here palm trees
class_colname = 'Especie'  # column where the object "class" is located in the input_csv
object_size = 12  # estimated size of our focal object, here a palm tree, given in meters

# Create the output folder (and any missing parents). exist_ok=True makes the cell
# safe to re-run and avoids the race between checking for the folder and creating it.
os.makedirs(output_dir, exist_ok=True)

In [2]:
### This section converts the points from the CSV file into square boxes.
### It is not necessary if your annotations are already boxes.

# Open the orthomosaic. Its CRS is attached to the boxes below, and the same open
# handle is read again by the cropping cell, so it is deliberately left open here.
dataset = rasterio.open(orthomosaic_path)

# The box size below is computed in degrees and the boxes are labelled with the
# raster's CRS, so the whole cell only makes sense for a lat/lon (geographic) raster.
# Fail here with a clear message instead of producing misplaced boxes that only
# surface later as an obscure error (or near-empty crops) during masking.
if dataset.crs is not None and not dataset.crs.is_geographic:
    raise ValueError(
        f"{orthomosaic_path} does not use a geographic (lat/lon) CRS ({dataset.crs}); "
        "the point coordinates and the degree-based buffer assume a lat/lon raster."
    )

# Read the point annotations; the POINT_X / POINT_Y columns hold the coordinates.
pts = gpd.read_file(input_csv)
# NOTE: despite its name, `gdf` is an array of point geometries, not a GeoDataFrame.
gdf = gpd.points_from_xy(pts.POINT_X, pts.POINT_Y)

# Half the object size is the distance from the box centre to each edge, in meters.
# It is converted to degrees with a single factor: one degree is roughly 111.3 km
# at the equator. Degrees of longitude shrink with cos(latitude), so away from the
# equator the boxes become narrower east-west than `object_size`.
METERS_PER_DEGREE = 111300
buffer_dist = (object_size / 2) / METERS_PER_DEGREE

# Buffer each point into a disc and keep the disc's bounding rectangle (a square box).
box_list = [pt.buffer(buffer_dist).envelope for pt in gdf]

# Assemble a new geodataframe pairing every box with its class label.
boxes = gpd.GeoDataFrame(
    {class_colname: pts[class_colname], 'geometry': box_list},
    crs=dataset.crs,
)

# We now have a geodataframe of species-labeled boxes extending 'buffer_dist' around each point.
print(boxes.head())

             Especie                                           geometry
0    Attalea colenda  POLYGON ((-79.66497 0.36761, -79.66487 0.36761...
1  Oenocarpus bataua  POLYGON ((-79.66444 0.36793, -79.66433 0.36793...
2  Oenocarpus bataua  POLYGON ((-79.66539 0.36803, -79.66528 0.36803...
3  Oenocarpus bataua  POLYGON ((-79.66847 0.36828, -79.66836 0.36828...
4  Oenocarpus bataua  POLYGON ((-79.66939 0.36877, -79.66928 0.36877...


In [3]:
# File name of the orthomosaic without its directory or extension (e.g. "FCAT2APPK").
# os.path handles either path separator and only strips the final extension,
# so names containing extra dots are not truncated.
orthomosaic_name = os.path.splitext(os.path.basename(orthomosaic_path))[0]

# Walk the boxes positionally: k is the running annotation number used in the file name.
for k, (label, geometry) in enumerate(zip(boxes[class_colname], boxes['geometry'])):

    ### this variable is your output filename - feel free to include more metadata in here
    image_name = f"{orthomosaic_name}_{k}_{label}.tif"

    # Cut the box out of the orthomosaic; crop=True trims the result to the box extent.
    # The returned affine transform is not used: the crop is saved through PIL,
    # which is not given any georeferencing.
    out_img, _transform = mask(dataset, [geometry], crop=True)
    img = Image.fromarray(reshape_as_image(out_img))

    # Each class gets its own sub-folder; create it on first use.
    new_output_dir = f"{output_dir}/{label}"
    os.makedirs(new_output_dir, exist_ok=True)

    path = os.path.join(new_output_dir, image_name)
    img.save(path)

    # Light progress report every 100 crops.
    if k % 100 == 0:
        print(f"{k} annotations cropped so far, up to {image_name}")

print("script complete")

0 annotations cropped so far, up to FCAT2APPK_0_Attalea colenda.tif
100 annotations cropped so far, up to FCAT2APPK_100_Iriartea deltoidea.tif
200 annotations cropped so far, up to FCAT2APPK_200_Bottlebrush unk..tif
300 annotations cropped so far, up to FCAT2APPK_300_Bottlebrush unk..tif
400 annotations cropped so far, up to FCAT2APPK_400_Fan unk..tif
500 annotations cropped so far, up to FCAT2APPK_500_Bottlebrush unk..tif
600 annotations cropped so far, up to FCAT2APPK_600_Bottlebrush unk..tif
700 annotations cropped so far, up to FCAT2APPK_700_Bottlebrush unk..tif
800 annotations cropped so far, up to FCAT2APPK_800_Bottlebrush unk..tif
900 annotations cropped so far, up to FCAT2APPK_900_Bottlebrush unk..tif
1000 annotations cropped so far, up to FCAT2APPK_1000_Bottlebrush unk..tif
1100 annotations cropped so far, up to FCAT2APPK_1100_Bottlebrush unk..tif
1200 annotations cropped so far, up to FCAT2APPK_1200_Bottlebrush unk..tif
1300 annotations cropped so far, up to FCAT2APPK_1300_Bo