# Ghana Drone Images Chipping Pipeline

This notebook chips the existing Ghana drone images. The chipping pipeline is based on existing code from Lyndon Estes and Sam Khallaghi. The original drone image data comes from an existing project by Clark University and Farmerline, and this pipeline cuts the existing image tiles into smaller chips for training purposes.

## Imports

In [None]:
# importing necessary modules
import os
import sys
from pathlib import Path
import geopandas as gpd

In [None]:
# Resolve the package location as the parent of the current working directory.
# NOTE(review): this assumes the notebook is launched from a folder that sits
# directly under the package root -- confirm for your setup.
current_pth = os.getcwd()
pkg_path = os.path.split(current_pth)[0]

In [None]:
# Prepend the package root and its imageProcessing/ folder to the module
# search path. After this, sys.path starts with pkg_path followed by the
# imageProcessing/ folder, ahead of every pre-existing entry.
sys.path[:0] = [pkg_path, os.path.join(pkg_path, 'imageProcessing/')]

In [None]:
# Importing the imageProcessing package.
# importlib.reload re-executes the already-imported module, so edits made to
# the package source are picked up without restarting the kernel.
import importlib
import imageProcessing
importlib.reload(imageProcessing)
# The star import runs after the reload so the names bound in this notebook
# are the refreshed ones. The helpers called in later cells (list_full_path,
# create_msk_lbl_img, train_test_split_cat, chipping, plot_img_lbl_pair) are
# not imported anywhere else in this notebook, so they presumably come from
# this import -- confirm against the package.
from imageProcessing import *

## Config

In [None]:

# Specify the path to the project directory, i.e. the directory that the repo
# was cloned into.
proj_dir = Path("/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/")

# Configuration parameters
config = {

    # ---- I/O and path setup ----

    # Project directory
    "proj_dir": proj_dir,
    # Label file
    "label_path": proj_dir / "labels" / "class2_all_fix_v1_2.geojson",
    # Bounding box file of the labelled areas
    "labelled_area_path": proj_dir / "labels" / "ortho_exts_maingrid_rectified_v1_1.geojson",
    # Raw image directory
    "img_dir": proj_dir / "processed",
    # Where the rasterized image tiles are saved
    "img_out_dir": proj_dir / "working/image_tiles",
    # Where the rasterized label tiles are saved
    "lbl_out_dir": proj_dir / "working/label_tiles",
    # Where the rasterized mask tiles are saved
    "msk_out_dir": proj_dir / "working/mask_tiles",
    # Where the processed label chips are saved
    "lbl_chip_dir": proj_dir / "label_chips",
    # Where the processed image chips are saved
    "img_chip_dir": proj_dir / "image_chips",

    # ---- Image processing parameters ----

    # When doing binary crop classification, specify only one crop here.
    # The dominant crops are maize and fallow.
    "prim_crop": ["maize"],
    # If True, will create a binary mask for crop and non-crop
    "binary_mask": False,

    # Output format for the image chips. Geotiff, npz and pkl are available.
    # If npz or pkl, the image chips will be saved along with the label chips
    # as pairs in a list, and two files that contain the training and
    # validation chips will be created.
    # When no output format is specified, the default is tif and the chips
    # will be saved to the provided path.
    "output_format": "tif",

    # Threshold for the positive class in the mask, between 0 and 1
    "positive_threshold": 0.1,

    # Output resolution for images; the original was 2.5e-07.
    # Warning: using the res option might result in tiles with different
    # sizes (nrows, ncols).
    # NOTE(review): the create_msk_lbl_img call in this notebook passes
    # res=None, so this value is not forwarded there -- confirm which of
    # res or nrows/ncols is intended for this run.
    "res": 10e-07,  # Used in Oil-Palm

    # When res is not used, the number of rows and columns in each tile must
    # be specified. Used in maize and rice.
    "nrows": 5000,  # number of rows in each tile
    "ncols": 5000,  # number of columns in each tile

    "tile_size": 256,  # tile size in pixels
    "overlap": 32,     # overlap between tiles in pixels

}

# Create the output directories if they don't exist.
# mkdir(parents=True, exist_ok=True) creates any missing parent folders and
# does nothing for a directory that already exists, so no separate existence
# check (and no check-then-create race) is needed. It still raises
# FileExistsError when the path exists but is not a directory.
dir_list = [
    config[key]
    for key in ("img_out_dir", "lbl_out_dir", "msk_out_dir", "img_chip_dir", "lbl_chip_dir")
]
for folder in dir_list:
    Path(folder).mkdir(parents=True, exist_ok=True)

## Checking/Loading Label Information

In [None]:
# Read the bounding boxes of the labelled areas and preview the first rows
labelled_area_file = config["labelled_area_path"]
bbox = gpd.read_file(labelled_area_file)
bbox.head()

In [None]:
# Read the labels for the image tiles and preview the first rows
label_file = config["label_path"]
labels = gpd.read_file(label_file)
labels.head()

In [None]:
# Trim both tables down to the columns used from here on
label_columns = ["prim_crop", "confidence", "file_name", "geometry"]
bbox_columns = ["file_name", "geometry"]
labels = labels[label_columns]
bbox = bbox[bbox_columns]

# Collect the paths of the image tiles in the image directory; the second
# argument is the pattern handed to list_full_path to select the .tif files
img_paths = list_full_path(config["img_dir"], ".tif$")

## Chipping Process

In [None]:
# Build the image, mask and label rasters.
# res is passed as None here, so config["res"] is not forwarded; the tile
# dimensions are supplied through nrow/ncol instead.
img_list, lbl_list, msk_list = create_msk_lbl_img(
    img_paths,
    labels,
    bbox,
    proj_dir,
    config["msk_out_dir"],
    config["lbl_out_dir"],
    config["img_out_dir"],
    res=None,
    nrow=config["nrows"],
    ncol=config["ncols"],
    binary_mask=config["binary_mask"],
    prim_crop=config["prim_crop"],
)

In [None]:
# Build the chipping catalog from the image, label and mask rasters created
# above. The result is later passed to chipping() as chipping_cat.
# NOTE(review): judging by the function name this also assigns a train/test
# split to each entry -- confirm against train_test_split_cat.
chipping_catalog = train_test_split_cat(img_list, lbl_list, msk_list, proj_dir)

In [None]:
# Display the catalog returned by train_test_split_cat for inspection
chipping_catalog

In [None]:
# Run the chipping, driven by the catalog and the settings in config
chipping(
    proj_dir,
    config["lbl_chip_dir"],
    config["img_chip_dir"],
    config["output_format"],
    chipping_cat=chipping_catalog,  # returned from train_test_split_cat
    patch_size=config["tile_size"],
    overlap=config["overlap"],
    positive_class_threshold=config["positive_threshold"],
)

## Visualize chipped img and lbl pairs

In [None]:
# Loading the catalog of the chipped images and display it.
# pandas is imported explicitly here: no other cell in this notebook imports
# it, so without this line `pd` would only exist if the star import from
# imageProcessing happened to expose it.
import pandas as pd

cat = pd.read_csv(Path(proj_dir) / "working" / "chips_catalog.csv")
cat

In [None]:
# random is imported explicitly here: no other cell in this notebook imports
# it, so `random` would otherwise depend on what the star import exposes.
import random

band_composite = [1, 2, 3]  # RGB
stretch = True

# Randomly pick up to 3 distinct row positions from the catalog. Capping the
# sample size at len(cat) avoids the ValueError that random.sample raises when
# the catalog holds fewer than 3 chips; an empty catalog plots nothing.
samples = random.sample(range(len(cat)), min(3, len(cat)))

# Plotting the image chips and label chips.
# The catalog stores paths in its "images" and "labels" columns; they are
# joined onto proj_dir here. .iloc is used because the sampled values are row
# positions, not index labels.
for sample in samples:
    img_chip_path = Path(proj_dir) / cat["images"].iloc[sample]
    lbl_chip_path = Path(proj_dir) / cat["labels"].iloc[sample]
    plot_img_lbl_pair(img_chip_path, lbl_chip_path, band_composite, stretch)