# Ghana Drone Image Pre-Processing

This notebook cleans up the original ghana drone image data's file name and it's catalog for better organization and readability.

## Imports

In [8]:
import os
import shutil
import glob
import geopandas as gpd
from pathlib import Path

## Define paths and loads catalog

In [3]:
# I/O settings

proj_dir = "/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images" # Path to project directory

raw_path = Path(proj_dir) / "raw_oil_palm" # Path to raw images
processed_path = Path(proj_dir) / "processed_oil_palm" # Path to processed images
bbox_path = Path(proj_dir) / "labels_oil_palm" / "ortho_labeled_areas.geojson" # Path to bounding box file version 1
labels_path = Path(proj_dir) / "labels_oil_palm" / "oil_palm_labels.geojson" # Path to bounding box file version 1

In [4]:
# Read in catalog data from geojson with geopandas
bbox = gpd.read_file(bbox_path)
labels = gpd.read_file(labels_path)

In [5]:
labels.head()

Unnamed: 0,CID,SID,Labeller,prim_crop,sec_crop,confidence,layer,Age,Density,Notes,geometry
0,1647,248,Jay,oil palm,,high,ortho_cid1301,,,,"MULTIPOLYGON (((-1.94505 6.31443, -1.94506 6.3..."
1,1647,248,Jay,oil palm,,high,ortho_cid1647,,,,"MULTIPOLYGON (((-1.94342 6.31437, -1.94348 6.3..."
2,1647,248,Jay,oil palm,,high,ortho_cid1647,,,,"MULTIPOLYGON (((-1.94599 6.31469, -1.94592 6.3..."
3,1647,248,Jay,oil palm,,high,ortho_cid1647,,,,"MULTIPOLYGON (((-1.94708 6.31357, -1.94711 6.3..."
4,1647,248,Jay,oil palm,,high,ortho_cid1647,,,,"MULTIPOLYGON (((-1.94174 6.31585, -1.94175 6.3..."


In [6]:
bbox.head()

Unnamed: 0,CID,SID,Labeller,Orthophoto,geometry
0,1647,248,Jay,cid_1647,"MULTIPOLYGON (((-1.94731 6.31356, -1.94742 6.3..."
1,1301,134,Jay,cid_1301,"MULTIPOLYGON (((-2.64725 7.06104, -2.63992 7.0..."
2,1828,384,Jay,cid_1828,"MULTIPOLYGON (((-0.94682 5.99601, -0.94019 5.9..."
3,1802,191,jay,cid_1802,"MULTIPOLYGON (((-0.93208 6.34128, -0.92516 6.3..."
4,1821,202,jay,cid_1821,"MULTIPOLYGON (((-0.74696 6.14088, -0.74332 6.1..."


In [9]:
def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))

In [11]:
file_list = listdir_nohidden(raw_path)
file_list[1:10]

['/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1248_sid54_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1248_sid73_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1301_sid134_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1301_sid170_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1301_sid171_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1325_sid98_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1325_sid250_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1325_sid295_cog.tif',
 '/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/raw_oil_palm/ortho_cid1344_sid37_cog.tif']

## Renaming and Copy the Raw Images

This section would rename and copy the raw images to a new directory without modifying the original raw images

In [19]:
bbox_new = bbox.copy()

for row in bbox_new.iterrows():
    temp_name = f"ortho_cid{row[1]['CID']}_sid{row[1]['SID']}_cog.tif"
    bbox_new.at[row[0], "Orthophoto"] = temp_name

bbox_new = bbox_new.rename(columns={'Orthophoto': 'file_name'})
bbox_new.head()


Unnamed: 0,CID,SID,Labeller,file_name,geometry
0,1647,248,Jay,ortho_cid1647_sid248_cog.tif,"MULTIPOLYGON (((-1.94731 6.31356, -1.94742 6.3..."
1,1301,134,Jay,ortho_cid1301_sid134_cog.tif,"MULTIPOLYGON (((-2.64725 7.06104, -2.63992 7.0..."
2,1828,384,Jay,ortho_cid1828_sid384_cog.tif,"MULTIPOLYGON (((-0.94682 5.99601, -0.94019 5.9..."
3,1802,191,jay,ortho_cid1802_sid191_cog.tif,"MULTIPOLYGON (((-0.93208 6.34128, -0.92516 6.3..."
4,1821,202,jay,ortho_cid1821_sid202_cog.tif,"MULTIPOLYGON (((-0.74696 6.14088, -0.74332 6.1..."


In [21]:
bbox_new.to_file("/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/labels_oil_palm/ortho_labeled_areas_v1_1.geojson", driver="GeoJSON")

In [20]:
labels_new = labels.copy()

for row in labels_new.iterrows():
    temp_name = f"ortho_cid{row[1]['CID']}_sid{row[1]['SID']}_cog.tif"
    labels_new.at[row[0], "layer"] = temp_name

labels_new = labels_new.rename(columns={'layer': 'file_name'})
labels_new.head()

Unnamed: 0,CID,SID,Labeller,prim_crop,sec_crop,confidence,file_name,Age,Density,Notes,geometry
0,1647,248,Jay,oil palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94505 6.31443, -1.94506 6.3..."
1,1647,248,Jay,oil palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94342 6.31437, -1.94348 6.3..."
2,1647,248,Jay,oil palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94599 6.31469, -1.94592 6.3..."
3,1647,248,Jay,oil palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94708 6.31357, -1.94711 6.3..."
4,1647,248,Jay,oil palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94174 6.31585, -1.94175 6.3..."


In [22]:
labels_new["prim_crop"] = labels_new["prim_crop"].apply(lambda x: "oil_palm" if x == "oil palm" else "other")
labels_new.head()

Unnamed: 0,CID,SID,Labeller,prim_crop,sec_crop,confidence,file_name,Age,Density,Notes,geometry
0,1647,248,Jay,oil_palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94505 6.31443, -1.94506 6.3..."
1,1647,248,Jay,oil_palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94342 6.31437, -1.94348 6.3..."
2,1647,248,Jay,oil_palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94599 6.31469, -1.94592 6.3..."
3,1647,248,Jay,oil_palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94708 6.31357, -1.94711 6.3..."
4,1647,248,Jay,oil_palm,,high,ortho_cid1647_sid248_cog.tif,,,,"MULTIPOLYGON (((-1.94174 6.31585, -1.94175 6.3..."


In [23]:
labels_new.to_file("/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/labels_oil_palm/oil_palm_labels_v1_1.geojson", driver="GeoJSON")

In [None]:
# Create a list of all the files in the raw directory
file_list = os.listdir(raw_path)

# Loop through the list of files and copy them to the processed directory
for file in file_list:
  ori_path = os.path.join(raw_path, file)
  if not bbox[bbox.ortho == file]["name"].empty:
    new_name = bbox[bbox.ortho == file]["name"].values[0] + ".tif"
    grid_path = os.path.join(processed_path, new_name)
    if not os.path.exists(grid_path):
        shutil.copy(ori_path, grid_path)

## Generating new catalogs

In [None]:
bbox_new = bbox.copy()
bbox_new["ortho"] = bbox_new["name"].apply(lambda x: x + ".tif")
bbox_new = bbox_new.rename(columns={'name': 'grid_name', 'ortho': 'file_name'})
bbox_new.head()

In [None]:
labels_new = labels.copy()
labels_new["ortho"] = labels_new["name"].apply(lambda x: x + ".tif")
labels_new = labels_new.rename(columns={'name': 'grid_name', 'ortho': 'file_name'})
labels_new.head()

In [None]:
labels_new.to_file("/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/labels/class2_all_fix_v1_2.geojson", driver="GeoJSON")

In [None]:
bbox_new.to_file("/Volumes/sTeeeve/GIS_Data/Ghana_Drone_Images/labels/ortho_exts_maingrid_rectified_v1_1.geojson", driver="GeoJSON")