### Creating masks from geojson files

In [None]:
# Create a binary mask from a geojson file: 1 = building, 0 = no building
def geojson_to_mask(geojson_path, image_path, output_mask_path=None):
    """Rasterize the polygons of a GeoJSON file onto the grid of a reference image.

    Args:
        geojson_path: Path to the GeoJSON file holding the polygons to burn in.
        image_path: Path to the reference raster. Its height, width, affine
            transform and CRS define the grid of the mask.
        output_mask_path: Optional output path. When truthy, the mask is also
            written there as a single-band uint8 GeoTIFF carrying the
            reference image's CRS and transform.

    Returns:
        A uint8 array of shape (height, width): 1 inside a polygon, 0 elsewhere.
        A file without any usable polygon yields an all-zero mask.
    """
    import numpy as np  # local import keeps the function self-contained

    # Load the reference image (only its metadata is needed)
    with rasterio.open(image_path) as src:
        img_shape = (src.height, src.width)
        img_transform = src.transform
        img_crs = src.crs

    # Load the polygons and reproject them to the image CRS when they differ.
    # An empty frame has nothing to reproject, so it is left untouched.
    gdf = gpd.read_file(geojson_path)
    if not gdf.empty and gdf.crs != img_crs:
        gdf = gdf.to_crs(img_crs)

    # Keep only geometries that can actually be burned in: missing (None),
    # empty and invalid ones are skipped.
    shapes = [
        (geom, 1)
        for geom in gdf.geometry
        if geom is not None and not geom.is_empty and geom.is_valid
    ]

    if shapes:
        mask = rasterize(
            shapes,
            out_shape=img_shape,
            transform=img_transform,
            fill=0,
            dtype='uint8'
        )
    else:
        # rasterize() rejects an empty shape list; a tile without polygons
        # is simply all background.
        mask = np.zeros(img_shape, dtype='uint8')

    # Optionally save the mask with the same georeferencing as the image
    if output_mask_path:
        with rasterio.open(
            output_mask_path,
            'w',
            driver='GTiff',
            height=mask.shape[0],
            width=mask.shape[1],
            count=1,
            dtype='uint8',
            crs=img_crs,
            transform=img_transform,
        ) as dst:
            dst.write(mask, 1)

    return mask


In [None]:
# Input folders (building footprints and reference images) and mask output folder
geojson_dir = "AWS_files/AOI_3_Paris_Train/geojson/buildings"
image_dir = "AWS_files/AOI_3_Paris_Train/RGB-PanSharpen"
output_dir = "AWS_files/masks"

os.makedirs(output_dir, exist_ok=True)

# Build one mask per geojson file found in geojson_dir
for fname in os.listdir(geojson_dir):
    # Anything that is not a geojson file is ignored
    if not fname.endswith(".geojson"):
        continue

    # Drop the extension and the "buildings" marker to get the part of the
    # name shared with the image, e.g. "buildings_X.geojson" -> "_X"
    stem = fname.replace(".geojson", "").replace("buildings", '')

    geojson_path = os.path.join(geojson_dir, fname)
    image_path = os.path.join(image_dir, f"RGB-PanSharpen{stem}.tif")
    output_path = os.path.join(output_dir, f"RGB-PanSharpen{stem}_mask.tif")

    mask = geojson_to_mask(geojson_path, image_path, output_path)
    print(f"Created mask for: {fname}")


### Converting .tif images to .png (fastai does not accept .tif)

In [None]:
def convert_tif_to_png(tif_path, png_path):
    """Convert a raster file with at least three bands into an 8-bit PNG.

    The first three bands are kept (assumed to be RGB). Data that is not
    already uint8 is rescaled linearly to 0-255 using the minimum and maximum
    taken over the three kept bands together.

    Args:
        tif_path: Path of the raster to read (str or path-like).
        png_path: Destination path handed to ``Image.fromarray(...).save``.

    Raises:
        ValueError: If the raster has fewer than three bands.
    """
    with rasterio.open(tif_path) as src:
        img = src.read()  # shape: (bands, H, W)

    # Use first 3 bands (assume RGB)
    if img.shape[0] < 3:
        # Accept plain strings as well as Path objects in the message
        name = getattr(tif_path, "name", tif_path)
        raise ValueError(f"Not enough bands in {name}")
    img = img[:3]

    # Normalize to 0-255 if dtype is not uint8
    if img.dtype != np.uint8:
        img = img.astype(np.float32)
        lo = img.min()
        hi = img.max()
        if hi > lo:
            img = (255 * (img - lo) / (hi - lo)).astype(np.uint8)
        else:
            # Constant image: the value range is zero, so scaling would
            # divide by zero. Emit a uniform black image instead.
            img = np.zeros(img.shape, dtype=np.uint8)

    img = np.transpose(img, (1, 2, 0))  # CHW to HWC
    Image.fromarray(img).save(png_path)

# Convert every training .tif image to .png:
# Source folder with the training .tif images and destination folder for the .png copies
src_dir = Path('AWS_files/AOI_3_Paris_Train/RGB-PanSharpen')
dst_dir = Path('AWS_files/images_png')
dst_dir.mkdir(exist_ok=True)

# Convert file by file; a failure is reported and the loop moves on
for tif_file in src_dir.glob("*.tif"):
    png_file = dst_dir / f"{tif_file.stem}.png"
    try:
        convert_tif_to_png(tif_file, png_file)
        print(f"Converted: {tif_file.name}")
    except Exception as err:
        print(f"❌ Failed to convert {tif_file.name}: {err}")


### Converting test .tif files into .png

In [None]:
# Source folder with the public test .tif images and destination folder for the .png copies
src_dir = Path('AWS_files/AOI_3_Paris_Test_Public/RGB-PanSharpen')
dst_dir = Path('AWS_files/test_images_png')
dst_dir.mkdir(exist_ok=True)

# Convert file by file; a failure is reported and the loop moves on
for tif_file in src_dir.glob("*.tif"):
    png_file = dst_dir / f"{tif_file.stem}.png"
    try:
        convert_tif_to_png(tif_file, png_file)
        print(f"Converted: {tif_file.name}")
    except Exception as err:
        print(f"❌ Failed to convert {tif_file.name}: {err}")