In [7]:
from src import pipeline
from src import composite_analysis
from src import classification
import os
import glob
import rasterio
import pandas as pd
import numpy as np
from importlib import reload
import matplotlib.pyplot as plt
import geopandas as gpd
import contextily as ctx
from shapely.geometry import box
from project_config import get_config

root = '.'

data_folder = pipeline.make_new_dir(root, 'data')
raw_folder = pipeline.make_new_dir(root, 'data/raw')
clipped_folder = pipeline.make_new_dir(root, 'data/clipped')
stacked_folder = pipeline.make_new_dir(root, 'data/stacked')
derived_folder = pipeline.make_new_dir(root, 'data/derived')
classified_folder = pipeline.make_new_dir(root, 'output/classified')
rois_folder = pipeline.make_new_dir(root, 'output/rois')
rois_initial_analysis_dir = pipeline.make_new_dir(rois_folder, 'initial_analysis')
class_train_rois_folder = pipeline.make_new_dir(rois_folder, 'classification_training_rois')
tests_folder = pipeline.make_new_dir(root, 'tests')
envi_folder = pipeline.make_new_dir(data_folder, 'envi')

cfg = get_config()
band_map = cfg.band_map.to_dict()
bands_to_keep = cfg.bands.to_dict()
composites = cfg.composites.get_all()

## Obtain the study area bounding box
To ensure perfect consistency with ENVI, ENVI was used to clip a raw image and the bounding box and CRS was obtained programmatically from the result

In [8]:
envi_clipped = envi_folder + r'/new_clipped_RGB.img'

reload(pipeline)
reload(classification)

replace_existing = False
extract_raw_files = True
stack_files = True
classify_files = True

if extract_raw_files:
    ### List the dates for which raw data exists
    raw_data_dates = [pipeline.extract_scene_date(x) for x in os.listdir(raw_folder)]

    print(f'Total number of raw files: {len(raw_data_dates)}')
    years = sorted(set(d[:4] for d in raw_data_dates))

    grouped = {y: [d for d in raw_data_dates if d.startswith(y)] for y in years}

    max_len = max(len(v) for v in grouped.values())
    for y in years:
        grouped[y] += [np.nan] * (max_len - len(grouped[y]))

    df = pd.DataFrame(grouped)
    display(df)

    with rasterio.open(envi_clipped) as src:
        study_area_bbox, study_area_crs = src.bounds, src.crs
    bbox_geom = box(*study_area_bbox)
    gdf = gpd.GeoDataFrame({'geometry': [bbox_geom]}, crs=study_area_crs)
    files_clipped = pipeline.clip_raw_scenes_to_study_area(raw_folder, clipped_folder, study_area_bbox, bands_to_keep, replace_existing)
    print(f'Number of raw data files clipped: {files_clipped}')


## Take the clipped bands and stack them
if stack_files:
    files_stacked = pipeline.stack_all_bands_in_dir(clipped_folder, stacked_folder, bands_to_keep, replace_existing=replace_existing)
print(f'Files stacked: {files_stacked}')

## Create classified tif files from stacked images
if classify_files:
    images_classified = classification.train_and_classify(roi_sources = classification.roi_sources,                             
                       roi_folder_path = class_train_rois_folder,
                       img_folder_path = stacked_folder,
                       classified_folder_path = classified_folder,
                       cloud_masking = True,
                       plot = False,
                       replace_existing=True)
print(f'Images Classified: {images_classified}')

## Generate composite images from stacks
composites_created = pipeline.build_composites_from_stacks(stacked_folder, derived_folder, composites, band_map, replace_existing=replace_existing)
print(f'Composites generated: {composites_created}')


Total number of raw files: 63


Unnamed: 0,2013,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
0,20131017.0,20160127.0,20170505.0,20180508.0,20190204.0,20200122.0,20210108.0,20220127.0,20230114,20240202.0,20250220.0
1,,20160212.0,20171012.0,20180609.0,20190324.0,20200310.0,20210313.0,20220503.0,20230319,20240422.0,20250527.0
2,,20160502.0,20171129.0,20180727.0,20190527.0,20200411.0,20210430.0,20220706.0,20230506,20240727.0,20250316.0
3,,20160603.0,20171231.0,20180913.0,20190612.0,20200513.0,20210617.0,20220807.0,20231130,20240929.0,20250722.0
4,,20161110.0,,20181031.0,20190714.0,20201207.0,20210804.0,20221026.0,20231216,20241015.0,
5,,,,,20190916.0,,20211023.0,20221127.0,20230223,20241218.0,
6,,,,,20191205.0,,20211124.0,20220612.0,20230412,20240226.0,
7,,,,,,,20211210.0,,20230530,,
8,,,,,,,,,20230701,,
9,,,,,,,,,20231005,,


Number of raw data files clipped: 252
Files stacked: 28
Images Classified: 63


  # We rely on the fact that band_names were set previously in the stack
  swir = bands[band_to_index[band_map[sensor]['SWIR1']] - 1]
  bands, band_names, band_to_index, profile = get_bands_from_stack(stack_path)
  band_to_index = {name: i + 1 for i, name in enumerate(band_names)}


Composites generated: 336
