In [3]:
from src import pipeline
from src import composite_analysis
from src import classification
import os
import glob
import rasterio
import pandas as pd
import numpy as np
from importlib import reload
import matplotlib.pyplot as plt
import geopandas as gpd
import contextily as ctx
from shapely.geometry import box
from project_config import get_config

# Project root; every working directory below is created relative to it.
root = '.'

# Data directories. The value returned by make_new_dir is what later code
# uses as the folder path, so the calls are kept in their original order.
data_folder, raw_folder, clipped_folder, stacked_folder, derived_folder = (
    pipeline.make_new_dir(root, subdir)
    for subdir in ('data', 'data/raw', 'data/clipped', 'data/stacked', 'data/derived')
)

# Output directories; the two ROI sub-folders are created under rois_folder.
classified_folder, rois_folder = (
    pipeline.make_new_dir(root, subdir)
    for subdir in ('output/classified', 'output/rois')
)
rois_initial_analysis_dir, class_train_rois_folder = (
    pipeline.make_new_dir(rois_folder, subdir)
    for subdir in ('initial_analysis', 'classification_training_rois')
)

tests_folder = pipeline.make_new_dir(root, 'tests')
# Created under data_folder (not root).
envi_folder = pipeline.make_new_dir(data_folder, 'envi')

# Pull the band/composite definitions out of the project configuration once,
# so the rest of the notebook works with plain Python objects.
cfg = get_config()
band_map = cfg.band_map.to_dict()
bands_to_keep = cfg.bands.to_dict()
composites = cfg.composites.get_all()

## Obtain the study area bounding box
To ensure perfect consistency with ENVI, ENVI was used to clip a raw image, and the bounding box and CRS were obtained programmatically from the result.

In [6]:
# Run switches for this cell.
replace_existing = True      # passed to the clipping and composite-building calls
extract_raw_files = False    # gates the raw-scene listing / clipping branch
stack_files = False          # passed as the last argument of stack_all_bands_in_dir
classify_files = True        # passed as replace_existing to train_and_classify

# Re-import the local modules so that edits made on disk since the first
# import are picked up without restarting the kernel.
reload(pipeline)
reload(classification)

# Image previously clipped in ENVI; its bounds and CRS are read from it
# when extract_raw_files is enabled.
envi_clipped = envi_folder + '/new_clipped_RGB.img'

if extract_raw_files:
    # Collect one scene-date string per entry in the raw folder. The first
    # four characters of each string are treated as the year below.
    raw_data_dates = [pipeline.extract_scene_date(x) for x in os.listdir(raw_folder)]

    print(f'Total number of raw files: {len(raw_data_dates)}')
    years = sorted(set(d[:4] for d in raw_data_dates))

    # One list of dates per year; these become the columns of the overview table.
    grouped = {y: [d for d in raw_data_dates if d.startswith(y)] for y in years}

    # Pad every column with NaN to the length of the longest one so the
    # DataFrame constructor accepts them. default=0 keeps an empty raw folder
    # from raising ValueError on max() of an empty sequence.
    max_len = max((len(v) for v in grouped.values()), default=0)
    for y in years:
        grouped[y] += [np.nan] * (max_len - len(grouped[y]))

    df = pd.DataFrame(grouped)
    display(df)

    # Read the study-area extent and CRS from the ENVI-clipped reference image.
    with rasterio.open(envi_clipped) as src:
        study_area_bbox, study_area_crs = src.bounds, src.crs
    # Keep the extent as a one-row GeoDataFrame as well (not used in this cell).
    bbox_geom = box(*study_area_bbox)
    gdf = gpd.GeoDataFrame({'geometry': [bbox_geom]}, crs=study_area_crs)
    files_clipped = pipeline.clip_raw_scenes_to_study_area(raw_folder, clipped_folder, study_area_bbox, bands_to_keep, replace_existing)
    print(f'Number of raw data files clipped: {files_clipped}')


## Stack the clipped bands found in clipped_folder into stacked_folder
files_stacked = pipeline.stack_all_bands_in_dir(
    clipped_folder,
    stacked_folder,
    bands_to_keep,
    stack_files,
)
print(f'Files stacked: {files_stacked}')

## Train on the classification ROIs and write classified tifs for the stacked images
images_classified = classification.train_and_classify(
    roi_sources=classification.roi_sources,
    roi_folder_path=class_train_rois_folder,
    img_folder_path=stacked_folder,
    classified_folder_path=classified_folder,
    cloud_masking=True,
    plot=False,
    replace_existing=classify_files,
)
print(f'Images Classified: {images_classified}')

## Build the configured composite images from the stacks
composites_created = pipeline.build_composites_from_stacks(
    stacked_folder,
    derived_folder,
    composites,
    band_map,
    replace_existing=replace_existing,
)
print(f'Composites generated: {composites_created}')


Files stacked: 0
Images Classified: 35


  (nir - red) / (nir + red)
  composite = (green - swir) / (green + swir)
  (swir - nir) / (swir + nir)
  composite = 2.5 * (nir - red) / (nir + 6*red - 7.5*blue + 1)


Composites generated: 420


'd'