In [11]:
import ee

# Initialize the `ee` client library up front; later cells construct
# `ee.ImageCollection` objects from the asset IDs defined below.
ee.Initialize()

In [12]:
from dataclasses import dataclass
from pathlib import Path
# set some globals
# DATA_DIR is passed to `get_shapefile_paths` and is where the `processed`
# output folder is created; the `manifest` and `reference` folders are created
# in its parent directory.
# NOTE(review): this is an absolute, user-specific path — it must be changed
# before the notebook can run on another machine.
DATA_DIR = Path("/home/rhamilton/code/cnwi/test_data/aoi_NS/data")

# set the Data for the images
@dataclass
class Payload:
    """Asset identifiers for the image inputs used by this notebook.

    Attributes:
        s1: Sentinel-1 — list of individual ``COPERNICUS/S1_GRD`` image IDs
        dc: Data cube Composites — asset path
        al: ALOS — collection ID
        ft: Fourier Transform — asset path
        ta: Terrain Analysis — asset path

    NOTE(review): none of the attributes below has a type annotation, so
    ``@dataclass`` does not register them as fields. They are plain class
    attributes shared by every instance (including the mutable ``s1`` list),
    and ``Payload()`` accepts no arguments.
    """
    # Hand-picked Sentinel-1 scenes (S1A and S1B); the timestamps embedded in
    # the IDs fall between 2019-05-27 and 2019-08-05.
    s1 = [
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190601T220203_20190601T220228_027492_031A28_EB74",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190601T220228_20190601T220253_027492_031A28_1D62",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190601T220253_20190601T220318_027492_031A28_B0EC",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190601T220318_20190601T220343_027492_031A28_3A0C",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190731T220207_20190731T220232_028367_0334A1_0ECA",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190731T220232_20190731T220257_028367_0334A1_32FF",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190731T220257_20190731T220322_028367_0334A1_4F99",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190731T220322_20190731T220347_028367_0334A1_7758",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190606T221027_20190606T221052_027565_031C53_C63B",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190606T221052_20190606T221117_027565_031C53_1088",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190606T221117_20190606T221142_027565_031C53_7FD5",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190606T221142_20190606T221207_027565_031C53_E704",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190805T221031_20190805T221056_028440_0336C5_08AD",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190805T221056_20190805T221121_028440_0336C5_8F55",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190805T221121_20190805T221146_028440_0336C5_322A",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190805T221146_20190805T221211_028440_0336C5_0053",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190527T215404_20190527T215429_027419_0317D3_FF9D",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190527T215429_20190527T215454_027419_0317D3_48EB",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190527T215454_20190527T215519_027419_0317D3_1009",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190527T215519_20190527T215544_027419_0317D3_501E",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190726T215408_20190726T215433_028294_033253_09E6",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190726T215433_20190726T215458_028294_033253_92C9",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190726T215458_20190726T215523_028294_033253_8B6E",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190726T215523_20190726T215548_028294_033253_7A46",
        "COPERNICUS/S1_GRD/S1B_IW_GRDH_1SDV_20190605T221811_20190605T221836_016567_01F302_2750",
        "COPERNICUS/S1_GRD/S1B_IW_GRDH_1SDV_20190605T221836_20190605T221901_016567_01F302_AFE0",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190729T221905_20190729T221934_028338_0333B8_EEBB",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190729T221934_20190729T221959_028338_0333B8_1E30",
        "COPERNICUS/S1_GRD/S1A_IW_GRDH_1SDV_20190729T221959_20190729T222024_028338_0333B8_BD9F",
    ]
    # Project-hosted assets (data cube, Fourier transform, terrain analysis)
    dc = "projects/fpca-336015/assets/cnwi-datasets/aoi_novascotia/datacube"
    ft = "projects/fpca-336015/assets/NovaScotia/fourier_transform"
    ta = "projects/fpca-336015/assets/NovaScotia/terrain_analysis"
    # ALOS PALSAR collection ID
    al = "JAXA/ALOS/PALSAR/YEARLY/SAR_EPOCH"



In [26]:
# Client side processing must be done first

In [27]:
# TODO: write reference data (e.g. code books for types) out to a CSV file
# This is the initial processing step for the features chain
# need to get the training data from the client
from cnwi.cnwilib.data import *
from cnwi.cnwilib.features import insert_values_into_features

# Discover the shapefiles under DATA_DIR and build the raw-data manifest from them.
shp_files = get_shapefile_paths(DATA_DIR)
manifest = create_raw_data_manifest(shp_files)

# save the manifest next to DATA_DIR
manifest_dir = DATA_DIR.parent / 'manifest'
# exist_ok=True keeps the cell re-runnable without a separate exists() check
manifest_dir.mkdir(parents=True, exist_ok=True)
manifest.to_csv(manifest_dir / 'manifest.csv')

# process the manifest
gdf_all = process_data_manifest(manifest)  # creates a geodataframe for all regions
if gdf_all.crs != 'EPSG:4326':
    # reassign instead of mutating in place so the cell stays idempotent
    gdf_all = gdf_all.to_crs('EPSG:4326')
# TODO need to insert a value column for the training data
# create the lookup for all regions
lookup = create_lookup_table(gdf_all)
gdf_all = insert_values_into_features(gdf_all, lookup)
print(gdf_all.head())

# save the lookup
reference_dir = DATA_DIR.parent / 'reference'
reference_dir.mkdir(parents=True, exist_ok=True)
lookup.to_csv(reference_dir / 'lookup.csv')

# save all the data
processed_dir = DATA_DIR / 'processed'
processed_dir.mkdir(parents=True, exist_ok=True)
gdf_all.to_file(processed_dir / 'all_regions.shp')


  class_name                    geometry  type region_id  value
0      Swamp  POINT (-62.48171 45.03326)   2.0       127      1
1      Swamp  POINT (-62.54104 45.01055)   2.0       127      1
2      Swamp  POINT (-63.36111 44.89482)   2.0       127      1
3      Swamp  POINT (-63.51164 44.85164)   2.0       127      1
4      Swamp  POINT (-63.65494 44.84893)   2.0       127      1


In [28]:
# need to process the files further
from typing import Generator, Tuple
def compute_by_region(manifest, gdf_all) -> Generator[Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame], None, None]:
    """Yield one ``(region, training)`` pair per ``region_id`` in the manifest.

    Args:
        manifest: table with ``region_id``, ``type`` and ``file_path`` columns.
        gdf_all: GeoDataFrame with ``region_id``, ``value``, ``type`` and
            ``geometry`` columns.

    Yields:
        ``(region, training)`` where ``region`` is the file read from the
        group's first ``type == 3`` manifest row, reprojected to EPSG:4326
        when its CRS differs, and ``training`` is the rows of ``gdf_all``
        with the same ``region_id``, restricted to the ``value``, ``type``
        and ``geometry`` columns (original index preserved).

    Raises:
        IndexError: if a region has no manifest row with ``type == 3``.
    """
    for region_id, group in manifest.groupby('region_id'):
        # file path(s) of this region's type-3 manifest rows; only the first is used
        type3_paths = group.loc[group["type"] == 3, "file_path"].values
        if len(type3_paths) == 0:
            # same exception type as the bare `.values[0]` lookup, but with context
            raise IndexError(
                f"region {region_id!r} has no manifest row with type == 3"
            )
        region = gpd.read_file(type3_paths[0])
        if region.crs != "EPSG:4326":
            region = region.to_crs(epsg=4326)
        # now get the training data
        training = gdf_all[gdf_all["region_id"] == region_id]
        yield (region, training[['value', 'type', 'geometry']])
    

In [29]:
# Take only the first (region, training) pair from the generator for inspection.
region, gdf = next(compute_by_region(manifest, gdf_all))

In [30]:
# I want to maintain the index
# the problem I'm going to run into is putting the subsetted data back together

Unnamed: 0,value,type,geometry
1348,1,2.0,POINT (-64.20332 45.87357)
1349,1,2.0,POINT (-63.00702 45.70105)
1350,1,2.0,POINT (-64.13768 46.10270)
1351,1,2.0,POINT (-63.64956 45.80755)
1352,1,2.0,POINT (-63.03562 45.70061)


In [None]:
# set up the data for the images
class ImageCollectionProc:
    """Wrap an asset ID (or a list of image IDs) as an ``ee.ImageCollection``.

    The constructor argument is handed to ``ee.ImageCollection`` through the
    ``collection`` setter; ``run`` returns that collection, optionally
    narrowed by an area of interest and/or a date range.
    """

    def __init__(self, arg: str | list[str]) -> None:
        self.collection = arg

    @property
    def collection(self) -> ee.ImageCollection:
        """The wrapped ``ee.ImageCollection``."""
        return self._collection

    @collection.setter
    def collection(self, arg: str | list[str]) -> None:
        # Accepts whatever ee.ImageCollection accepts; here a collection ID
        # string or a list of image IDs.
        self._collection = ee.ImageCollection(arg)

    def run(self, aoi=None, start: str | None = None, end: str | None = None) -> ee.ImageCollection:
        """Run the image collection processing
        Args:
            aoi: Area of Interest; when omitted (None) no spatial filter is
                applied, so ``run()`` can be called with no arguments
            start: start date
            end: end date
        Returns:
            ee.ImageCollection filtered by ``aoi`` (if given) and by the
            ``start``/``end`` range (only when both are given)
        """
        collection = self.collection
        if aoi is not None:
            collection = collection.filterBounds(aoi)
        # A date filter needs both ends; a lone start or end is ignored.
        if start and end:
            collection = collection.filterDate(start, end)
        return collection


# the data sets will be in an Image Collection for initial processing

# in SciOps the Images are more or less hand selected therefore minimal processing is needed at
# the image collection stage, left in an image collection b/c it is easier to isolate the images
# needed by region

# this is the initial processing step for the images in the chain
pyld = Payload()
# One ImageCollectionProc per payload entry, evaluated in the order
# dc, s1, al, ta, ft; run() is invoked with no arguments for each.
dcc, s1c, alc, tac, ftc = (
    ImageCollectionProc(asset).run()
    for asset in (pyld.dc, pyld.s1, pyld.al, pyld.ta, pyld.ft)
)

In [None]:
# Next we will need to build the data sets for the models; this is where a majority of the processing
# happens

from cnwi.cnwilib.image import ImageBuilder, ImageDirector, ImageStack

# create a stack object
# Images are added in the order: data cube, Sentinel-1, ALOS, terrain
# analysis, Fourier transform.
# NOTE(review): ImageStack, ImageBuilder and ImageDirector come from
# cnwi.cnwilib.image; their internals are not visible in this notebook.
stack = ImageStack() # storage Container for the images

# optical inputs
# Pattern used for the first three inputs: wrap the collection in an
# ImageBuilder, call the matching build_* method on an ImageDirector, then add
# the image exposed at `<director>.builder.image` to the stack.
dc_bldt = ImageBuilder(dcc)
dc_dir = ImageDirector(dc_bldt).build_data_cube()
stack.add(dc_dir.builder.image)

# SAR inputs
s1_bldt = ImageBuilder(s1c)
s1_dir = ImageDirector(s1_bldt).build_sentinel_1()
stack.add(s1_dir.builder.image)

# AL inputs
al_bldt = ImageBuilder(alc)
al_dir = ImageDirector(al_bldt).build_alos()
stack.add(al_dir.builder.image)

# TA inputs
# No builder/director step here: the collection is mosaicked and added as-is.
stack.add(tac.mosaic()) # processing done externally

# FT inputs
# Same as TA: mosaicked and added directly.
stack.add(ftc.mosaic()) # processing done externally
