# Updating Databases

In [2]:
import os
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import psycopg
import rioxarray as rxr
import xarray as xr
from geocube.api.core import make_geocube
from pystac_client import Client
from shapely.geometry import shape
from sqlalchemy import text, create_engine

## Loading Enschede Boundary

In [3]:
# Read the Enschede boundary from the GeoPackage, then derive an EPSG:4326
# version of it (used for the STAC `intersects` search and the coverage filter).
boundary_path = '../../vector/data/enschede_boundary.gpkg'
enschede = gpd.read_file(boundary_path)
enschede_4326 = enschede.to_crs('EPSG:4326')

## Connecting to Element 84's Earth Search API

In [5]:
# STAC API endpoint (Element 84 Earth Search v1) used for all scene searches.
EARTH_SEARCH_URL = 'https://earth-search.aws.element84.com/v1'
client = Client.open(EARTH_SEARCH_URL)

## Setting the Yearly Range

In [10]:
# Years to process, kept in ascending order so that years[0] is the earliest
# (cells further down treat years[0] as the baseline year).
years = sorted(['2024', '2019'])

# Independent working copy of the boundary; the compositing loop reprojects
# this copy to each raster's CRS without touching enschede_4326.
enschede_clip = enschede_4326.copy(deep=True)

## Creating Yearly Median Composite Images

In [26]:
# Build one median NDVI composite per year from low-cloud Sentinel-2 L2A scenes.
#
# `years` is intentionally NOT re-assigned here: it is defined (and sorted) in
# the "Setting the Yearly Range" cell, and later cells use years[0] as the
# baseline year. Re-assigning an unsorted list here silently undid that sort.
for year in years:

    # All scenes of the year that intersect the boundary with < 5 % cloud cover.
    search = client.search(
        collections=['sentinel-2-l2a'],
        intersects=enschede_4326['geometry'].iloc[0],
        datetime=f'{year}',
        query=['eo:cloud_cover<5']
    )

    items = search.item_collection()

    # An empty result would yield a frame without a 'geometry' column and
    # break the coverage filter below, so bail out for this year explicitly.
    if len(items) == 0:
        print(f'No scenes found for {year}; skipping composite')
        continue

    total_images = gpd.GeoDataFrame(
        [
            {
                'item': item,
                'geometry': shape(item.geometry),
            }
            for item in items
        ],
        crs='EPSG:4326'
    )

    # Keep only scenes whose footprint covers the whole boundary, so every
    # clipped raster shares the same extent.
    enschede_images = total_images[total_images['geometry'].covers(enschede_4326['geometry'].iloc[0])]

    if enschede_images.empty:
        # Without this guard `inputs_folder` would be undefined (first year) or
        # still point at the previous year's folder, producing a wrong composite.
        print(f'No scenes fully cover the boundary for {year}; skipping composite')
        continue

    inputs_folder = Path(f'../data/{year}/inputs')
    inputs_folder.mkdir(parents=True, exist_ok=True)

    # Track exactly which rasters this run produced. Composing from a directory
    # listing would also pick up stale files left by earlier runs.
    written_files = []

    for item in enschede_images['item']:

        assets = item.assets

        red = rxr.open_rasterio(assets['red'].href, masked=True).squeeze()

        # Reproject the clip geometry to the raster CRS (only when it differs).
        if enschede_clip.crs != red.rio.crs:
            enschede_clip = enschede_clip.to_crs(red.rio.crs)

        red = red.rio.clip(enschede_clip.geometry.values, enschede_clip.crs, drop=True)

        nir = rxr.open_rasterio(assets['nir'].href, masked=True).squeeze()
        nir = nir.rio.clip(enschede_clip.geometry.values, enschede_clip.crs, drop=True)

        ndvi = (nir - red) / (nir + red)

        # Replace NaN/inf (masked pixels, zero denominators) with the nodata value.
        ndvi = ndvi.where(np.isfinite(ndvi), -9999)
        ndvi = ndvi.rio.write_nodata(-9999)

        input_file = inputs_folder / f'ndvi_{item.id}.tif'

        if input_file.exists():
            input_file.unlink()

        ndvi.rio.to_raster(input_file, driver='GTiff')
        written_files.append(input_file)
        print(f'Exported {item.id}')

    # Re-open with masked=True so the -9999 nodata pixels become NaN and are
    # ignored by the median.
    rasters = [
        rxr.open_rasterio(file, masked=True)
        for file in written_files
    ]

    rasters_concat = xr.concat(rasters, dim='time')
    composite = rasters_concat.median(dim='time')

    composite = composite.where(np.isfinite(composite), -9999)
    composite = composite.rio.write_nodata(-9999)

    composite_folder = Path(f'../data/{year}/composite')
    composite_file_name = f'ndvi_composite_{year}.tif'
    composite_file = composite_folder / composite_file_name
    composite_file.parent.mkdir(parents=True, exist_ok=True)

    if composite_file.exists():
        composite_file.unlink()

    composite.rio.to_raster(composite_file, driver='GTiff')
    print(f'Exported {composite_file_name}')

Exported S2B_32ULC_20240720_0_L2A
Exported S2A_32ULC_20240625_0_L2A
Exported S2A_32ULC_20240127_0_L2A
Exported ndvi_composite_2024.tif
Exported S2B_32ULC_20190826_0_L2A
Exported S2B_32ULC_20190826_1_L2A
Exported S2B_32ULC_20190627_0_L2A
Exported S2B_32ULC_20190627_1_L2A
Exported S2A_32ULC_20190513_1_L2A
Exported S2A_32ULC_20190513_0_L2A
Exported S2B_32ULC_20190329_0_L2A
Exported S2B_32ULC_20190329_1_L2A
Exported S2B_32ULC_20190227_0_L2A
Exported ndvi_composite_2019.tif


## Updating Database Values

In [160]:
# Fold the per-year zonal-statistics CSVs into one "latest" table keyed on `sn`:
# start from the earliest year and overlay each later year's values.
# NOTE(review): the only stats export visible in this notebook writes GeoJSON,
# not CSV, and has no `sn` column — confirm where these CSV inputs come from.
if not years:
    raise ValueError('years list is empty')

# Sort locally so the earliest year is always the baseline, however `years`
# was last assigned.
ordered_years = sorted(years)
init_year = ordered_years[0]
baseline_df = pd.read_csv(f'../data/{init_year}/stats/zonal_stats_{init_year}.csv')

# Every column except the key is treated as an updatable value.
value_columns = [
    column
    for column in baseline_df.columns
    if column != 'sn'
]

for year in ordered_years[1:]:
    comparison_df = pd.read_csv(f'../data/{year}/stats/zonal_stats_{year}.csv')

    # A left join keeps every baseline row in its original order; `validate`
    # rejects duplicate keys in the newer table so the row count cannot grow.
    # (An inner join could drop rows and renumber the index, after which the
    # column assignment below would attach values to the wrong rows.)
    merged_df = baseline_df.merge(
        comparison_df,
        on='sn',
        how='left',
        validate='many_to_one',
        suffixes=('_base', '_new')
    )
    merged_df.index = baseline_df.index

    for column in value_columns:
        new_column = f'{column}_new'
        if new_column not in merged_df.columns:
            # Column absent from the newer table: nothing to update.
            continue
        new_values = merged_df[new_column]
        # Take the newer value when present; keep the baseline where it is missing.
        baseline_df[column] = new_values.where(
            new_values.notna(),
            merged_df[f'{column}_base']
        )

latest_folder = Path('../data/latest')
latest_file = 'latest.csv'
latest_file_path = latest_folder / latest_file
latest_file_path.parent.mkdir(parents=True, exist_ok=True)

baseline_df.to_csv(latest_file_path, index=False)
print(f'Exported {latest_file}')

Exported latest.csv


## Calculating Zonal Statistics for Enschede Districts
### Connecting to Postgres

In [None]:
# SQLAlchemy engine for the PostGIS database.
# The connection URL can be overridden through the NDVI_DATABASE_URL environment
# variable so real credentials never have to be written into the notebook; the
# fallback is the original local development URL.
engine = create_engine(
    os.environ.get(
        'NDVI_DATABASE_URL',
        'postgresql+psycopg://postgres:postgres@localhost/postgres'
    )
)

### Importing the Enschede Districts GeoDataFrame

In [34]:
# Load the district polygons and (re)create the `districts` table in PostGIS.
# if_exists='replace' drops any existing table first, so re-running is safe.
# The path is a plain string: the original f-string had no placeholders.
districts = gpd.read_file('../../vector/data/enschede_districts.gpkg')

districts.to_postgis(
    'districts',
    engine,
    if_exists='replace',
    index=False
)

### Importing the Composites

In [4]:
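# Manual step (not executed by this notebook): load each yearly composite into
# PostGIS as table public.ndvi_{year}. The tiling cell below reads FROM ndvi_{year},
# so these tables must exist before continuing. Note the input file must be the
# composite of the SAME year as the target table.
#for year in years:
#    f"!raster2pgsql \
#    -I \
#    -C \
#    -M \
#    -t 256x256 \
#    ../data/{year}/composite/ndvi_composite_{year}.tif public.ndvi_{year} \
#    | psql -U postgres -d postgres"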

### Creating Tile Layers and Spatial Indices

In [24]:
# For every year, rebuild the 256x256 tile table from ndvi_{year} and give it a
# GiST index on the tile footprints.
for year in years:
    tiles_table = f'ndvi_tiles_{year}'

    # One transaction per year; each statement is sent on its own so it does
    # not depend on the driver accepting several statements in one execute.
    with engine.begin() as conn:
        conn.execute(text(f'DROP TABLE IF EXISTS {tiles_table};'))

        conn.execute(
            text(
                f"""
                CREATE TABLE {tiles_table} AS
                SELECT rid,
                ST_Tile(rast, 256, 256) AS rast
                FROM ndvi_{year};
                """
            )
        )

        # The index name must be unique per table. With a shared name ("idx"),
        # IF NOT EXISTS finds the first year's index and silently skips
        # creating one for every later year.
        conn.execute(
            text(
                f"""
                CREATE INDEX IF NOT EXISTS idx_{tiles_table}
                ON {tiles_table}
                USING GIST(ST_ConvexHull(rast));
                """
            )
        )

### Exporting the Zonal Statistics

In [94]:
# For every year: compute min/max/mean NDVI per district in PostGIS, store the
# result as ndvi_stats_{year}, and export it as GeoJSON in EPSG:4326.
for year in years:

    with engine.begin() as conn:
        conn.execute(text(f'DROP TABLE IF EXISTS ndvi_stats_{year};'))

        # The lateral subquery must read this year's tiles. It was hard-coded to
        # ndvi_tiles_2019, so every year's table held the 2019 statistics.
        conn.execute(
            text(
                f"""
                CREATE TABLE ndvi_stats_{year} AS

                SELECT d.district_code,
                d.geometry,
                ROUND((j.stats).min::numeric, 2) as min_ndvi,
                ROUND((j.stats).max::numeric, 2) as max_ndvi,
                ROUND((j.stats).mean::numeric, 2) as mean_ndvi

                FROM districts as d
                JOIN LATERAL (

                    SELECT ST_SummaryStatsAgg(ST_Clip(
                        t.rast,
                        d.geometry,
                        true),
                        1,
                        true
                    ) AS stats
                    FROM ndvi_tiles_{year} AS t
                    WHERE t.rast && d.geometry AND ST_Intersects(t.rast, d.geometry)

                ) AS j

                ON true;
                """
            )
        )

    # Read the table back as a GeoDataFrame; the geometries are declared as
    # EPSG:28992 here and reprojected to EPSG:4326 for the GeoJSON export.
    stats = gpd.read_postgis(
        f"""
        SELECT *
        FROM ndvi_stats_{year};
        """,
        engine,
        crs='EPSG:28992',
        geom_col='geometry'
    )

    stats = stats.to_crs('EPSG:4326')

    stats_folder = Path(f'../data/{year}/stats')
    stats_file = Path(f'zonal_stats_{year}.geojson')
    stats_path = stats_folder / stats_file
    stats_path.parent.mkdir(parents=True, exist_ok=True)

    stats.to_file(stats_path, driver='GeoJSON')

    print(f'Exported {stats_file}')

Exported zonal_stats_2019.geojson
Exported zonal_stats_2024.geojson


## Creating the Latest NDVI Stats DB

In [None]:
# Build ndvi_stats_latest: start from the earliest year's statistics and overlay
# each later year's values, matched on district_code, in one transaction.
if not years:
    raise ValueError("years list is empty")

# Sort locally so the earliest year is always the baseline, however `years`
# was last assigned.
ordered_years = sorted(years)
init_year = ordered_years[0]

with engine.begin() as conn:
    conn.execute(text("DROP TABLE IF EXISTS ndvi_stats_latest;"))
    conn.execute(text(f"""
        CREATE TABLE ndvi_stats_latest AS
        SELECT *
        FROM ndvi_stats_{init_year};
    """))

    for year in ordered_years[1:]:
        # COALESCE takes the newer value when it exists and keeps the current
        # one otherwise. The previous CASE ... <> form gave the same result for
        # non-NULL pairs but left a NULL baseline NULL even when the newer year
        # had a value (NULL <> x is NULL, so the ELSE branch was taken).
        conn.execute(text(f"""
            UPDATE ndvi_stats_latest AS b
            SET
                min_ndvi = COALESCE(c.min_ndvi, b.min_ndvi),
                max_ndvi = COALESCE(c.max_ndvi, b.max_ndvi),
                mean_ndvi = COALESCE(c.mean_ndvi, b.mean_ndvi)
            FROM ndvi_stats_{year} AS c
            WHERE b.district_code = c.district_code;
        """))