# README
In this notebook, I will be looking at drought indicators at the watershed level in Baja Calidornia 
Sur. The inidcators evaluated are precipitation, land surface temperature, and ground water deviation
from basline (using GRACE data, assumes a baseline of 2004-2009). The time period evaluated for 
these datasets is 2013 - 2023.

For the purpose of this analysis, I will look at these indicators on an annual basis (taking a mean across months). Future work may entail digging into the variance within each year for these indicators.

# Imports

In [1]:
import ee

from gee_water.utils import (
    annual_agg_ic
)

import pandas as pd
import numpy as np
import plotly.express as px
import json


In [2]:
ee.Initialize()

# Functions

## Analysis

In [3]:
def reduce_to_basin_means_annual(image, basins):
    # sum of precipitation over each basin.
    # adjust 'scale' based on your dataset's native resolution
    fc = image.reduceRegions(
        collection = basins,
        reducer    = ee.Reducer.mean(),
        scale      = 10000  # adjust as appropriate
    )
    return fc.map(lambda f: f.set({'year': image.get('year')}))

In [4]:
def get_slope(group, group_col_name, analysis_col_name):
    """
    Given a subset of df containing one HYBAS_ID across multiple years,
    compute the linear slope of precip_sum vs. year using polyfit.
    """
    x = group[group_col_name].values
    y = group[analysis_col_name].values

    # polyfit(x, y, 1) -> returns [slope, intercept]
    slope, intercept = np.polyfit(x, y, 1)
    return slope

## Plotting

In [5]:
def create_chloropleth_map(
    gdf,
    color_by_value,
    color_continuous_scale=[
            (0.00, "blue"),   # 0% of the range -> blue
            (0.50, "white"),  # 50% of the range -> white (mid)
            (1.00, "red")     # 100% of the range -> red
        ],
    color_continuous_midpoint=0):
  # 1) Convert the GeoDataFrame to a GeoJSON dictionary
  geojson_data = json.loads(gdf.to_json())

  # 2) Create a choropleth mapbox
  fig = px.choropleth_mapbox(
      data_frame = gdf,
      geojson    = geojson_data,
      locations  = "HYBAS_ID",                 # column in gdf to match with geojson features
      featureidkey = "properties.HYBAS_ID",    # feature property in geojson_data
      color      = color_by_value,                # which column to color by
      hover_data = ["HYBAS_ID", "DIST_SINK", "SUB_AREA"],  # whatever extra info you want on hover
      center     = {"lat": 25, "lon": -112},   # pick a center for Baja Sur
      mapbox_style = "carto-positron",
      zoom       = 5,                          # adjust zoom level
      opacity    = 0.5,                         # polygon transparency
      color_continuous_scale = color_continuous_scale,
        color_continuous_midpoint = color_continuous_midpoint
  )

  # 3) Add text labels (HYBAS_ID) by plotting the centroid of each basin
  #    We'll create a separate scatter_mapbox and then merge it into the main figure.
  gdf["centroid_lon"] = gdf.geometry.centroid.x
  gdf["centroid_lat"] = gdf.geometry.centroid.y

  fig_scatter = px.scatter_mapbox(
      data_frame = gdf,
      lat        = "centroid_lat",
      lon        = "centroid_lon",
      text       = "HYBAS_ID",                 # show the basin ID as text
      hover_name = "HYBAS_ID",                 # also show on hover
      hover_data = ["DIST_MAIN", "DIST_SINK", "SUB_AREA"],
      center     = {"lat": 25, "lon": -112},
      zoom       = 5
  )

  # 4) Merge the scatter traces into the choropleth figure
  for trace in fig_scatter.data:
      fig.add_trace(trace)

  # 5) Final layout tweaks
  fig.update_layout(
      mapbox_style = "carto-positron",
      mapbox_zoom  = 5,
      margin       = {"r":0, "t":0, "l":0, "b":0}
  )

  fig.show()


# Global Variables

In [6]:
# For this particular blog, I am analyzing multiple datasets from a handful of different 
# satellites. The year 2013 is far enough back historically to capture form trends, but still 
# allows for higih quality data to be gathred for all my metrics of interest.
start_date = '2013-01-01'
end_date   = '2023-12-31'

# Define start and end year from your date strings
start_year = int(start_date[:4])
end_year   = int(end_date[:4])

In [7]:
# get watershed basin boundaries at desired level of granularity (12 is highest granularity, 1 is the most coarse)
BASINS_LEVEL9_ID = 'WWF/HydroSHEDS/v1/Basins/hybas_9'
BASINS_LEVEL8_ID = 'WWF/HydroSHEDS/v1/Basins/hybas_8'
BASINS_LEVEL7_ID = 'WWF/HydroSHEDS/v1/Basins/hybas_7' 

# geographical regional boundary dataset (country, state, etc.)
BOUNDARIES_ID = 'FAO/GAUL/2015/level1' 

# Pull Datasets

In [8]:
baja_sur = ee.FeatureCollection(BOUNDARIES_ID).filter(
    'ADM0_NAME == "Mexico" && '
    'ADM1_NAME == "Baja California Sur"'
)

In [9]:
# filter watershed basins to only those in Baja California Sur
basins_level9 = ee.FeatureCollection(BASINS_LEVEL9_ID)
basins_level8 = ee.FeatureCollection(BASINS_LEVEL8_ID)
basins_level7 = ee.FeatureCollection(BASINS_LEVEL7_ID)

In [10]:
# filter basins to baja california sur boundary
baja_basins_level9 = basins_level9.filterBounds(baja_sur)
baja_basins_level8 = basins_level8.filterBounds(baja_sur)
baja_basins_level7 = basins_level7.filterBounds(baja_sur)

In [11]:
# confirm CRS system
baja_basins_level7.first().geometry().projection().getInfo()

{'type': 'Projection', 'crs': 'EPSG:4326', 'transform': [1, 0, 0, 0, 1, 0]}

## Create Basin GeoDataFrames

In [70]:
def get_basin_geodataframe(basin_level_id):
    baja_basins= ee.FeatureCollection(basin_level_id).filterBounds(baja_sur)
    
    baja_basins_gdf = ee.data.computeFeatures({
        'expression': baja_basins,
        'fileFormat': 'GEOPANDAS_GEODATAFRAME'
    })

    # set CRS
    baja_basins_gdf.crs = 'EPSG:4326'
    return baja_basins_gdf




# Precipitation

In [57]:
precip = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY').select('precipitation').filterDate(start_date, end_date).filterBounds(baja_sur)

In [58]:
# build annual precipitation sums image collection
annual_chrips_precip_ic = annual_agg_ic(image_collection=precip, agg_type='sum', start_year=start_year, end_year=end_year)

In [59]:
# reduce images to basin level image collection (of feature collections)
basin_reduced_col_precip = annual_chrips_precip_ic.map(
    lambda image: reduce_to_basin_means_annual(image, basins=baja_basins_level9)
)

In [60]:
# flatten into a single FeatureCollection
all_features_chirps = basin_reduced_col_precip.flatten()

In [61]:
def get_annual_pdf(
        feature_collection: ee.featurecollection.FeatureCollection,
        agg_type: str,
        output_col_name: str
):
    features_dict = feature_collection.getInfo()
    records = []

    for f in features_dict['features']:
        props = f['properties']
        # HYBAS is the basin ID as outlined by the WWF basin dataset
        basin_id = props.get('HYBAS_ID') # or f['id']  # fallback to feature id if needed

        # The sum of precipitation is stored under 'sum' by default with ee.Reducer.sum()
        aggregate = props.get(agg_type)

        # We'll store year/month as integers, then create a date from them
        year  = int(props['year'])

        records.append([basin_id, year, aggregate])
    
    pdf = pd.DataFrame(records, columns=['HYBAS_ID', 'year', output_col_name])

    return pdf

In [62]:
precip_df = get_annual_pdf(feature_collection=all_features_chirps, agg_type='mean', output_col_name='annual_precip_mm')

In [63]:
mm_to_inches = 0.0393701
precip_df['annual_precip_in'] = precip_df['annual_precip_mm'] * mm_to_inches

## Create Precip Trend Map

In [71]:
precip_slope_df = (
    precip_df.sort_values(['HYBAS_ID', 'year'])
    .groupby('HYBAS_ID', as_index=False)
    .apply(lambda group: get_slope(group, group_col_name='year', analysis_col_name='annual_precip_mm'))  # Use a lambda function to pass the group to get_slope
    .rename(columns={None: 'slope'})
)





In [73]:
baja_basins_gdf_level9 = get_basin_geodataframe(BASINS_LEVEL9_ID)

In [75]:
# merge basin gdf with trend data for plotting
baja_basins_gdf_level9_with_precip = baja_basins_gdf_level9.merge(precip_slope_df, on='HYBAS_ID').rename(columns={'slope': 'average_chirps_precip_mm_per_year'})

In [78]:
create_chloropleth_map(
    gdf=baja_basins_gdf_level9_with_precip,
    color_by_value='average_chirps_precip_mm_per_year',
    color_continuous_scale=[
            (0.00, "red"),   # 0% of the range -> blue
            (0.50, "white"),  # 50% of the range -> white (mid)
            (1.00, "blue")     # 100% of the range -> red
        ],
    color_continuous_midpoint=0)


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




## Determine if trend is statistically significant (?)