# Get Elevation Gradient data from Copernicus Digital Elevation Model (DEM)
Elevation data was provided for the ground measurement stations but not for the train and test grid cells. This notebook computes the southern and eastern elevation gradients for the ground measurement stations and for the train and test grid cells from the Copernicus DEM, and saves them into the data/static directory.

In [None]:
!pip install pystac_client
!pip install planetary_computer
!pip install rasterio
!pip install xarray-spatial

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import json
import pandas as pd
import numpy as np
import planetary_computer
import xarray
import time
from collections import defaultdict
from pystac_client import Client
import rasterio
from rasterio import windows
from rasterio import features
from rasterio import warp
from matplotlib import pyplot as plt

In [None]:
!pip freeze

### Import Base Data Files

In [None]:
# Base directory of the training data on the mounted Drive.
data_dir = '/content/drive/MyDrive/snocast/train/data'

# Ground-measure features (train): rename the first column to station_id, then
# melt to long form (station_id, date, swe) and drop rows with missing values.
ground_measures_train = pd.read_csv(
    os.path.join(data_dir, 'static/ground_measures_train_features.csv')
)
ground_measures_train.columns = ['station_id', *ground_measures_train.columns[1:]]
gm_melt_train = pd.melt(
    ground_measures_train,
    id_vars=["station_id"],
    var_name="date",
    value_name="swe",
).dropna()

# Ground-measure features (test): same treatment as the train file.
ground_measures_test = pd.read_csv(
    '/content/drive/MyDrive/snocast/dev/ground_measures_test_features.csv'
)
ground_measures_test.columns = ['station_id', *ground_measures_test.columns[1:]]
gm_melt_test = pd.melt(
    ground_measures_test,
    id_vars=["station_id"],
    var_name="date",
    value_name="swe",
).dropna()

# Station metadata, plus the train and test ground-measure tables placed side
# by side (column-wise concatenation, axis=1).
ground_measures_metadata = pd.read_csv(
    '/content/drive/MyDrive/snocast/dev/ground_measures_metadata.csv'
)
ground_measures_all = pd.concat(
    [ground_measures_train, ground_measures_test],
    axis=1,
)

# Training labels per grid cell, melted to long form (cell_id, date, swe).
train_labels = pd.read_csv('/content/drive/MyDrive/snocast/dev/train_labels.csv')
labels_melt_train = pd.melt(
    train_labels,
    id_vars=["cell_id"],
    var_name="date",
    value_name="swe",
).dropna()

# Submission template per grid cell, melted the same way.
submission_format = pd.read_csv('/content/drive/MyDrive/snocast/dev/submission_format.csv')
preds_melt_test = pd.melt(
    submission_format,
    id_vars=["cell_id"],
    var_name="date",
    value_name="swe",
).dropna()

# Pre-computed "unique" tables for the train labels and test predictions.
train_label_unique = pd.read_csv('/content/drive/MyDrive/snocast/dev/train_label_unique.csv')
test_pred_unique = pd.read_csv('/content/drive/MyDrive/snocast/dev/test_pred_unique.csv')

In [None]:
# Load the grid-cell polygons and build per-cell centroid / bounding-box
# tables for the train and test grids.
# The context manager closes the file handle once the JSON has been parsed.
with open('/content/drive/MyDrive/snocast/dev/grid_cells.geojson') as f:
  grid_cells = json.load(f)
print('length grid_cells features: ', len(grid_cells['features']))

# cell_id -> {'coordinates', 'region', 'geometry'} (plus 'dataset', set below)
grid_features = defaultdict(dict)
for grid_cell in grid_cells['features']:
  cell_id = grid_cell['properties']['cell_id']
  geometry = grid_cell['geometry']
  grid_features[cell_id] = {
      # The first vertex of the outer ring is dropped; presumably the ring is
      # closed (first == last) so the mean below is over distinct corners --
      # TODO confirm against the geojson.
      'coordinates': geometry['coordinates'][0][1:],
      'region': grid_cell['properties']['region'],
      'geometry': geometry,
  }


def _summarize_cells(cell_ids):
  """Return (lats, lons, regions, bboxes) lists for cell_ids, in input order.

  lat/lon are the mean of the stored corner coordinates ([lon, lat] pairs);
  each bbox is an array [min_lon, min_lat, max_lon, max_lat].
  Raises KeyError for a cell_id that is not present in grid_features.
  """
  lats, lons, regions, bboxes = [], [], [], []
  for cell_id in cell_ids:
    corners = grid_features[cell_id]['coordinates']
    lon, lat = np.mean(corners, axis=0)
    southwest_corner = np.min(corners, axis=0)
    northeast_corner = np.max(corners, axis=0)
    lats.append(lat)
    lons.append(lon)
    regions.append(grid_features[cell_id]['region'])
    bboxes.append(np.concatenate([southwest_corner, northeast_corner]))
  return lats, lons, regions, bboxes


train_ids = list(train_labels['cell_id'].values)
train_lats, train_lons, train_regions, train_bboxes = _summarize_cells(train_ids)

test_ids = list(submission_format['cell_id'].values)
test_lats, test_lons, test_regions, test_bboxes = _summarize_cells(test_ids)

# Tag every cell with the dataset(s) it belongs to. Train cells are tagged
# first so that a cell also present in the submission format becomes 'both'.
for cell_id in train_ids:
  grid_features[cell_id]['dataset'] = 'train'
for cell_id in test_ids:
  # Only promote to 'both' when the cell was tagged by the train pass; a
  # repeated test id therefore stays 'test'.
  seen_in_train = grid_features[cell_id].get('dataset') in ('train', 'both')
  grid_features[cell_id]['dataset'] = 'both' if seen_in_train else 'test'

grid_features_train = defaultdict(dict)
grid_features_test = defaultdict(dict)
for cell_id, feature in grid_features.items():
  # Cells that appear in neither table carry no 'dataset' key and are skipped
  # instead of raising KeyError.
  dataset = feature.get('dataset')
  if dataset in ('test', 'both'):
    grid_features_test[cell_id] = feature
  if dataset in ('train', 'both'):
    grid_features_train[cell_id] = feature
print("test count: ", len(grid_features_test))
print("train count: ", len(grid_features_train))


train_lat_lon = pd.DataFrame({'cell_id': train_ids,
                              'latitude': train_lats,
                              'longitude': train_lons,
                              'region': train_regions,
                              'bbox': train_bboxes})
test_lat_lon = pd.DataFrame({'cell_id': test_ids,
                             'latitude': test_lats,
                             'longitude': test_lons,
                             'region': test_regions,
                             'bbox': test_bboxes})

## Get Data for Copernicus Digital Elevation Model (DEM)

In [None]:
def read_band(href, aoi):
    """Read band 1 of the raster at ``href``, windowed to the bounds of ``aoi``.

    Args:
        href: Path or URL that ``rasterio.open`` can open.
        aoi: GeoJSON-like geometry whose bounds are interpreted as EPSG:4326
            and reprojected into the dataset's CRS before windowing.

    Returns:
        The band-1 values inside the window as returned by ``ds.read``. If the
        read fails, a 2x2 array of zeros is returned instead (best-effort
        fallback), and the failure is printed.
    """
    with rasterio.open(href) as ds:
        aoi_bounds = features.bounds(aoi)
        warped_aoi_bounds = warp.transform_bounds("epsg:4326", ds.crs, *aoi_bounds)
        aoi_window = windows.from_bounds(*warped_aoi_bounds, transform=ds.transform)
        try:
            data = ds.read(1, window=aoi_window)
        except Exception as err:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            print(f'read_band: window read failed, using zero fallback: {err}')
            data = np.array([[0, 0], [0, 0]])
        return data

In [None]:
# Open a STAC API client against the Planetary Computer endpoint; this client
# is used by every catalog search below.
client = Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    ignore_conformance=True,
)

In [None]:
df = test_lat_lon

In [None]:
df.head()

In [None]:
# Pick one grid cell's geometry as a sample area of interest for the
# exploratory cells below, and display it.
aoi = grid_features['000ba8d9-d6d5-48da-84a2-1fa54951fae1']['geometry']
aoi

In [None]:
# Search the cop-dem-glo-30 collection for items that intersect the sample aoi
search = client.search(
    collections=["cop-dem-glo-30"],
    intersects=aoi,
)

# Materialise the search results so they can be counted and indexed.
items = list(search.get_items())
print(f"Returned {len(items)} items")

In [None]:
# Sign the first item's "data" asset and read band 1 within the sample aoi.
signed_asset = planetary_computer.sign(items[0].assets["data"])
elev_matrix = read_band(signed_asset.href, aoi)

In [None]:
# Visual sanity check of the values read for the sample aoi.
plt.imshow(elev_matrix)
plt.colorbar()

In [None]:
# Negated row-to-row difference: entry [i, j] is elev[i, j] - elev[i+1, j].
# NOTE(review): presumably rows run north -> south (north-up raster), so a
# positive value means the terrain drops toward the south -- confirm.
southern_gradient = -1*np.diff(elev_matrix, axis=0)
plt.imshow(southern_gradient)
plt.colorbar()
print(southern_gradient.mean())

In [None]:
southern_gradient.shape

In [None]:
# Fraction of entries in southern_gradient that are negative.
# (get_elevations counts entries > 0 instead.)
h, w = southern_gradient.shape
(southern_gradient < 0).sum()/(h*w)

In [None]:
# Column-to-column difference: entry [i, j] is elev[i, j+1] - elev[i, j].
# NOTE(review): presumably columns run west -> east, so a positive value means
# the terrain rises toward the east -- confirm.
eastern_gradient = np.diff(elev_matrix, axis=1)
plt.imshow(eastern_gradient)
plt.colorbar()
print(eastern_gradient.mean())

In [None]:
def _point_aoi(lon, lat, eps=0.001):
  """Return a closed square GeoJSON Polygon extending eps around (lon, lat)."""
  return {'coordinates': [[[lon-eps, lat-eps],
                           [lon-eps, lat+eps],
                           [lon+eps, lat+eps],
                           [lon+eps, lat-eps],
                           [lon-eps, lat-eps]]],
          'type': 'Polygon'}


def _fetch_items(search, max_failures=3, delay=1):
  """Return the search results as a list, or None after too many failures."""
  failures = 0
  while True:
    try:
      return list(search.get_items())
    except Exception:
      # Catch Exception, not a bare except, so Ctrl-C can still stop the
      # long-running loop while a request is being retried.
      failures += 1
      print('exception')
      time.sleep(delay)
      if failures > max_failures:
        print('give up')
        return None


def _tile_gradient_stats(elev_matrix):
  """Return (east_mean, east_pos, east_size, south_mean, south_pos, south_size)."""
  eastern_grad = np.diff(elev_matrix, axis=1)
  southern_grad = -1*np.diff(elev_matrix, axis=0)
  return (eastern_grad.mean(), (eastern_grad > 0).sum(), eastern_grad.size,
          southern_grad.mean(), (southern_grad > 0).sum(), southern_grad.size)


def get_elevations(df, gm=False):
  """Compute elevation-gradient summaries for every row of df.

  For each row an area of interest (AOI) is built, the "cop-dem-glo-30"
  collection is searched for intersecting items, and each item's "data" asset
  is read with read_band. Per item, the eastern gradient is the difference
  along axis 1 and the southern gradient is the negated difference along
  axis 0 of the elevation matrix.

  Args:
    df: DataFrame with a 'cell_id' column (gm=False), or with 'station_id',
      'latitude' and 'longitude' columns (gm=True). Progress is printed
      whenever the row's index label is a multiple of 250.
    gm: When True the AOI is a small square (0.001 either side) around the
      row's longitude/latitude; otherwise it is the grid cell geometry stored
      in grid_features.

  Returns:
    Tuple (east_grads, south_grads, ids, east_pcts, south_pcts) of lists with
    one entry per successfully searched row:
      * *_grads: unweighted mean, over items, of each item's mean gradient.
      * *_pcts: share of gradient entries > 0, pooled over all items.
    Rows whose search keeps failing are skipped entirely. Rows with no
    matching items get NaN for all four statistics.
  """
  east_grads = []
  south_grads = []
  east_pcts = []
  south_pcts = []
  ids = []

  for idx, row in df.iterrows():
    if idx % 250 == 0:
      print(idx)
    if gm:
      cell_id = row['station_id']
      aoi = _point_aoi(row['longitude'], row['latitude'])
    else:
      cell_id = row['cell_id']
      aoi = grid_features[cell_id]['geometry']

    # Find the DEM items that intersect this row's AOI.
    search = client.search(
        collections=["cop-dem-glo-30"],
        intersects=aoi,
    )
    items = _fetch_items(search)
    if items is None:
      continue

    tile_stats = []
    for item in items:
      signed_asset = planetary_computer.sign(item.assets["data"])
      tile_stats.append(_tile_gradient_stats(read_band(signed_asset.href, aoi)))

    if tile_stats:
      (loc_east_grads, loc_east_low, loc_east_size,
       loc_south_grads, loc_south_low, loc_south_size) = zip(*tile_stats)
      east_pct = np.sum(loc_east_low)/np.sum(loc_east_size)
      east_grad = np.mean(loc_east_grads)
      south_pct = np.sum(loc_south_low)/np.sum(loc_south_size)
      south_grad = np.mean(loc_south_grads)
    else:
      # No item covers the AOI: record NaN explicitly rather than relying on
      # 0/0 and mean-of-empty warnings to produce it.
      east_pct = east_grad = south_pct = south_grad = np.nan

    east_pcts.append(east_pct)
    east_grads.append(east_grad)
    south_pcts.append(south_pct)
    south_grads.append(south_grad)
    ids.append(cell_id)

  return east_grads, south_grads, ids, east_pcts, south_pcts


In [None]:
# Compute gradient summaries for every test grid cell. Note that this rebinds
# test_ids to the ids actually returned by get_elevations.
test_east_grads, test_south_grads, test_ids, test_east_pcts, test_south_pcts = get_elevations(test_lat_lon)

In [None]:
# Assemble the per-cell test results into one table keyed by cell_id.
test_elev_grads = pd.DataFrame({'cell_id': test_ids, 
                                'east_elev_grad': test_east_grads, 
                                'south_elev_grad': test_south_grads,
                                'east_elev_pct': test_east_pcts,
                                'south_elev_pct': test_south_pcts})

In [None]:
# Persist the test-cell gradients as Parquet in the static data directory.
test_elev_grads.to_parquet('/content/drive/MyDrive/snocast/train/data/static/test_elevation_grads.parquet')

In [None]:
# Compute gradient summaries for every train grid cell (rebinding train_ids to
# the ids actually returned) and assemble them into one table keyed by cell_id.
train_east_grads, train_south_grads, train_ids, train_east_pcts, train_south_pcts = get_elevations(train_lat_lon)
train_elev_grads = pd.DataFrame({'cell_id': train_ids, 
                                 'east_elev_grad': train_east_grads, 
                                 'south_elev_grad': train_south_grads,
                                 'east_elev_pct': train_east_pcts,
                                 'south_elev_pct': train_south_pcts})

In [None]:
train_elev_grads.head()

In [None]:
# Persist the train-cell gradients as Parquet in the static data directory.
train_elev_grads.to_parquet('/content/drive/MyDrive/snocast/train/data/static/train_elevation_grads.parquet')

In [None]:
# Compute gradient summaries around each ground-measure station (gm=True builds
# a small square AOI from the station's latitude/longitude) and assemble them
# into one table keyed by station_id.
gm_east_grads, gm_south_grads, gm_ids, gm_east_pcts, gm_south_pcts = get_elevations(ground_measures_metadata, gm=True)
gm_elev_grads = pd.DataFrame({'station_id': gm_ids, 
                              'east_elev_grad': gm_east_grads, 
                              'south_elev_grad': gm_south_grads,
                              'east_elev_pct': gm_east_pcts,
                              'south_elev_pct': gm_south_pcts})

In [None]:
# Persist the ground-measure station gradients as Parquet in the static data directory.
gm_elev_grads.to_parquet('/content/drive/MyDrive/snocast/train/data/static/gm_elevation_grads.parquet')