In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import rasterio as rio
import xarray as xr
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os
from joblib import Parallel, delayed
from tqdm import tqdm
from pathlib import Path
import tempfile
import utm
from pathlib import Path
import glob
from shapely.geometry import Point
from osgeo import gdal
import tempfile
from shapely.geometry import box
import rasterio as rio 
from rasterio.merge import merge
from tqdm.auto import tqdm
import pyproj
from functools import partial
from shapely.ops import transform
import richdem as rd
from src.data import download_dem 
%matplotlib inline

In [None]:
# Change the root below as needed; every path must be absolute.
# The remaining folders are derived from DATA_FOLDER.
DATA_FOLDER = 'C:/Users/wrkstation/projects/snowcast/data/'
INPUT_FOLDER = DATA_FOLDER + 'processed/'
PATH_DEM_MOSAIC = INPUT_FOLDER + 'dem_mosaic/'
OUTPUT_FOLDER = DATA_FOLDER + 'external/dem_tiles'

In [None]:
def get_tile_bounds(files):
    """Collect the bounding box of every raster tile into a GeoDataFrame.

    Parameters
    ----------
    files : iterable of str or path-like
        Raster files that rasterio can open.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per input file with the columns ``tile`` (the path exactly as
        it was passed in) and ``geometry`` (a shapely box built from the
        dataset bounds). The frame is labelled EPSG:4326.
    """
    records = []
    for file in files:
        # The context manager closes the dataset even if reading the bounds raises.
        with rio.open(file) as tile:
            records.append([file, box(*tile.bounds)])
    bounds = pd.DataFrame(records, columns=['tile', 'geometry'])
    bounds = gpd.GeoDataFrame(bounds, geometry='geometry')
    # NOTE(review): the CRS is assigned here, not read from the tiles; this
    # assumes every tile is already in EPSG:4326 - confirm against the download step.
    return bounds.set_crs(epsg=4326)

In [None]:
def geodesic_buffer(lon, lat, km):
    """Create a geodesic buffer polygon around the point (lon, lat).

    A circle of radius ``km`` is drawn in an azimuthal equidistant projection
    centred on the point and then transformed back to geographic coordinates.

    Parameters
    ----------
    lon, lat : float
        Longitude and latitude of the centre, in degrees.
    km : float
        Buffer radius in kilometres.

    Returns
    -------
    shapely geometry
        The buffer polygon in lon/lat (WGS84) coordinates.
    """
    # Azimuthal equidistant projection centred on the requested point
    aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
    # pyproj.transform is deprecated; Transformer is the supported API.
    # always_xy=True keeps the (x=lon, y=lat) axis order that shapely expects.
    to_wgs84 = pyproj.Transformer.from_crs(
        aeqd_proj.format(lon=lon, lat=lat),
        '+proj=longlat +datum=WGS84',
        always_xy=True,
    )
    buf = Point(0, 0).buffer(km * 1000)  # distance in metres
    return transform(to_wgs84.transform, buf)

def tiles_to_mosaic(files, output_path, file_name, proj='+proj=longlat +datum=WGS84 +no_defs'):
    """Merge raster tiles into a single GeoTIFF.

    Parameters
    ----------
    files : iterable of str or path-like
        Tiles to merge; at least one is required.
    output_path : str or path-like
        Folder for the mosaic; created if it does not exist.
    file_name : str
        File name of the mosaic inside ``output_path``.
    proj : str
        CRS written to the output metadata.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    from contextlib import ExitStack

    files = list(files)
    if not files:
        raise ValueError('tiles_to_mosaic() needs at least one input tile')

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(output_path, exist_ok=True)

    # ExitStack closes every opened tile, including when merge() raises.
    with ExitStack() as stack:
        sources = [stack.enter_context(rio.open(fp)) for fp in files]
        mosaic, out_trans = merge(sources)
        # The metadata template comes from the last tile in the list.
        out_meta = sources[-1].meta.copy()

    out_meta.update({"driver": "GTiff",
                     "height": mosaic.shape[1],
                     "width": mosaic.shape[2],
                     "transform": out_trans,
                     "crs": proj})
    out_file_name = Path(os.path.join(output_path, file_name))
    with rio.open(out_file_name, "w", **out_meta) as dest:
        dest.write(mosaic)

In [None]:
# Run the entry point of the project's src.data.download_dem module.
# NOTE(review): presumably this fetches the DEM tiles that the next cell globs
# from external/dem_tiles - confirm in download_dem.
download_dem.main()

In [None]:
# glob returns matches in arbitrary order; sorting keeps the tile list
# (and everything derived from its row order) reproducible between runs.
files = sorted(glob.glob(os.path.join(DATA_FOLDER + 'external/dem_tiles/', '*.tif')))

In [None]:
# Bounding boxes of all DEM tiles; joined against the grid-cell centroids further down.
tile_bounds = get_tile_bounds(files)

In [None]:
# Both layers are read from <DATA_FOLDER>/processed.
processed_dir = os.path.join(DATA_FOLDER, 'processed')
grid_clusters = gpd.read_file(Path(processed_dir, 'grid_clusters.geojson'))
grid_points = gpd.read_file(Path(processed_dir, 'grid_cells_points.geojson'))

In [None]:
# Preview only the first rows instead of dumping the whole frame.
tile_bounds.head()

In [None]:
# Load both grid definitions and keep each cell once.
input_dir = os.path.join(DATA_FOLDER, 'input')
grid1 = gpd.read_file(Path(input_dir, 'grid_cells.geojson'))
grid2 = gpd.read_file(Path(input_dir, 'grid_cell_stage2.geojson'))
grid = pd.concat([grid1, grid2]).drop_duplicates('cell_id')

# Represent every cell by its centroid.
grid = gpd.GeoDataFrame(grid, geometry=grid.centroid).reset_index(drop=True)

# Attach the DEM tile containing each centroid. A centroid that lies on a shared
# tile edge or in a tile overlap matches several tiles; keep one row per cell so
# that later steps never process (and write output for) the same cell twice.
grid = gpd.sjoin(grid, tile_bounds).drop('index_right', axis=1)
grid = grid.drop_duplicates('cell_id')

# Attach the cluster of each cell; cells outside every cluster get -1.
grid = gpd.sjoin(grid, grid_clusters, how='left')
grid = grid.fillna(-1)
grid = grid.drop('index_right', axis=1)
# Overlapping cluster polygons would duplicate cells as well.
grid = grid.drop_duplicates('cell_id')

In [None]:
# Expose the centroid coordinates as plain columns.
grid = grid.assign(lon=grid.geometry.x, lat=grid.geometry.y)

In [None]:
%%time
grid['buff_500'] = grid.apply(lambda row: geodesic_buffer(row['lon'], row['lat'], 500 / 1000), axis=1)
grid['buff_200'] = grid.apply(lambda row: geodesic_buffer(row['lon'], row['lat'], 200 / 1000), axis=1)

In [None]:
# Build one DEM mosaic per cluster (-1 marks cells without a cluster).
# The tile list gets its own name so the global `files` list is not overwritten,
# and the mosaics go to PATH_DEM_MOSAIC, the same constant clip_dem_by_buffer reads from.
for clst in tqdm(grid['cluster'].unique()):
    if clst == -1:
        continue
    cluster_tiles = grid.loc[grid['cluster'] == clst, 'tile'].drop_duplicates().values
    tiles_to_mosaic([Path(f) for f in cluster_tiles], PATH_DEM_MOSAIC, f'dem_{int(clst)}.tif')

In [None]:
def clip_dem_by_buffer(cell):
    """Clip the DEM around one grid cell to its 200 m and 500 m buffers.

    Writes ``<cell_id>_200.tif`` and ``<cell_id>_500.tif`` into
    ``<DATA_FOLDER>/processed/dem_tmp`` using ``gdalwarp`` with the buffer as
    cutline and -9999 as nodata.

    Parameters
    ----------
    cell : row of the ``grid`` frame
        Must provide ``cell_id``, ``cluster``, ``tile``, ``buff_200`` and
        ``buff_500``.

    Raises
    ------
    RuntimeError
        If ``gdalwarp`` exits with a non-zero status.
    """
    import subprocess

    tmp_dem = os.path.join(DATA_FOLDER, 'processed/dem_tmp')
    # exist_ok: several parallel workers may try to create the folder at once.
    os.makedirs(tmp_dem, exist_ok=True)

    # Clustered cells use the cluster mosaic, all others their own tile.
    # Mosaics are built for every cluster except the -1 sentinel, so cluster 0
    # must take the mosaic branch as well.
    if cell.cluster >= 0:
        dem_path = os.path.join(PATH_DEM_MOSAIC, f'dem_{int(cell.cluster)}.tif')
    else:
        dem_path = cell.tile
    dem_path = str(Path(dem_path))

    for r in [200, 500]:
        gdf = gpd.GeoDataFrame(geometry=[cell[f'buff_{r}']]).set_crs(epsg=4326)
        out = str(Path(os.path.join(tmp_dem, f'{cell.cell_id}_{r}.tif')))

        # The cutline is written into a private scratch folder: a still-open
        # NamedTemporaryFile cannot be reopened by name on Windows, and the
        # folder is removed again as soon as gdalwarp has finished.
        with tempfile.TemporaryDirectory() as scratch:
            cutline = os.path.join(scratch, 'cutline.geojson')
            gdf.to_file(cutline, driver='GeoJSON')
            # Argument list instead of a shell string: paths with spaces stay intact.
            # -overwrite: a re-run replaces an existing output instead of warping into it.
            cmd = ['gdalwarp', '-overwrite',
                   '-cutline', cutline, '-crop_to_cutline',
                   '-dstnodata', '-9999',
                   dem_path, out]
            result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            raise RuntimeError(
                f'gdalwarp failed for cell {cell.cell_id} (radius {r}): {result.stderr.strip()}'
            )



In [None]:
# Clip the DEM for every grid cell in parallel; the per-cell return values are not needed.
clip_jobs = (
    delayed(clip_dem_by_buffer)(cell)
    for _row_label, cell in tqdm(grid.iterrows(), total=len(grid))
)
_ = Parallel(n_jobs=-1)(clip_jobs)

In [None]:
import richdem as rd

In [None]:
def array_stats(arr, idx, prefix, suffix, nan=-9999):
    """Summarise a raster array as a one-row DataFrame.

    Parameters
    ----------
    arr : array-like
        Raster values.
    idx : hashable
        Identifier stored in the ``idx`` column.
    prefix, suffix : str or int
        Used to build the column names: ``{prefix}_{suffix}`` for the centre
        value and ``{prefix}_{stat}_{suffix}`` for min, max, mean and median.
    nan : number
        Nodata sentinel; cells equal to it (and real NaNs) are ignored.

    Returns
    -------
    pandas.DataFrame
        One row with the centre value, min, max, mean, median and ``idx``.
        Every statistic is NaN when no valid cell exists, and the centre value
        is NaN when the centre cell is nodata.
    """
    data = np.asarray(arr)

    # Centre cell, indexed per axis. The flat index ``size // 2`` only hits the
    # centre when the number of rows is odd; for an even number of rows it
    # lands on the first column of the middle row.
    if data.size:
        in_point = data[tuple(dim // 2 for dim in data.shape)]
        # Do not let the nodata sentinel leak out as a real value.
        if in_point == nan or np.isnan(in_point):
            in_point = np.nan
    else:
        in_point = np.nan

    # Build the validity mask once and reuse it for every statistic.
    valid = data[(data != nan) & ~np.isnan(data)]
    if valid.size:
        minimum = valid.min()
        maximum = valid.max()
        mean = valid.mean()
        median = np.median(valid)
    else:
        minimum = maximum = mean = median = np.nan

    out = {f'{prefix}_{suffix}': float(in_point),
           f'{prefix}_min_{suffix}': float(minimum),
           f'{prefix}_max_{suffix}': float(maximum),
           f'{prefix}_mean_{suffix}': float(mean),
           f'{prefix}_median_{suffix}': float(median),
           'idx': idx}

    return pd.DataFrame([out])
    


In [None]:

def get_raster_stats(dem_buff_file, tmp_dem='processed/dem_tmp/'):
    """Compute terrain statistics for one clipped DEM.

    Parameters
    ----------
    dem_buff_file : str
        File name of the form ``<cell_id>_<radius>.tif`` inside ``tmp_dem``.
    tmp_dem : str
        Folder that holds the clipped DEMs.

    Returns
    -------
    pandas.DataFrame
        One row holding the ``array_stats`` output for elevation, slope,
        aspect, profile/planform/total curvature and TRI, concatenated
        column-wise (so the ``idx`` column is repeated once per attribute).
    """
    fullpath = str(Path(os.path.join(tmp_dem, dem_buff_file)))

    # Split from the right so a cell id that itself contains '_' stays intact.
    idx, radius = os.path.splitext(dem_buff_file)[0].rsplit('_', 1)
    radius = int(radius)

    dem = rd.LoadGDAL(fullpath, no_data=-9999)
    # NOTE(review): no zscale is passed; if the clipped DEM is in geographic
    # degrees with elevations in metres, slope and curvature are distorted - confirm.
    slope = rd.TerrainAttribute(dem, attrib='slope_degrees')
    aspect = rd.TerrainAttribute(dem, attrib='aspect')
    curv_prof = rd.TerrainAttribute(dem, attrib='profile_curvature')
    curv_plan = rd.TerrainAttribute(dem, attrib='planform_curvature')
    curv = rd.TerrainAttribute(dem, attrib='curvature')

    # Compute TRI in memory. Writing 'tri_<file>' next to the clipped DEMs would
    # put files ending in '200.tif' / '500.tif' into the folder that is listed
    # to find the inputs, so a re-run would pick them up as DEMs.
    src = gdal.Open(fullpath)
    tri_ds = gdal.DEMProcessing('', src, 'TRI', format='MEM', computeEdges=True)
    tri = tri_ds.ReadAsArray()
    tri_ds = None  # release the GDAL datasets
    src = None

    layers = [
        (dem, 'alt'),
        (slope, 'slope'),
        (aspect, 'aspect'),
        (curv_prof, 'curv_prof'),
        (curv_plan, 'curv_plan'),
        (curv, 'curv'),
        (tri, 'tri'),
    ]
    out = pd.concat([array_stats(values, idx, name, radius) for values, name in layers], axis=1)

    return out

In [None]:
# Folder that holds the clipped per-cell DEMs.
tmp_dem = os.path.join(DATA_FOLDER, 'processed/dem_tmp')

In [None]:
# Clipped DEMs are named '<cell_id>_<radius>.tif'. TRI rasters named 'tri_<file>'
# can exist in the same folder and end in the same suffix, so they are excluded.
# Sorting keeps the processing order reproducible.
clipped_dems = sorted(f for f in os.listdir(tmp_dem) if not f.startswith('tri_'))
files_200 = [f for f in clipped_dems if f.endswith('_200.tif')]
files_500 = [f for f in clipped_dems if f.endswith('_500.tif')]

In [None]:
# Terrain statistics for every clipped DEM: one parallel batch per buffer radius.
df_200, df_500 = (
    Parallel(n_jobs=-1)(
        delayed(get_raster_stats)(dem_file, tmp_dem=tmp_dem) for dem_file in tqdm(batch)
    )
    for batch in (files_200, files_500)
)

In [None]:
# Write below DATA_FOLDER like every other path in this notebook, so the
# output location does not depend on the current working directory.
pd.concat(df_200).to_csv(os.path.join(DATA_FOLDER, 'processed', 'dem_features_200.csv'), index=False)

In [None]:
# Write below DATA_FOLDER like every other path in this notebook, so the
# output location does not depend on the current working directory.
pd.concat(df_500).to_csv(os.path.join(DATA_FOLDER, 'processed', 'dem_features_500.csv'), index=False)

In [None]:
# Collapse the per-file frames into one table per radius. The isinstance guard
# keeps this cell re-runnable: after the first run the names already hold
# DataFrames, and pd.concat() rejects a bare DataFrame.
if isinstance(df_200, list):
    df_200 = pd.concat(df_200)
if isinstance(df_500, list):
    df_500 = pd.concat(df_500)

In [None]:
# Keep only the first occurrence of each repeated column label.
df_200, df_500 = (
    frame.loc[:, ~frame.columns.duplicated()] for frame in (df_200, df_500)
)

In [None]:
# Combine the 200 m and 500 m statistics of each cell (inner join on the cell id).
dem_features = pd.merge(df_200, df_500, on='idx')

In [None]:
# Preview only the first rows instead of dumping the whole frame.
dem_features.head()

In [None]:
# Columns kept in the final feature table, in output order.
cols = [
    'idx',
    # 200 m buffer: centre value plus selected statistics
    'alt_200', 'alt_min_200', 'alt_max_200', 'alt_mean_200',
    'slope_200', 'slope_mean_200', 'slope_median_200',
    'aspect_200', 'aspect_mean_200', 'aspect_median_200',
    'curv_prof_200', 'curv_prof_mean_200', 'curv_prof_median_200',
    'curv_200', 'curv_mean_200', 'curv_median_200',
    'curv_plan_200', 'curv_plan_mean_200', 'curv_plan_median_200',
    'tri_200', 'tri_mean_200', 'tri_median_200',
    # 500 m buffer: statistics only
    'alt_min_500', 'alt_max_500', 'alt_mean_500', 'alt_median_500',
    'slope_mean_500', 'slope_median_500',
    'aspect_mean_500', 'aspect_median_500',
    'curv_prof_mean_500', 'curv_prof_median_500',
    'curv_plan_mean_500', 'curv_plan_median_500',
    'curv_mean_500', 'curv_median_500',
    'tri_mean_500', 'tri_median_500',
]

In [None]:
# Restrict the table to the selected columns, in the order given by `cols`.
dem_features = dem_features.loc[:, cols]

In [None]:
# The centre values of the 200 m buffer become the plain per-cell attributes,
# and the identifier column gets its final name.
point_attributes = ('alt', 'slope', 'aspect', 'curv_prof', 'curv', 'curv_plan', 'tri')
column_names = {'idx': 'cell_id'}
column_names.update({f'{name}_200': name for name in point_attributes})
dem_features = dem_features.rename(columns=column_names)

In [None]:
# Write below DATA_FOLDER like every other path in this notebook, so the
# output location does not depend on the current working directory.
dem_features.to_csv(os.path.join(DATA_FOLDER, 'raw', 'dem_features.csv'), index=False)

In [None]:
# Show the final column names as a quick check of the exported table.
dem_features.columns