In [1]:
## Script to generate ua_swe_data.csv (from UA SWE tif files)

In [2]:
## Setup Python environment

import sys
import os
import geojson
import numpy as np
from osgeo import gdal

In [3]:
## Get Metadata for the Evaluation Stage Grid Cells

# Parse the GeoJSON file that describes the evaluation grid cells
with open('Data/Snowcast Evaluation/grid_cells.geojson') as f:
    gj = geojson.load(f)
features = gj['features']

# Polygon coordinates of every feature, kept in file order
evaluation_coordinates = [entry['geometry']['coordinates'] for entry in features]

# Matching cell ids, stored as a numpy array so array operations work on them
evaluation_cell_ids = np.array([entry['properties']['cell_id'] for entry in features])

In [4]:
# Create the time array by looking at the Training Label Data from the Development Stage
# (Training dates will be defined for all of these days)

def _read_header_dates(csv_path):
    """Return every header field after the first one in csv_path as a numpy array.

    Only the first line of the file is read; the first field is skipped and the
    remaining comma-separated fields are returned in order.
    """
    with open(csv_path) as csv_file:
        first_line = csv_file.readline().replace('\n','')
    return np.array(first_line.split(',')[1:])

# 2020-2021 Data
times_2021 = _read_header_dates('Data/Snowcast Evaluation/labels_2020_2021.csv')

# Training dataset dates followed by the 2020-2021 dates, as one vector
times = np.concatenate(
    (_read_header_dates('Data/Snowcast Development/train_labels.csv'), times_2021)
)
    

In [5]:
# Get locations of UA grid cells to extract data for

# Half-width of the square window (in raster coordinate units) that is searched
# for a replacement pixel when the directly matching pixel is NaN
NAN_SEARCH_HALF_WIDTH = 0.05

# Read one of the files to get spatial information about it
yyyy, mm, dd = times[0].split('-')
reference_tif = 'Data/UASWE/' + yyyy + '/' + mm + '/' + dd + '/SWE.tif'
src = gdal.Open(reference_tif)
if src is None:
    # gdal.Open returns None instead of raising when GDAL exceptions are not enabled
    raise FileNotFoundError('Unable to open UA SWE raster: ' + reference_tif)
ncols = src.RasterXSize
nrows = src.RasterYSize
ulx, xres, xskew, uly, yskew, yres = src.GetGeoTransform()
lrx = ulx + (ncols * xres)
lry = uly + (nrows * yres)
data = src.ReadAsArray()
src = None  # Drop the reference to the dataset once the array has been read

UASWE_1km_XLocations = []
UASWE_1km_YLocations = []

# Pixel-centre coordinates of the raster, tiled into two (nrows, ncols) matrices
# (used in the 'if' block below)
x = np.tile(np.arange(ulx+xres/2, lrx, xres), [nrows, 1])
y = np.tile(np.arange(uly+yres/2, lry, yres), [ncols, 1]).T

# Loop through the locations and find the raster pixel for each grid cell
for i in range(len(evaluation_cell_ids)):

    # Centre of the grid cell, taken as the midpoint of vertices 0 and 2 of the
    # outer ring. For a rectangular cell these are opposite corners, so the result
    # does not depend on whether the ring starts along a vertical or horizontal edge.
    # NOTE(review): assumes every cell polygon is a rectangle - confirm against the data.
    ring = evaluation_coordinates[i][0]
    X_center = (ring[0][0] + ring[2][0]) / 2
    Y_center = (ring[0][1] + ring[2][1]) / 2

    # Column (xloc) and row (yloc) of the raster pixel containing the centre
    xloc = -int((ulx - X_center) / xres)
    yloc = -int((uly - Y_center) / yres)

    # If the pixel is nan (e.g. water body), then look for the nearest non-nan pixel
    if np.isnan(data[yloc, xloc]):
        # Restrict the search to pixels whose centres lie inside the window
        locs = (
            (x > X_center - NAN_SEARCH_HALF_WIDTH) & (x < X_center + NAN_SEARCH_HALF_WIDTH)
            & (y > Y_center - NAN_SEARCH_HALF_WIDTH) & (y < Y_center + NAN_SEARCH_HALF_WIDTH)
        )
        dist = np.full(x.shape, np.nan)
        dist[locs] = np.sqrt((x[locs]-X_center)**2 + (y[locs]-Y_center)**2)
        dist[np.isnan(data)] = np.nan  # NaN pixels can never be chosen

        if np.all(np.isnan(dist)):
            raise ValueError(
                'No non-NaN UA SWE pixel found near grid cell ' + str(evaluation_cell_ids[i])
            )

        # dist has shape (nrows, ncols), so the unravelled index is (row, column),
        # i.e. (yloc, xloc) - the same order used to index `data` above.
        nearest_row, nearest_col = np.unravel_index(np.nanargmin(dist), dist.shape)
        yloc = int(nearest_row)
        xloc = int(nearest_col)

    UASWE_1km_XLocations.append(xloc)
    UASWE_1km_YLocations.append(yloc)


In [6]:
# Extract the UA Data (using the above lookup structures)

# Divisor used to convert the raster values to inches
# (25.4 mm per inch; presumably the rasters store millimetres - TODO confirm)
MM_PER_INCH = 25.4

# One row per evaluation grid cell, one column per date
AllLocations_data = np.ones([len(evaluation_cell_ids), len(times)])

# Loop through the times
for t, date_string in enumerate(times):
    yyyy, mm, dd = date_string.split('-')
    print('Reading Data for ' + yyyy + '/' + mm + '/' + dd)

    # Get data from the relevant file
    tif_path = 'Data/UASWE/' + yyyy + '/' + mm + '/' + dd + '/SWE.tif'
    src = gdal.Open(tif_path)
    if src is None:
        # gdal.Open returns None instead of raising when GDAL exceptions are not enabled
        raise FileNotFoundError('Unable to open UA SWE raster: ' + tif_path)
    data = src.ReadAsArray()
    src = None  # Drop the reference to the dataset once the array has been read

    # And extract the correct data for each grid cell.
    # max(0, value) clamps negative values to 0; a NaN value also becomes 0
    # because the comparison NaN > 0 is False.
    for i, (row, col) in enumerate(zip(UASWE_1km_YLocations, UASWE_1km_XLocations)):
        AllLocations_data[i, t] = max(0, data[row, col]) / MM_PER_INCH  # Convert to inches


Reading Data for 2013/01/01
Reading Data for 2013/01/08
Reading Data for 2013/01/15
Reading Data for 2013/01/22
Reading Data for 2013/01/29
Reading Data for 2013/02/05
Reading Data for 2013/02/12
Reading Data for 2013/02/19
Reading Data for 2013/02/26
Reading Data for 2013/03/05
Reading Data for 2013/03/12
Reading Data for 2013/03/19
Reading Data for 2013/03/26
Reading Data for 2013/04/02
Reading Data for 2013/04/03
Reading Data for 2013/04/09
Reading Data for 2013/04/16
Reading Data for 2013/04/23
Reading Data for 2013/04/29
Reading Data for 2013/04/30
Reading Data for 2013/05/03
Reading Data for 2013/05/07
Reading Data for 2013/05/14
Reading Data for 2013/05/21
Reading Data for 2013/05/25
Reading Data for 2013/05/28
Reading Data for 2013/06/01
Reading Data for 2013/06/04
Reading Data for 2013/06/08
Reading Data for 2013/06/11
Reading Data for 2013/06/18
Reading Data for 2013/06/25
Reading Data for 2013/12/03
Reading Data for 2013/12/10
Reading Data for 2013/12/17
Reading Data for 201

Reading Data for 2020/12/01
Reading Data for 2020/12/08
Reading Data for 2020/12/15
Reading Data for 2020/12/22
Reading Data for 2020/12/29
Reading Data for 2021/01/05
Reading Data for 2021/01/12
Reading Data for 2021/01/19
Reading Data for 2021/01/26
Reading Data for 2021/02/02
Reading Data for 2021/02/09
Reading Data for 2021/02/16
Reading Data for 2021/02/23
Reading Data for 2021/03/02
Reading Data for 2021/03/09
Reading Data for 2021/03/16
Reading Data for 2021/03/23
Reading Data for 2021/03/30
Reading Data for 2021/04/06
Reading Data for 2021/04/13
Reading Data for 2021/04/20
Reading Data for 2021/04/27
Reading Data for 2021/05/04
Reading Data for 2021/05/11
Reading Data for 2021/05/18
Reading Data for 2021/05/25
Reading Data for 2021/06/01
Reading Data for 2021/06/08
Reading Data for 2021/06/15
Reading Data for 2021/06/22
Reading Data for 2021/06/29


In [7]:
## Write Output File

# The context manager closes the file even if one of the writes raises
with open('Training Tables/ua_swe_data.csv', 'w') as f:

    # Write the first line: 'cell_id' followed by one column per date
    f.write(','.join(['cell_id'] + list(times)) + '\n')

    # For subsequent lines, write the cell id and then the data for each date
    # (values are clamped at 0 and written with two decimal places)
    for evaluation_cell_id, cell_values in zip(evaluation_cell_ids, AllLocations_data):
        formatted_values = ['{:.2f}'.format(max(0, value)) for value in cell_values]
        f.write(','.join([evaluation_cell_id] + formatted_values) + '\n')
