In [1]:
## Script to generate ground_measures_data.csv (from Snowcast data files)

In [2]:
## Setup Python Environment

import sys
import os
import numpy as np

In [3]:
## Get the Metadata for the Ground Features (for the Evaluation stage)

# Collect the first CSV column (the station id) of every data row in the
# Evaluation-stage metadata file, preserving file order.
ground_feature_ids = []
with open('Data/Snowcast Evaluation/ground_measures_metadata.csv') as f:
    # The first line holds the column names.
    header = f.readline().rstrip('\r\n').split(',')

    # Read the IDs (so we can match with features from the development stage)
    for tline in f:
        # Drop the line terminator before splitting so it can never end up
        # inside an id (e.g. for a row that has only one column).
        tline = tline.rstrip('\r\n')
        if tline == '':
            # A blank line (typically at the end of the file) is not a station.
            continue
        ground_feature_ids.append(tline.split(',')[0])
    

In [4]:
## Get the Development Stage Ground Features (Training Dataset)

# Parse the training feature table in a single pass over the file.
# The first line is the header: column 0 names the id column and the remaining
# columns are the timestamps.  Every following line is one station: its id
# followed by one value per timestamp (empty where there is no measurement).
train_rows = []
with open('Data/Snowcast Development/ground_measures_train_features.csv') as f:
    header = f.readline().rstrip('\r\n').split(',')
    for tline in f:
        tline = tline.rstrip('\r\n')
        if tline == '':
            continue  # ignore blank lines
        train_rows.append(tline.split(','))

# Get the timestamp information and ground feature ids
development_feature_ids = np.array([fields[0] for fields in train_rows])  # So we can do numpy stuff to it
times_train = np.array(header[1:])

# One row per station, one column per timestamp; NaN marks "no measurement".
num_times = len(times_train)
num_lines = len(development_feature_ids)
development_GroundSWE = np.full((num_lines, num_times), np.nan)

# Read the data
for fields in train_rows:
    # Boolean mask selecting every row whose id equals this line's id.
    loc = development_feature_ids == fields[0]
    for d, value in enumerate(fields[1:]):
        # Strip before testing for emptiness so that stray whitespace is not
        # mistaken for data, while short values such as '0' are still kept.
        value = value.strip()
        if value != '':
            development_GroundSWE[loc, d] = float(value)

In [5]:
## Get the Development Stage Ground Features (Test Dataset)

# Parse the test feature table in a single pass.  Rows are placed according to
# development_feature_ids (built from the training file), so the resulting
# array has the same row order as development_GroundSWE.  A row whose id is not
# present in development_feature_ids matches nothing and is ignored.
with open('Data/Snowcast Development/ground_measures_test_features.csv') as f:
    # Get the timestamp information
    header = f.readline().rstrip('\r\n').split(',')
    times_test = np.array(header[1:])

    # One row per known station, one column per timestamp; NaN marks "no measurement".
    num_times = len(times_test)
    num_lines = len(development_feature_ids)
    development_GroundSWE_test = np.full((num_lines, num_times), np.nan)

    # Read the data
    for tline in f:
        tline = tline.rstrip('\r\n')
        if tline == '':
            continue  # ignore blank lines
        fields = tline.split(',')
        # Boolean mask selecting every row whose id equals this line's id.
        loc = development_feature_ids == fields[0]
        for d, value in enumerate(fields[1:]):
            # Strip before testing for emptiness so that stray whitespace is not
            # mistaken for data, while short values such as '0' are still kept.
            value = value.strip()
            if value != '':
                development_GroundSWE_test[loc, d] = float(value)


In [6]:
## Combine the Development Stage Datasets and Write Output File

## Combine time vectors and SWE data
# NOTE: development_GroundSWE is rebound to the combined array here, so running
# this cell again without first re-running the training-data cell would append
# the test columns a second time.
development_GroundTimes = np.concatenate((times_train, times_test))
development_GroundSWE = np.concatenate((development_GroundSWE, development_GroundSWE_test), axis=1)

## Write the output file
# Make sure the destination folder exists, and use a context manager so the
# file is closed even if a write fails part-way through.
os.makedirs('Training Tables', exist_ok=True)
with open('Training Tables/ground_measures_data.csv', 'w') as f:
    num_out_times = len(development_GroundTimes)

    # Write the first line
    f.write(','.join(['station_id'] + list(development_GroundTimes)) + '\n')

    # For subsequent lines, write the cell id and then the data for each date
    for ground_feature_id in ground_feature_ids:
        # Look the station up once per row rather than once per column.
        matches = np.flatnonzero(development_feature_ids == ground_feature_id)
        if matches.size:
            # Use the first matching row when an id occurs more than once.
            row_values = development_GroundSWE[matches[0], :num_out_times]
            cells = ['{:.2f}'.format(value) for value in row_values]
        else:
            # Station has no development-stage data: fill the whole row with nan.
            cells = ['nan'] * num_out_times
        f.write(','.join([ground_feature_id] + cells) + '\n')
