In [2]:
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from dateutil.parser import parse
from glob import glob
import xarray as xr
import os
import zipfile
import pickle as cPickle
from scipy import stats

dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%dT%H:%M:%S')

import warnings

In [3]:
output_folder = '/pl/active/icesheetsclimate/firn_iceshelves/deg0C/output/' #folder where SNOWPACK output is stored

In [4]:
###### ------- MAIN FUNCTION -------- ######
#load all data for station and create a dictionary structure
    #stn
        #hist
            #pro - profile data (depth, density, subsurface temperature, air, water, ice %)
            #ts - smet timeseries data (cold content, air temp, windspeed, rainfall, sublimation, melt, freeze, snowfall)
        #ssp1
            #pro
            #ts
        #ssp3
            #......
def load_data(stn):
    dc = {}
    print('loading historical')
    dc['hist'] = {}
    pro, smet = read_zip_output_files(stn, 'HIST')
    dc['hist']['pro'] = pro
    dc['hist']['ts'] = smet

    
    return dc

###### ---------------------------------------- ######

#open zip files and load data from .smet and .pro files
def read_zip_output_files(stn, a):
    if a == 'HIST':
        zz=f'{output_folder}HIST/{stn}.zip'
        with zipfile.ZipFile(zz) as z:
            files = z.namelist()
            if f'output//{stn}.pro' in files:
                profile = f'output//{stn}.pro'
                smetfile = f'output//{stn}.smet'
            elif f'output/{stn}.pro' in files:
                profile = f'output/{stn}.pro'
                smetfile = f'output/{stn}.smet'
            else:
                profile = f'{stn}.pro'
                smetfile = f'{stn}.smet'
    else:
        zz = f'{output_folder}{a}/{stn}_{a}_output.zip'
        profile = f'output/{stn}_{a}.pro'
        smetfile = f'output/{stn}_{a}.smet'
    with zipfile.ZipFile(zz) as z:
        with z.open(profile, 'r') as p:
            pro = load_pro_data(p)
        with z.open(smetfile, 'r') as s: 
            smet = load_smet_data(s)
            
    return pro, smet

#load data from .pro file
def load_pro_data(f):
    #important data codes from SNOWPACK output
    #0500 = date
    #0501 = height (cm)
    #0502 = density (kg/m3)
    #0503 = snow temperature
    #0506 = volumetric water (%)
    #0515 = volumetric ice (%)
    #0516 = volumetric air (%)
    
    f.readline()#header
    f.readline()#header
    lat = f.readline() #latitude
    lat = float(str(lat, 'utf-8').split(' ')[-1][:-2])
    lon = f.readline() #longitude
    lon = float(str(lon, 'utf-8').split(' ')[-1][:-2])

    #more header lines
    header = 'nnn'
    while header != b'[DATA]\n':
        header = f.readline()
        
    all_data = list()
    
    for line in f:
        line = str(line, 'utf-8')
        if line[0:4] == '0500': # date
            dd = line.split(',')[1].split('.')
            dt = datetime.datetime(int(dd[2][0:4]), int(dd[1]), int(dd[0]), int(dd[2][5:7]))
            year = dt.year
            temp_data = {} #dictionary of data for each timestep
            temp_data['date'] = dt
        if year >= 1985:
            if line[0:4] == '0501': # depth
                temp_depth = list(map(float, line.split('\n')[0].split(',')[2:]))
                temp_depth = np.asarray(temp_depth)/100 #convert to meters
                d = temp_depth + (100-temp_depth[-1]) #convert from height to depth
                aa = np.argwhere(d < 0)
                if len(aa > 1):
                    a = aa[-1][0]+1
                else:
                    a = 0
                temp_data['depth'] = d[a:]
            elif line[0:4] == '0502': # density
                temp_density = list(map(float, line.split('\n')[0].split(',')[2:]))
                temp_data['density'] = np.asarray(temp_density)[a:]
            elif line[0:4] == '0516': # %air
                temp_air = list(map(float, line.split('\n')[0].split(',')[2:]))
                temp_data['air'] = np.asarray(temp_air)[a:]/100
            elif line[0:4] == '0517': # append to list
                all_data.append(temp_data)
            
    return all_data
        
#load timeseries smet data
def load_smet_data(f): 
     
    dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%dT%H:%M:%S')

    smet = pd.read_csv(f, delim_whitespace=True, skiprows=19, parse_dates=['time'], date_parser=dateparse,
                           names = ['time', 'Qs', 'Ql', 'Qg', 'TSG', 'Qg0', 'Qr', 'Qmf', 'CC', 'OLWR', 
                            'ILWR', 'LWR_net', 'OSWR', 'ISWR', 'Qw', 'pAlbedo', 'mAlbedo', 'ISWR_h', 'ISWR_dir',
                            'ISWR_diff', 'TA', 'TSS_mod', 'TSS_meas', 'T_bottom', 'RH', 'VW', 'VW_drift', 'DW',
                            'MS_Snow', 'HS_mod', 'HS_meas', 'hoar_size', 'wind_trans24', 'HN24', 'HN72_24', 'SWE',
                            'MS_Water', 'MS_Wind', 'rainfall', 'MS_SN_Runoff', 'MS_Soil_Runoff', 'sublimation',
                            'MS_Evap', 'melt', 'freeze', 'MS_Sublimation_dHS', 'MS_Settling_dHS', 'MS_Redeposit_dHS',
                            'MS_Redeposit_dRHO', 'Sclass1', 'Sclass2', 'zSd', 'Sd', 'zSn', 'Sn', 'zSs', 'Ss', 'zS4',
                            'S4', 'zS5', 'S5'])
    smet = smet[['time', 'TA', 'VW', 'MS_Snow', 'MS_Wind', 'rainfall', 'sublimation','melt', 'freeze']]
    smet['snowfall'] = smet['MS_Snow'] - smet['MS_Wind']
    smet = smet.drop(columns = ['MS_Snow', 'MS_Wind'])
    
    #convert from kg/m2/hr to kg/m2
    smet['dt'] = (smet['time']-smet['time'].shift()).dt.total_seconds()/3600
    smet['snowfall'] = smet['snowfall'] * smet['dt']
    smet['rainfall'] = smet['rainfall'] * smet['dt']
    smet = smet.drop(columns = ['dt'])
    smet = smet[smet.time.dt.year >=1985]
    return smet

#write pickle file
def write_pickle(stn, dc):
    with open(f'/pl/active/icesheetsclimate/firn_iceshelves/deg0C/pickles/{stn}.p', 'wb') as fp:
        cPickle.dump(dc, fp, protocol=cPickle.HIGHEST_PROTOCOL)

## Create pickle files with historical data

In [5]:
zip_files = glob(f'{output_folder}HIST/*.zip')
pickles = glob(f'/pl/active/icesheetsclimate/firn_iceshelves/deg0C/pickles/*')
n = len(zip_files)
i = 0

for zz in zip_files:
    
    stn = zz.split('/')[-1].split('.')[0].split('_')[0]
    print(f'{i}/{n} {stn}')
    if f'/pl/active/icesheetsclimate/firn_iceshelves/deg0C/pickles/{stn}.p' not in pickles:
        df = load_data(stn)
        write_pickle(stn, df)
    i = i+1


## Add to pickle files

In [25]:
def get_pickle_data(site):
    site_file = f'/pl/active/icesheetsclimate/firn_iceshelves/deg0C/pickles/{site}.p'
    with open(site_file, 'rb') as fp:
        data = cPickle.load(fp)
    return data

In [26]:
scenario = 'ssp1'
s2 = 'SSP1'

In [30]:
n = 1

for p in pickles:
    stn = p.split('/')[-1][:-2]
    print(stn, f'{n}/{len(pickles)}')
    dc = get_pickle_data(stn)
    
    if scenario not in dc:
        print('... loading')
        dc[scenario] = {}
        pro,smet = read_zip_output_files(stn, s2)
        dc[scenario]['pro'] = pro
        dc[scenario]['ts'] = smet
        
        write_pickle(stn, dc)
    
    
    n = n+1

VIR116 1/1
... loading


  dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%dT%H:%M:%S')
