# Raw HRDPS prep : formatting for MeteoIO input

The SMET file headers are updated with the new centroid information and corrected to match the format expected by MeteoIO.

Files are also downsampled to 10 minutes.


/!\ Some corrections in this notebook were hand-tailored to my specific needs (e.g. specific outliers in the raw HRDPS files). Do not hesitate to adapt this code to your needs.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os
import pytz
import sys

## Function definitions

In [2]:
def import_raw_smet(path):
    '''
    Read a raw smet file into a DataFrame indexed by local time.

    The first 18 lines of the file are skipped and the remaining
    whitespace-separated rows are read with the first column (timestamp) as
    a parsed datetime index. Rows where ISWR is above 1500 or ILWR is above
    600 are treated as glitches: the row is blanked and then filled back by
    linear interpolation. The index is finally localized as UTC and
    converted to the 'Canada/Pacific' time zone.

    Parameters
    ----------
    path : str
        Path to the raw smet file.

    Returns
    -------
    pandas.DataFrame
        Columns TA, RH, VW, DW, ISWR, ILWR, PSUM with a time-zone-aware index.
    '''
    gem_df = pd.read_csv(path,
                         skiprows=18,
                         # Raw string: '\s' in a plain literal is an invalid escape sequence
                         sep=r'\s+',
                         parse_dates=True,
                         index_col=0,
                         names=['timestamp', 'TA', 'RH', 'VW', 'DW', 'ISWR', 'ILWR', 'PSUM'])

    # Clean up glitch in the radiation variables.
    # NOTE(review): every column of a flagged row is blanked, not only the
    # radiation value - confirm this is the intended behaviour.
    glitch_rows = (gem_df['ISWR'] > 1500) | (gem_df['ILWR'] > 600)
    gem_df.loc[glitch_rows] = np.nan
    gem_df = gem_df.interpolate(method='linear')

    # Timestamps in the raw file are taken to be UTC
    gem_df.index = gem_df.index.tz_localize('UTC').tz_convert('Canada/Pacific')

    return gem_df

def format_header(path, columns):
    '''
    Build the corrected header lines for one smet file.

    The station number is parsed from the file name: the part before the
    first '.', minus its first four characters (e.g. 'VIR0123.smet' -> 123).
    It is used to look up the station centroid in two module-level tables
    that must exist before this function is called:

    - ``hrdps_cdem_4326``: centroid x / y are written as longitude / latitude
    - ``hrdps_cdem``: centroid x / y are written as easting / northing

    The '-999.000000' placeholders on the latitude (line index 4) and
    longitude (line index 5) lines are replaced, easting / northing / epsg
    lines are inserted at indices 7-9, and the fields line is rewritten to
    follow ``columns``.

    Parameters
    ----------
    path : str
        Path to the raw smet file whose header is used as a template.
    columns : iterable of str
        Data column names, in the order they will be written (the timestamp
        column is added in front automatically).

    Returns
    -------
    list of str
        Header lines, each ending with a newline: the first 12 lines of the
        edited file followed by the fields line and the line after it
        (presumably the data section marker - confirm against the raw files).

    Raises
    ------
    IndexError
        If the station is not present in one of the centroid tables.
    '''
    with open(path, 'r') as f:
        data = f.readlines()

    # basename() also copes with backslash-separated paths on Windows
    file_name = os.path.basename(path)
    station = int(file_name.split('.')[0][4:])

    # One lookup per table instead of one per coordinate
    geographic_pt = hrdps_cdem_4326[hrdps_cdem_4326['Station'] == station]['centroid'].values[0]
    projected_pt = hrdps_cdem[hrdps_cdem['Station'] == station]['centroid'].values[0]
    lon = str(np.round(geographic_pt.x, 6))
    lat = str(np.round(geographic_pt.y, 6))
    x = str(np.round(projected_pt.x, 6))
    y = str(np.round(projected_pt.y, 6))

    # Easting/northing don't exist in the original file: keep the untouched
    # latitude line as a template before its placeholder is replaced.
    template_line = data[4]

    data[4] = data[4].replace('-999.000000', lat)
    data[5] = data[5].replace('-999.000000', lon)

    x_line = template_line.replace('latitude', 'easting').replace('-999.000000', x)
    y_line = template_line.replace('latitude', 'northing').replace('-999.000000', y)
    data.insert(7, x_line)
    data.insert(8, y_line)
    data.insert(9, 'epsg              = 32611\n')

    # Keep the first 12 lines, skip the next 7, then keep the fields line
    # and the line that follows it.
    header = data[:12] + data[19:21]
    # Format the fields line to the dataframe's columns order
    header[12] = header[12].split('=')[0] + '= timestamp\t' + '\t'.join(columns) + '\n'

    return header

def write_smet(filename, met_df, header):
    '''
    Write a corrected smet file: header lines followed by the data rows.

    Parameters
    ----------
    filename : str
        Path of the file to create (overwritten if it already exists).
    met_df : pandas.DataFrame
        Data to write. Rows are written tab-separated, index first, floats
        with three decimals and without a column-name row.
    header : iterable of str
        Header lines, each already terminated by a newline; written verbatim
        before the data.
    '''
    csv_options = dict(sep='\t', float_format='%.3f', header=False)
    with open(filename, 'w') as f:
        # Use the header handed in by the caller, not a notebook-level global
        f.writelines(header)
        try:
            # Keyword spelling used by pandas >= 1.5
            met_df.to_csv(f, lineterminator='\n', **csv_options)
        except TypeError:
            # Older pandas only accepts the previous spelling
            met_df.to_csv(f, line_terminator='\n', **csv_options)

## Path setup

In [40]:
# Directory holding the original raw HRDPS smet files
smet_root_path = 'D:/DATA/METEO_2019-2020/HRDPS/RAW'
# Directory receiving the smet files rewritten with the updated centroid
smet_dest_path = 'D:/DATA/METEO_2019-2020/HRDPS/METEOIO_READY_1H_good-geoloc_alpine3d-exp'
# Centroid table, loaded from its pickle file
hrdps_cdem = pd.read_pickle('../ancillary_data/topographic_data/hrdps_cdem_correspondance_custom_sorter_min_20x20.pkl')

## Main code 

In [43]:
# Add EPSG:4326 geometry to the hrdps_cdem dataframe. MeteoIO needs coordinates both in 32611 and 4326
temp = hrdps_cdem.loc[:, ['Station', 'centroid', 'elevation']].copy()
# Select the active geometry by column name so that to_crs() reprojects the
# 'centroid' column itself - that is the column format_header() reads.
# Assigning a Series to `.geometry` may instead fill a separate geometry
# column, depending on the geopandas version.
temp = temp.set_geometry('centroid')
hrdps_cdem_4326 = temp.to_crs(4326)

# Make sure the destination directory exists before writing into it
os.makedirs(smet_dest_path, exist_ok=True)

# Perform header modification for every station of the centroid table
for station_number in hrdps_cdem_4326['Station'].values:
    filename = f'VIR0{station_number}.smet'
    full_smet_path = f'{smet_root_path}/{filename}'
    raw_smet_df = import_raw_smet(full_smet_path)
    # NOTE: the 10-minute resampling step (ten_minute_resample) is currently
    # disabled; the data are written at the time step of the raw file.
    meteoio_header = format_header(full_smet_path, raw_smet_df.columns)
    write_smet(f'{smet_dest_path}/{filename}', raw_smet_df, meteoio_header)