# Raw HRDPS prep : formatting for MeteoIO input

The headers of the SMET files are updated with the new centroid information and corrected to match the formatting MeteoIO expects.

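Files can also be resampled to a 10-minute time step. Note that this step is currently commented out in the main loop, so the files are written at their original time step.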


/!\ Some corrections in this notebook were hand-tailored to my specific needs (e.g. specific outliers in the raw HRDPS files). Do not hesitate to adapt this code to your own needs.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os
import pytz
import sys

## Function definitions

In [19]:
def import_raw_smet(path):
    '''
    Load the data section of a raw smet file into a DataFrame.

    The first 20 lines of the file are skipped, the remaining
    whitespace-separated columns are named timestamp, TA, RH, VW, DW, ISWR,
    ILWR and PSUM, and the timestamp column becomes the (parsed) index.

    Rows where ISWR exceeds 1500 or ILWR exceeds 600 are treated as glitches:
    the whole row is blanked and then rebuilt by linear interpolation.

    Timestamps are returned exactly as read from the file; the time zone
    conversion (UTC to Canada/Pacific) that used to live here is disabled.
    '''
    column_names = ['timestamp', 'TA', 'RH', 'VW', 'DW', 'ISWR', 'ILWR', 'PSUM']
    frame = pd.read_csv(
        path,
        sep=r'\s+',
        skiprows=20,
        names=column_names,
        index_col=0,
        parse_dates=True,
    )

    # A boolean row mask used as a key blanks every column of the matching
    # rows, not only the radiation column that triggered it.
    # NOTE(review): confirm that wiping the full row (rather than just the
    # radiation value) is the intended clean-up.
    for column, ceiling in (('ISWR', 1500), ('ILWR', 600)):
        frame[frame[column] > ceiling] = np.nan

    # Rebuild the blanked rows from their neighbours.
    frame = frame.interpolate(method='linear')

    return frame

def format_header(path):
    '''
    Build the corrected header lines for one smet file.

    The station name is taken from the file name (the text before its first
    dot, after the last '/' of ``path``) and looked up in the 'name' column of
    the module-level tables ``hrdps_cdem_4326`` (longitude / latitude) and
    ``hrdps_cdem`` (easting / northing). Coordinates are rounded to 6
    decimals.

    Header lines 2 to 7, 9, 13 and 14 (0-based) are rewritten; the EPSG code
    is hard-coded to 32618. The function returns the first 15 lines of the
    file followed by a blank line and the '[DATA]' marker. The file itself is
    not modified.
    '''
    with open(path, 'r') as smet_file:
        lines = smet_file.readlines()

    station = path.split('/')[-1].split('.')[0]

    # One lookup per table: the geometry of the first row matching the station.
    point_4326 = hrdps_cdem_4326[hrdps_cdem_4326['name'] == station]['geometry'].values[0]
    point_utm = hrdps_cdem[hrdps_cdem['name'] == station]['geometry'].values[0]

    lon = str(np.round(point_4326.x, 6))
    lat = str(np.round(point_4326.y, 6))
    x = str(np.round(point_utm.x, 6))
    y = str(np.round(point_utm.y, 6))

    def rekeyed(index, key):
        # Keep the value found after the first '=' and rewrite the key part.
        return f'{key} =' + lines[index].split('=')[1]

    replacements = {
        2: rekeyed(2, 'station_id'),
        3: f'latitude = {lat}\n',
        4: f'longitude = {lon}\n',
        5: f'easting = {x}\n',
        6: f'northing = {y}\n',
        7: 'epsg = 32618\n',
        9: 'fields = timestamp TA RH VW DW ISWR ILWR PSUM\n',
        13: rekeyed(13, 'slope_angle'),
        14: rekeyed(14, 'slope_azi'),
    }
    for index, new_line in replacements.items():
        lines[index] = new_line

    return lines[:15] + ['\n', '[DATA]\n']

def write_smet(filename, met_df, header):
    '''
    Write a corrected smet file.

    Parameters
    ----------
    filename : path of the file to create (overwritten if it already exists).
    met_df : DataFrame holding the data section; its index is written as the
        first column.
    header : iterable of header lines, each already ending with a newline.
        They are written verbatim before the data.

    The data are tab-separated, floats are written with 3 decimals, datetimes
    as YYYY-MM-DDTHH:MM:SS, and no column-name row is emitted.
    '''
    with open(filename, 'w') as f:
        # Use the header passed by the caller, not a notebook-level global.
        f.writelines(header)
        met_df.to_csv(f, sep='\t', float_format='%.3f', header=False, lineterminator='\n', date_format='%Y-%m-%dT%H:%M:%S')
In [4]:
# Load the HRDPS / DEM centroid correspondence table and build an EPSG:4326 copy of it.
# MeteoIO needs coordinates both in the projected CRS (EPSG:32618, per the file name
# and the epsg written by format_header) and in EPSG:4326.
# NOTE(review): the 'Path setup' and 'Main' cells repeat the same load and reprojection.
hrdps_cdem = pd.read_pickle('C:/Users/PaulBillecocq/OneDrive - HOROS/Documents/UdS/KRG_DATA/a3d-prep/utm18n/hrdps-subgridding-prep/hrdps-dem-centroid-correspondance_32618.pkl')
# Keep only the columns needed for the header update.
temp = hrdps_cdem.loc[:, ['name', 'geometry', 'elevation']].copy()
#temp.geometry = temp['centroid']
# Reprojected copy; format_header reads longitude / latitude from it.
hrdps_cdem_4326 = temp.to_crs(4326)

## Path setup

In [13]:
# Centroid correspondence table, loaded from its pickle file
# (format_header reads easting / northing from it)
hrdps_cdem = pd.read_pickle('C:/Users/PaulBillecocq/OneDrive - HOROS/Documents/UdS/KRG_DATA/a3d-prep/utm18n/hrdps-subgridding-prep/hrdps-dem-centroid-correspondance_32618.pkl')
# Folder holding the original raw HRDPS smet files (input of the main loop)
smet_root_path = 'C:/Users/PaulBillecocq/OneDrive - HOROS/Documents/UdS/KRG_DATA/HRDPS/00_raw'
# Folder where the smet files with the updated centroid header are written
smet_dest_path = 'C:/Users/PaulBillecocq/OneDrive - HOROS/Documents/UdS/KRG_DATA/HRDPS/02_centroid-adjusted'

## Main 

In [20]:
# Build an EPSG:4326 copy of the hrdps_cdem dataframe. MeteoIO needs coordinates
# both in the projected CRS (EPSG:32618, the epsg written by format_header) and in 4326
temp = hrdps_cdem.loc[:, ['name', 'geometry', 'elevation']].copy()
#temp.geometry = temp['centroid']
hrdps_cdem_4326 = temp.to_crs(4326)

# Rewrite every station file: read the raw data, rebuild the header, write the result.
# One '<name>.smet' file is expected in smet_root_path for each entry of the 'name' column.
for station_number in list(hrdps_cdem_4326['name'].values):
    filename = f'{station_number}.smet'
    full_smet_path = f'{smet_root_path}/{filename}'
    # Data section, with the radiation glitches interpolated out
    raw_smet_df = import_raw_smet(full_smet_path)
    # 10-minute resampling is disabled; the data are written at their original time step
    # ten_min_smet_df = ten_minute_resample(raw_smet_df)
    # Header lines carrying the updated centroid coordinates
    meteoio_header = format_header(full_smet_path)
    # Same file name, written to the destination folder
    write_smet(f'{smet_dest_path}/{filename}', raw_smet_df, meteoio_header)

In [7]:
# Inspect the DataFrame read for the last station handled by the main loop
raw_smet_df

Unnamed: 0_level_0,TA,RH,VW,DW,ISWR,ILWR,PSUM
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-01 06:00:00,255.26,0.71,5.91,178.68,0.0,204.57,0.0
2022-01-01 07:00:00,255.28,0.70,5.88,187.88,0.0,219.52,0.0
2022-01-01 08:00:00,255.32,0.71,4.95,198.16,0.0,222.85,0.0
2022-01-01 09:00:00,255.40,0.70,2.61,207.10,0.0,227.39,0.0
2022-01-01 10:00:00,255.34,0.69,0.44,240.28,0.0,221.93,0.0
...,...,...,...,...,...,...,...
2023-07-31 01:00:00,281.80,0.76,0.77,91.35,50.0,280.90,0.0
2023-07-31 02:00:00,282.31,0.74,0.43,135.00,0.0,282.76,0.0
2023-07-31 03:00:00,282.37,0.75,0.05,86.79,0.0,284.30,0.0
2023-07-31 04:00:00,281.93,0.77,0.19,93.63,0.0,282.50,0.0
