## .top file parser functions

In [1]:
def read_rawdata(file_nr):
    """Return the content of one ``NVE_par_<file_nr>.top`` file as text.

    The file is opened in binary mode and the resulting ``bytes`` object is
    passed through ``str()``.  This yields the *repr* of the bytes
    (``"b'...'"`` with escape sequences spelled out), not a decoded string.

    :param file_nr: number inserted into the file name.
    :return: ``str`` representation of the raw file bytes.
    """
    filepath = "../Parameter files (.top)/" + f"NVE_par_{file_nr!s}.top"

    with open(filepath, 'rb') as handle:
        # Cast byte to string data
        return str(handle.read())

In [2]:
def get_param_value(data, key: str):
    """Read the numeric value belonging to ``key`` from ``.top`` file text.

    The value is taken from a fixed-width window positioned relative to the
    first occurrence of ``key`` in ``data``.

    :param data: text of a ``.top`` file (e.g. the result of ``read_rawdata``).
    :param key: parameter name to look up, e.g. ``"RCORR"``.
    :return: the parameter value as ``float``.
    :raises ValueError: if ``key`` does not occur in ``data`` or the window
        does not contain a number.
    """
    index = data.find(key)
    if index == -1:
        # str.find reports "not found" as -1; slicing with that index would
        # silently read an unrelated part of the file instead of failing.
        raise ValueError(f"Parameter {key!r} not found in data")

    # NB hardcoded read indexes, found by trial
    OFFSET1 = 43   # end of the value window, relative to the start of the key
    OFFSET2 = -7   # start of the window, counted back from its end

    raw_value = data[index + OFFSET1 + OFFSET2:index + OFFSET1]
    try:
        return float(raw_value)
    except ValueError as err:
        raise ValueError(
            f"Could not parse value for parameter {key!r} from {raw_value!r}"
        ) from err

## Key data to read from .top file

In [3]:
## Interesting key data to read

# Elevation data: ELEV0 ... ELEV10
ELEVS = [f"ELEV{i}" for i in range(11)]

# Evaporation per month
EVAPOS = [
    "EPJAN", "EPFEB", "EPMAR", "EPAPR", "EPMAY", "EPJUN",
    "EPJUL", "EPAUG", "EPSEP", "EPOKT", "EPNOV", "EPDES",
]

# Other params
OTHER = ["RCORR", "SCORR", "PGRAD", "TPGRAD"]

# One flat list of all keys: elevations, other params, monthly evaporation
interesting_keys = [*ELEVS, *OTHER, *EVAPOS]

In [4]:
def read_interesting_params(data, keys):
    """Look up every name in ``keys`` and collect the values in a dict.

    :param data: text handed on unchanged to ``get_param_value``.
    :param keys: iterable of parameter names.
    :return: dict mapping each key to the value ``get_param_value`` returns
        for it, in the iteration order of ``keys``.
    """
    return {name: get_param_value(data, name) for name in keys}

In [5]:
# Parse the parameter file with number 1 and display the extracted values
file_nr = 1
data = read_rawdata(file_nr)
read_interesting_params(data, interesting_keys)

{'ELEV0': 670.0,
 'ELEV1': 894.0,
 'ELEV2': 23.0,
 'ELEV3': 109.0,
 'ELEV4': 196.0,
 'ELEV5': 261.0,
 'ELEV6': 304.0,
 'ELEV7': 335.0,
 'ELEV8': 361.0,
 'ELEV9': 391.0,
 'ELEV10': 534.0,
 'RCORR': 1.594,
 'SCORR': 1.768,
 'PGRAD': -0.5,
 'TPGRAD': -0.0,
 'EPJAN': 0.1,
 'EPFEB': 0.2,
 'EPMAR': 0.7,
 'EPAPR': 1.0,
 'EPMAY': 2.3,
 'EPJUN': 3.5,
 'EPJUL': 3.5,
 'EPAUG': 2.3,
 'EPSEP': 1.0,
 'EPOKT': 0.7,
 'EPNOV': 0.2,
 'EPDES': 0.1}

## NVE specific data loader function

In [8]:
# Read all input txt files
import glob
from typing import Optional, Tuple

import pandas as pd

def dateparse(dates, times):
    """Combine parallel date and time strings into ``datetime`` objects.

    :param dates: iterable of date strings formatted ``dd.mm.yyyy``.
    :param times: iterable of time strings formatted ``hh:mm:ss``.
    :return: list of ``datetime.datetime``, one per (date, time) pair.
    """
    # ``pd.datetime`` was only an alias for the standard-library class.  It is
    # deprecated and no longer available in newer pandas releases, so the
    # stdlib class is used directly.  Imported locally so that the function
    # does not depend on what the module-level name ``datetime`` is bound to.
    from datetime import datetime

    return [
        datetime.strptime(date + time, "%d.%m.%Y%H:%M:%S")
        for date, time in zip(dates, times)
    ]

def load_forcing_and_discharge(catchment: int) -> Optional[pd.DataFrame]:
    """Load the input time series of a specific catchment.

    Searches ``../Input files (.txt)`` for a ``.txt`` file whose name ends in
    ``_<catchment>.txt`` and reads it as a whitespace-delimited table (the
    first line of the file is skipped).  The ``dd.mm.yyyy`` and ``hh:mm:ss``
    columns are merged into a single ``timestamp`` column placed first.

    :param catchment: number (id)

    :return: pd.DataFrame with the data of the catchment, or ``None`` (after
        printing a message) if no file for the catchment exists.
    """
    path = '../Input files (.txt)'

    # Loop through files and find correct catchment
    for file_path in glob.glob(path + "/*.txt"):
        # Name is formatted `./Input files (.txt)/nve_inp_XX.txt`
        try:
            number = int(file_path.split('_')[-1].split('.')[0])
        except ValueError:
            # A .txt file that does not follow the naming scheme; ignore it
            # instead of aborting the whole search.
            continue

        if number != catchment:
            continue

        df = pd.read_csv(
            file_path,
            encoding='cp1252',
            skiprows=[0],
            delimiter=r"\s+",
            # Keep the raw date/time text so it can be parsed explicitly.
            dtype={'dd.mm.yyyy': str, 'hh:mm:ss': str},
        )
        # Build the timestamp explicitly rather than through read_csv's
        # deprecated ``date_parser`` / nested ``parse_dates`` options.
        timestamp = pd.to_datetime(
            df.pop('dd.mm.yyyy') + df.pop('hh:mm:ss'),
            format="%d.%m.%Y%H:%M:%S",
        )
        df.insert(0, "timestamp", timestamp)
        return df

    # Return None if catchment does not exist
    print("Catchment does not exist")
    return None

In [12]:
# Display the first 31 rows loaded for catchment 1
load_forcing_and_discharge(1)[0:31]

  pd.datetime.strptime(date + time, "%d.%m.%Y%H:%M:%S")


Unnamed: 0,timestamp,mm,grC,grC.1,m3/s
0,2000-01-01,3.3,-3.75,-3.75,0.27
1,2000-01-02,3.79,-2.02,-2.02,0.27
2,2000-01-03,6.98,-3.84,-3.84,0.27
3,2000-01-04,16.53,-3.8,-3.8,0.29
4,2000-01-05,4.44,-4.78,-4.78,0.31
5,2000-01-06,6.51,-2.75,-2.75,0.46
6,2000-01-07,13.87,-3.44,-3.44,0.44
7,2000-01-08,10.74,-1.23,-1.23,0.7
8,2000-01-09,16.58,-2.6,-2.6,0.51
9,2000-01-10,0.09,-4.47,-4.47,0.43


## Read Output, Simulated and Residuals data

In [None]:
import pandas as pd

def read_output_simulated_residual_data(file_nr):
    """Load one ``HBV_output_<file_nr>.txt`` file as a DataFrame.

    The ``DATE`` column is renamed to ``timestamp`` and converted to
    datetime, the ``Unnamed: 0`` column is dropped and the rows are ordered
    by timestamp (the original index labels are kept).

    :param file_nr: number inserted into the file name.
    :return: pd.DataFrame sorted by ``timestamp``.
    """
    filepath = "../Residual, Output, Simulated data/" + f"HBV_output_{file_nr!s}.txt"

    # Read raw csv; rename columns to match LSTM notebook
    frame = pd.read_csv(filepath).rename(columns={"DATE": "timestamp"})

    # Cast timestamp to datetime
    frame['timestamp'] = pd.to_datetime(
        frame['timestamp'], format='%Y/%m/%d', yearfirst=True
    )

    # Remove first column, then order chronologically
    return frame.drop(columns=['Unnamed: 0']).sort_values(by="timestamp")

In [None]:
# Display the first 35 rows of the output data for the current file_nr
read_output_simulated_residual_data(file_nr)[0:35]


In [None]:
df = read_output_simulated_residual_data(file_nr)

import datetime as dt
# The timestamp column is already datetime-typed (it is converted with
# pd.to_datetime inside read_output_simulated_residual_data), so an entry
# exposes ``.month`` directly.  Passing it to ``strptime`` would raise a
# TypeError because ``strptime`` only accepts strings.
date = df["timestamp"][1]
date.month

## Merge parameter data with rainfall-runoff data

In [None]:
#from calendar import monthrange
#num_days = monthrange(2020, 2)[1] # num_days = 28
#print(num_days) # Prints 28

# Parse the parameter file with number 1; `ip` is read as a global by
# create_time_series
file_nr = 1
data = read_rawdata(file_nr)
ip = read_interesting_params(data, interesting_keys)


def create_time_series(df, key):
    """Add an ``elev0`` column that carries ``ip["ELEV0"]`` on January rows.

    Rows whose ``timestamp`` falls in January receive the ``ELEV0`` value of
    the module-level ``ip`` dict; all other rows receive ``-1``.  ``df`` is
    modified in place and also returned.

    :param df: DataFrame with a ``timestamp`` column of datetime values.
    :param key: currently unused.
    :return: the same ``df`` object with the ``elev0`` column added.
    """
    # Iterate over the values in row order.  Looking rows up by label
    # (``df["timestamp"][d]``) while assigning the result by position
    # attaches values to the wrong rows once the frame has been re-sorted
    # without resetting its index, and fails when the labels are not 0..n-1.
    df["elev0"] = [
        ip["ELEV0"] if stamp.month == 1 else -1
        for stamp in df["timestamp"]
    ]
    return df
# Add the elev0 column to df and display the first 13 rows
create_time_series(df, 0)[0:13]

In [None]:
# Scratch checks: month of the row labelled 11, then the timestamp of the
# row labelled 0 (only the last expression is displayed by the notebook)
df["timestamp"][11].month

from datetime import datetime
df["timestamp"][0]

In [None]:
# monthrange(year, month) returns (weekday of the first day, number of days);
# element 1 is therefore the number of days in the month of the timestamp
# labelled 0.  The import is needed here because the only other import of
# monthrange in this notebook is commented out.
from calendar import monthrange

monthrange(df["timestamp"][0].year, df["timestamp"][0].month)[1]