# Reading the rest of HYDRUS files
![The project files](../Assets/Project_files.png)

## Importing and loading dataset

In [1]:
import glob
import linecache
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from NesrHydrusAnalyst import *

In [2]:
# Folder of the project whose files are read below (path relative to this notebook).
src = '../Datasets/H3D2_SandDitch0011'

## Unreadable files
Encrypted files that are converted to text elsewhere
1. `h.out`
2. `Q.out`
3. `v.out`
4. `th.out`
5. `MESHTRIA.000`
6. `DOMAIN.IN`

## Reading previously implemented files
### Reading the _mesh files_
1. `H.TXT`
2. `V.TXT`
3. `TH.TXT`
4. `MESHTRIA.TXT`

In [3]:
# Load the mesh results of the project folder into one dataframe (velocities
# requested, no CSV written), then show three random rows and the summary.
df = read_hydrus_data(folder=src, save_to_csv=False, read_velocities=True)
display(df.sample(3), get_full_simulation_info(df))

# Rotate the domain back about the y axis and summarise it again.
# NOTE(review): 2.2899 is presumably the slope angle in degrees — confirm against rotate_back.
df_rotated = rotate_back(df, 2.2899, rotation_axis='y')
get_full_simulation_info(df_rotated)

 For the entire simulation
Time steps : [0, 5, 15, 30, 60, 120, 180, 240, 500, 720, 1440] 
Dimensions : {'x': (-0.839105, 49.9601), 'y': (0.0, 20.0), 'z': (0.0, 22.9811)} 
Matrix dims: x_vals(41,), z_vals(46,), X(46, 41), Z(46, 41), M(46, 41)


 Variables statistics:


Unnamed: 0,n,x,y,z,Th_T0,Th_T5,Th_T15,Th_T30,Th_T60,Th_T120,...,V3_T240,V1_T500,V2_T500,V3_T500,V1_T720,V2_T720,V3_T720,V1_T1440,V2_T1440,V3_T1440
7157,7157,39.0295,9.25,17.7896,0.2,0.200017,0.245111,0.344525,0.387539,0.361496,...,-0.000688,1.2e-05,4e-06,-0.000297,1.7e-05,2.60076e-06,-0.00017,4e-06,6.2183e-07,-4.2e-05
17,17,26.6789,20.0,21.0829,0.1,0.068521,0.39216,0.415979,0.43,0.201584,...,-0.000587,-0.000396,-2e-06,-0.000123,-0.000169,2.46964e-07,-5.7e-05,-4.7e-05,3.69181e-07,-1.4e-05
1239,1239,25.737,4.61538,1.02921,0.15,0.135443,0.119711,0.108188,0.43,0.425847,...,-0.003099,0.000214,-1.4e-05,-0.000906,-0.000158,-6.58004e-06,-0.000509,-0.000731,-5.70631e-06,-0.000141


Unnamed: 0,Min,Mean,Median,Max
Th,0.049132,0.257289,0.3117305,0.43
H,-676.912,-66.645863,-19.626,12.4292
V1,-1.66093,-0.021988,-1.86493e-05,0.802749
V2,-0.263769,-0.000331,1.83706e-08,0.265775
V3,-1.08642,-0.002844,-2.684e-05,0.777201


 For the entire simulation
Time steps : [0, 5, 15, 30, 60, 120, 180, 240, 500, 720, 1440] 
Dimensions : {'x': (0.0, 50.0), 'y': (0.0, 20.0), 'z': (0.0, 21.0)} 
Matrix dims: x_vals(41,), z_vals(43,), X(43, 41), Z(43, 41), M(43, 41)


 Variables statistics:


Unnamed: 0,Min,Mean,Median,Max
Th,0.049132,0.257289,0.3117305,0.43
H,-676.912,-66.645863,-19.626,12.4292
V1,-1.66093,-0.021988,-1.86493e-05,0.802749
V2,-0.263769,-0.000331,1.83706e-08,0.265775
V3,-1.08642,-0.002844,-2.684e-05,0.777201


### Reading the _information files_
1. `A_Level.out`
2. `ATMOSPH.IN`
3. `Balance.out`
4. `DIMENSIO.IN`
5. `Run_Inf.out`
6. `SELECTOR.IN`

Reading two files: `A_Level.out` and `ATMOSPH.IN`

In [4]:
# Tabulate `A_Level.out` and `ATMOSPH.IN` of the project, using the 3D layout.
read_a_level_out(src, geom='3D')

Unnamed: 0,Time,CumQAP,CumQRP,CumQA,CumQR,CumQ3,hAtm,hRoot,hKode3,A-level,...,rRoot,hCritA,rt,ht,rt.1,ht.1,rt.2,ht.2,rt.3,ht.3
0,60.0,0.0,0.0,0.0,0.0,-2400.0,-4.0,0.0,10.7,1,...,0.000278,10000,-2,0,0,0,0,0,0,0
1,1440.0,0.0,0.0,0.0,0.0,-2400.0,-28.2,0.0,-25.9,2,...,0.000278,10000,0,0,0,0,0,0,0,0


Reading of one file: `Balance.out`

Returns
1. the total computing time of the simulation AND
2. A dataframe table of water balance info

In [5]:
# read_balance_out returns more than one object; unpack them so that each one
# (the computing time and the water-balance table) is displayed on its own.
display (*read_balance_out(src))

'80.7799999713898'

Unnamed: 0,Time,Volume,VolumeW,InFlow,hMean,WatBalT,WatBalR
0,0.0,21000.0,3798.8,0.0,-189.99,,
1,5.0,21000.0,3998.8,39.996,-123.71,0.030824,0.008
2,15.0,21000.0,4398.8,39.998,-112.81,-0.000182,0.0
3,30.0,21000.0,4998.8,40.001,-98.652,-0.004614,0.0
4,60.0,21000.0,6198.7,39.999,-67.802,-0.099587,0.004
5,120.0,21000.0,6191.2,-0.24252,-45.578,-0.22575,0.009
6,180.0,21000.0,6178.2,-0.18336,-37.625,-0.19091,0.008
7,240.0,21000.0,6169.4,-0.12746,-32.564,-0.1791,0.007
8,500.0,21000.0,6151.4,-0.041062,-23.068,-0.14819,0.006
9,720.0,21000.0,6144.9,-0.023799,-20.772,-0.14151,0.006


 Reading four files: `SELECTOR.IN`, `DIMENSIO.IN`, `Run_Inf.out`, `Balance.out`
 
 Returns a dataframe table of a summary of all infos in them

In [6]:
# One-row summary built from the settings files of the project (3D layout).
read_selector_in(src, geom='3D')

Unnamed: 0,SimulTime_s,L_Unit,T_Unit,Category,MaxIt,TolTh,TolH,InitH/W,lWat,lChem,...,TLevel_i,Time_i,dt_i,Iter_i,ItCum_i,TLevel_e,Time_e,dt_e,Iter_e,ItCum_e
0,80.78,1,1,3,10,0.001,1,1,1,0,...,1,0.001,0.001,2,2,565,1440.0,114.75,3,2214


Combines the most important info from the following files into one dataframe: 
`SELECTOR.IN`, `DIMENSIO.IN`, `Run_Inf.out`, `Balance.out`, `A_Level.out` and `ATMOSPH.IN`

In [7]:
# Build the combined one-row summary, labelled with the simulation name, and
# transpose it (.T) for display.
get_one_line_df(src, simulation_name="Sand Ditch simulation", dims='3d').T

Unnamed: 0,SimulTime_s,L_Unit,T_Unit,Category,MaxIt,TolTh,TolH,InitH/W,lWat,lChem,...,hMean5,hMean15,hMean30,hMean60,hMean120,hMean180,hMean240,hMean500,hMean720,hMean1440
Sand Ditch simulation,80.78,1,1,3,10,0.001,1,1,1,0,...,-123.71,-112.81,-98.652,-67.802,-45.578,-37.625,-32.564,-23.068,-20.772,-19.604


## Files to be implemented here
### Spread-info files

1. `Cum_Q.out`
1. `h_Mean.out`
1. `v_Mean.out`
1. `Run_inf.out`

### Fragment-info files

1. `Boundary.out`
1. `BOUNDARY.IN`
1. `Check.out`

## Trying to create new functions to read the first group

In [8]:
def read_mean_out(file_path, geom='3D'):
    '''
    Summarise the run settings of a HYDRUS project as a one-row dataframe.

    NOTE: despite its name, this function does not read `Cum_Q.out`,
    `h_Mean.out` or `v_Mean.out`. It collects values found at fixed line
    positions of `SELECTOR.IN`, `DIMENSIO.IN`, `Run_Inf.out` and `Balance.out`.

    Parameters
    ----------
    file_path : str
        Folder that contains the project files.
    geom : str, default '3D'
        '2D' (case-insensitive) selects the 2D line positions of
        `SELECTOR.IN`; any other value selects the 3D positions.
        NOT YET TESTED on 2D projects.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are, in order: 'SimulTime_s' (4th word of the
        'Calculation ...' line of `Balance.out`), 'L_Unit', 'T_Unit',
        'Category', the header names read from `SELECTOR.IN` and
        `DIMENSIO.IN`, and the first ('_i') and last ('_e') records of
        `Run_Inf.out`. Columns that can be parsed as numbers are numeric.

    Raises
    ------
    FileNotFoundError
        If `SELECTOR.IN`, `Run_Inf.out` or `Balance.out` is missing.
    ValueError
        If `Run_Inf.out` has no line starting with 'end', or `Balance.out`
        has no line starting with 'Calculation' (the original code looped
        forever in both cases).
    '''
    is2d = geom.lower() == '2d'

    for required in ('SELECTOR.IN', 'Run_Inf.out', 'Balance.out'):
        if not os.path.isfile(os.path.join(file_path, required)):
            raise FileNotFoundError(
                'Required HYDRUS file not found: '
                + os.path.join(file_path, required))

    def project_file(name):
        # Drop any stale cached copy so a re-run simulation is read afresh.
        path = os.path.join(file_path, name)
        linecache.checkcache(path)
        return path

    def replace_text(x):
        # Encode flags and unit names as their position in a fixed list.
        if x in ('t', 'f'):
            return ['f', 't'].index(x)
        elif x in ('mm', 'cm', 'm'):
            return ['mm', 'cm', 'm'].index(x)
        elif x in ('sec', 'min', 'hours', 'days', 'years'):
            return ['sec', 'min', 'hours', 'days', 'years'].index(x)
        elif x in ('s', 'min', 'h', 'd', 'y'):
            return ['s', 'min', 'h', 'd', 'y'].index(x)
        else:
            return x

    def get_line(pos):
        # Tokens of line `pos` (1-based) of the file currently being read.
        line_feed = linecache.getline(filename, pos).split()
        return list(map(replace_text, line_feed))

    def get_word(pos, loc=0):
        # Token number `loc` of line `pos`, or '' when the line is empty.
        word = get_line(pos)
        if len(word) < 1:
            return ''
        word = word[loc]
        if isinstance(word, str):
            return word.strip()
        return word

    def get_num(p1, p2):
        '''
        p1, the line of 2D file
        p2, the line of 3D file
        '''
        return p1 if is2d else p2

    def adjust_body(replaceable):
        # Pad the values so that every header collected so far has one.
        for _ in range(len(headers) - len(body)):
            body.append(replaceable)

    def find_line(first_word, start_pos):
        # First line at or after `start_pos` whose first token is
        # `first_word`; bounded by the file length so it cannot loop forever.
        n_lines = len(linecache.getlines(filename))
        for pos in range(start_pos, n_lines + 1):
            if get_word(pos) == first_word:
                return pos
        raise ValueError("No line starting with '%s' found in %s"
                         % (first_word, filename))

    def numeric_or_original(column):
        # Same result as the deprecated pd.to_numeric(..., errors='ignore').
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    # Getting data from the SELECTOR.IN file
    filename = project_file('SELECTOR.IN')
    headers = ['L_Unit', 'T_Unit', 'Category']
    body = []

    body.append(get_word(6))
    body.append(get_word(7))
    # Only 2D files carry the category on line 10; 3D is always category 3.
    body.append(int(get_word(10)) if is2d else 3)
    headers += get_line(get_num(11, 9))[:4]
    body += get_line(get_num(12, 10))[:4]

    headers += get_line(get_num(13, 11))
    body += get_line(get_num(14, 12))
    headers += get_line(get_num(15, 13))
    body += get_line(get_num(16, 14))

    headers += get_line(get_num(20, 18))
    body += get_line(get_num(21, 19))
    adjust_body(0)

    headers += get_line(get_num(22, 20))
    body += get_line(get_num(23, 21))

    headers += get_line(get_num(24, 22))
    body += get_line(get_num(25, 23))

    headers += get_line(27)
    body += get_line(28)
    headers += get_line(29)
    body += get_line(30)

    # Getting data from the DIMENSIO.IN file
    filename = project_file('DIMENSIO.IN')
    headers += get_line(2)
    body += get_line(3)
    adjust_body(0)

    # Getting data from the Run_Inf.out file: first record, then the record
    # just before the 'end' line
    filename = project_file('Run_Inf.out')
    headers += ['TLevel_i', 'Time_i', 'dt_i', 'Iter_i', 'ItCum_i']
    body += get_line(5)
    end_pos = find_line('end', 6)
    headers += ['TLevel_e', 'Time_e', 'dt_e', 'Iter_e', 'ItCum_e']
    body += get_line(end_pos - 1)

    # Getting data from the Balance.out file
    filename = project_file('Balance.out')
    headers = ['SimulTime_s'] + headers
    calc_pos = find_line('Calculation', 10)
    body = [get_word(calc_pos, loc=3)] + body

    # finalize
    body = np.array(body)
    headers = np.array(headers)

    df = pd.DataFrame(data=body, index=headers).T
    return df.apply(numeric_or_original)

In [41]:
def proper_type(x):
    """
    Convert `x` to the most specific number it represents.

    Returns an int when float(x) is within 1e-13 of a whole number, the
    float itself otherwise, and `x` unchanged when it cannot be converted
    (non-numeric text, None, 'nan', 'inf', ...).
    """
    try:
        as_float = float(x)
        whole = float(int(as_float))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; anything else (for example
        # KeyboardInterrupt) now propagates instead of being hidden.
        return x
    if abs(as_float - whole) < 0.0000000000001:
        return int(whole)
    return as_float

def replace_text(x):
    """
    Encode a flag or unit token as its position in a fixed list.

    't'/'f' map to 1/0; length, long-form time and short-form time unit
    names map to their index within their own list. Any other value is
    returned unchanged.
    """
    token_groups = (
        ('f', 't'),
        ('mm', 'cm', 'm'),
        ('sec', 'min', 'hours', 'days', 'years'),
        ('s', 'min', 'h', 'd', 'y'),
    )
    # The first group that contains the token decides its code.
    for group in token_groups:
        if x in group:
            return group.index(x)
    return x

def get_line(filename, pos):
    """Return the whitespace-separated tokens of line `pos` (1-based) of
    `filename`, each passed through `replace_text`. A line that cannot be
    read yields an empty list."""
    tokens = linecache.getline(filename, pos).split()
    return [replace_text(token) for token in tokens]

def get_word(filename, pos, loc=0):
    """
    Return token number `loc` of line `pos` of `filename`.

    An empty line gives ''. String tokens are stripped; tokens that
    `replace_text` turned into numbers are returned as they are.
    """
    # The original called get_line(pos) without the file name, which raised
    # TypeError on every call.
    tokens = get_line(filename, pos)
    if not tokens:
        return ''
    word = tokens[loc]
    return word.strip() if isinstance(word, str) else word

def get_num(p1, p2, is2d):
    '''
    Pick the line number that matches the project geometry.

    p1, the line of 2D file
    p2, the line of 3D file
    is2d, True selects p1 and False selects p2
    '''
    line_for_geometry = {False: p2, True: p1}
    return line_for_geometry[is2d]

def adjust_body(replaceable, headers, body):
    """Pad `body` in place with `replaceable` until it is as long as
    `headers`; a `body` that is already long enough is left untouched."""
    shortfall = len(headers) - len(body)
    body.extend([replaceable] * shortfall)

In [10]:
# Inputs shared by the exploratory cells that follow.
file_path = src
geom = '3D'

# Geometry flag and the start value that goes with it.
is2d = geom.lower() == '2d'
start = 3 if is2d else 11


In [42]:
# Read the `Cum_Q.out` table by hand: column names on line 11, data rows from
# line 14 down to the line whose first token is 'end'.
filename = os.path.join(file_path, 'Cum_Q.out')
headers = list(get_line(filename, 11))
body = []
i = 14
while True:
    feed = get_line(filename, i)
    if feed[0] == 'end':
        break
    body.append(feed)
    i += 1
body = np.array(body)
result = pd.DataFrame(body, columns=headers)
result.head(3)

Unnamed: 0,Time,CumQAP,CumQRP,CumQA,CumQR,CumQ3,CumQ1,CumQS,CumQ5,CumQ6,CumQ7,CumQ8,CumQ9,CRunOff,cEvapor,cInfiltr
0,0.001,0.0,0.0,0.0,0.0,-0.04,0.0,0.0,0.0,9.34e-06,0.0,0.0,0.0,0.0,0.0,0.0
1,0.002,0.0,0.0,0.0,0.0,-0.08,0.0,0.0,0.0,1.87e-05,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0033,0.0,0.0,0.0,0.0,-0.132,0.0,0.0,0.0,3.07e-05,0.0,0.0,0.0,0.0,0.0,0.0


In [184]:
# Make a standalone function

def get_means_table(filename, header_location, data_location, units_location=None):
    """
    Read a whitespace-separated table from a text output file.

    Parameters
    ----------
    filename : str
        Path of the file, e.g. os.path.join(file_path, 'Cum_Q.out').
    header_location : int
        1-based number of the line that holds the column names.
    data_location : int
        1-based number of the first data line.
    units_location : int or None
        1-based number of the line that holds the units. When given, each
        column name gets its unit appended after an underscore with '/'
        written as 'p' and the brackets removed (e.g. '[L/T]' -> '_LpT').

    Returns
    -------
    pandas.DataFrame
        One row per data line, values kept as read (not converted to
        numbers). Reading stops at the line whose first token is 'end', or
        at the first empty line / end of file.
    """
    # Standalone '...' tokens mark empty columns; drop them.
    headers = [name for name in get_line(filename, header_location)
               if name != '...']

    if units_location is not None:
        # The original line here, map(lambda x: x.replac), raised TypeError.
        swaps = {'/': 'p', '[': '', ']': ''}
        units = [''.join(swaps.get(ch, ch) for ch in str(unit))
                 for unit in get_line(filename, units_location)]
        headers = [str(name).replace('...', '') + '_' + unit
                   for name, unit in zip(headers, units)]

    # reading to the end of the table
    body = []
    i = data_location
    feed = get_line(filename, i)
    # `feed` is empty past the end of the file, so a file without an 'end'
    # line no longer fails with IndexError.
    while feed and feed[0] != 'end':
        body.append(feed)
        i += 1
        feed = get_line(filename, i)

    if not body:
        return pd.DataFrame(columns=headers)
    return pd.DataFrame(np.array(body), columns=headers)

In [67]:
# Test: the column names of `Cum_Q.out` are on line 11 and the data starts on line 14.
get_means_table(os.path.join(file_path, 'Cum_Q.out'), 11, 14).head(3)

Unnamed: 0,Time,CumQAP,CumQRP,CumQA,CumQR,CumQ3,CumQ1,CumQS,CumQ5,CumQ6,CumQ7,CumQ8,CumQ9,CRunOff,cEvapor,cInfiltr
0,0.001,0.0,0.0,0.0,0.0,-0.04,0.0,0.0,0.0,9.34e-06,0.0,0.0,0.0,0.0,0.0,0.0
1,0.002,0.0,0.0,0.0,0.0,-0.08,0.0,0.0,0.0,1.87e-05,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0033,0.0,0.0,0.0,0.0,-0.132,0.0,0.0,0.0,3.07e-05,0.0,0.0,0.0,0.0,0.0,0.0


In [68]:
# `h_Mean.out`: column names on line 4, data from line 7.
get_means_table(os.path.join(file_path, 'h_Mean.out'), 4, 7).head(3)

Unnamed: 0,Time,hAtm,hRoot,hKode3,hKode1,hSeep,hKode5,hKode6,hKode7,hKode8,hKode9
0,0.001,-21.9,0.0,-21.0,0.0,0.0,0.0,-64.2,0.0,0.0,0.0
1,0.002,-21.9,0.0,-20.2,0.0,0.0,0.0,-63.8,0.0,0.0,0.0
2,0.0033,-21.9,0.0,-19.2,0.0,0.0,0.0,-63.3,0.0,0.0,0.0


In [69]:
# `v_Mean.out`: column names on line 11, data from line 14.
get_means_table(os.path.join(file_path, 'v_Mean.out'), 11,14).head(3)

Unnamed: 0,Time,rAtm,rRoot,vAtm,vRoot,vKode3,vKode1,vSeep,vKode5,vKode6,vKode7,vKode8,vKode9,RunOff,Evapor,Infiltr,SnowLayer...
0,0.001,0.0,0.000278,0.0,0.0,-40.0,0.0,0.0,0.0,0.00934,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.002,0.0,0.000278,0.0,0.0,-40.0,0.0,0.0,0.0,0.00932,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0033,0.0,0.000278,0.0,0.0,-40.0,0.0,0.0,0.0,0.00929,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [70]:
# Column names of the `h_Mean.out` table followed by a '...' placeholder token;
# list.remove() drops the first occurrence of the placeholder.
t=['Time', 'hAtm', 'hRoot', 'hKode3', 'hKode1', 'hSeep', 'hKode5', 'hKode6', 'hKode7', 'hKode8', 'hKode9', '...']
t.remove('...')
print(t)

['Time', 'hAtm', 'hRoot', 'hKode3', 'hKode1', 'hSeep', 'hKode5', 'hKode6', 'hKode7', 'hKode8', 'hKode9']


## Attempting units merging to headers

In [84]:
# A sample units row, split into one token per column.
units = '         [T]      [L/T]       [L/T]     [L/T]       [L/T]      [L3/T]     [L3/T]    [L3/T]      [L3/T]     [L3/T]     [L3/T]     [L3/T]     [L3/T]     [L/T]       [L/T]     [L/T]         [L]'
units = units.split()
# Scratch checks: only the last expression of the cell is displayed.
# units[1]
# list(map({'/','p'}, units[1][2]))
units[1][2]
units[1][2].replace('/', 'p')
units[1]#.split()

'[L/T]'

In [None]:
# Scratch idea: replace every whitespace-separated token of `a` that is a key of `d`.
# NOTE(review): neither `d` nor `a` is defined in the visible cells, so this
# cell raises NameError on a fresh kernel — define them or delete the cell.
' '.join([d.get(i, i) for i in a.split()])

In [86]:
# str.split() keeps a single word whole; iterating over the string yields its characters.
'Nesr'.split(), [_ for _ in 'Nesr']

(['Nesr'], ['N', 'e', 's', 'r'])

In [87]:
def split_letters(word):
    """Return the items of `word` (the characters of a string) as a list."""
    return list(word)

In [88]:
# Try the helper on the second unit token.
split_letters(units[1])

['[', 'L', '/', 'T', ']']

In [96]:
def reform_unit(unit, prefix=None):
    '''
    Rewrite a unit such as '[L/T]' without special characters.

    '/' becomes 'p' and the square brackets are dropped, so '[L/T]' gives
    'LpT'. When `prefix` is given it is put in front of the result.
    '''
    substitutions = {'/': 'p', '[': '', ']': ''}
    pieces = []
    for letter in unit:
        pieces.append(substitutions.get(letter, letter))
    reformed = ''.join(pieces)
    return reformed if prefix is None else prefix + reformed

# The same unit without and with an underscore prefix.
reform_unit(units[1]), reform_unit(units[1], '_')

('LpT', '_LpT')

In [93]:
# Reformat every unit token of the row.
print([reform_unit(u) for u in units])

['T', 'LpT', 'LpT', 'LpT', 'LpT', 'L3pT', 'L3pT', 'L3pT', 'L3pT', 'L3pT', 'L3pT', 'L3pT', 'L3pT', 'LpT', 'LpT', 'LpT', 'L']


In [100]:
# Side by side: the column names, the raw units, and the units as header suffixes.
print (t,'\n',  units,'\n', [reform_unit(u, '_') for u in units])

['Time', 'hAtm', 'hRoot', 'hKode3', 'hKode1', 'hSeep', 'hKode5', 'hKode6', 'hKode7', 'hKode8', 'hKode9'] 
 ['[T]', '[L/T]', '[L/T]', '[L/T]', '[L/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L3/T]', '[L/T]', '[L/T]', '[L/T]', '[L]'] 
 ['_T', '_LpT', '_LpT', '_LpT', '_LpT', '_L3pT', '_L3pT', '_L3pT', '_L3pT', '_L3pT', '_L3pT', '_L3pT', '_L3pT', '_LpT', '_LpT', '_LpT', '_L']


In [101]:
# Append a unit suffix to each column name.
# zip() stops at the shorter list, so only the first len(t) suffixes are used.
# NOTE(review): the resulting names (e.g. 'hSeep_L3pT') suggest that `units`
# was copied from a different table than `t` — confirm before relying on them.
unt = [reform_unit(u, '_') for u in units]
merged = [x + y for x, y in zip(t, unt)]
merged

['Time_T',
 'hAtm_LpT',
 'hRoot_LpT',
 'hKode3_LpT',
 'hKode1_LpT',
 'hSeep_L3pT',
 'hKode5_L3pT',
 'hKode6_L3pT',
 'hKode7_L3pT',
 'hKode8_L3pT',
 'hKode9_L3pT']

In [131]:
# def get_means_table(filename,
#                     header_location,
#                     data_location,
#                     units_location=None):
#     '''
    
    
#     '''
#     # filename = os.path.join(file_path, 'Cum_Q.out')

#     # Reading the header of the table
#     headers = []
#     body = []
#     headers += get_line(filename, header_location)
#     try:
#         # If there is any empty columns, remove them
#         for _ in range(5):
#             headers.remove('...')
#     except:
#         pass

#     headers = [_.replace('...', '') for _ in headers]

#     # Functions to add units
#     def reform_unit(unit, prefix=None):
#         '''

#         '''

#         def split_letters(word):
#             return [_ for _ in word]

#         reformed = ''.join([{
#             '/': 'p',
#             '.': '',
#             '[': '',
#             ']': ''
#         }.get(i, i) for i in split_letters(unit)])
#         if prefix is None:
#             return reformed
#         else:
#             return prefix + reformed

#     if units_location is not None:
#         units = get_line(filename, units_location)
#         units = [reform_unit(_, '_') for _ in units]
#         headers = [x + y for x, y in zip(headers, unt)]

#     # reading to the end of the file
#     i = data_location
#     feed = get_line(filename, i)
#     #print (feed)
#     while feed[0] != 'end':
#         body.append(feed)
#         i += 1
#         feed = get_line(filename, i)


# #         print (feed)
# #         if isinstance(feed, str):
# #             feed = [feed]
# #     print(i, feed[0], end=', ')
#     body = np.array(body)
#     return pd.DataFrame(body, columns=headers)

## Creating a generic standalone function

In [187]:
def get_means_table(filename,
                    header_location,
                    data_location,
                    units_location=None):
    '''
    Returns a table of adjusted data to numeric analysis

    We have to specify the:
    header_location: the line number that contains the headers,
    units_location : the line number that contains the units (if None, then
                        no units are appended to the headers),
    data_location  : the line number that contains the first line of data.

    Line numbers are 1-based. Reading stops at the line whose first word is
    'end', or at the first empty line / end of file, so a file that lacks
    the 'end' line still returns the rows read so far.

    Units will be written after an underscore without any special chars
    e.g. velocity: m/s  --> velocity_mps
    A header that has no unit token on the units line keeps its bare name.

    The values are returned as read from the file (not converted to numbers).

    # filename = os.path.join(file_path, 'Cum_Q.out')
    '''

    def replace_text(x):
        # Encode flags and unit names found among the data values.
        if x in ('t', 'f'):
            return ['f', 't'].index(x)
        elif x in ('mm', 'cm', 'm'):
            return ['mm', 'cm', 'm'].index(x)
        elif x in ('sec', 'min', 'hours', 'days', 'years'):
            return ['sec', 'min', 'hours', 'days', 'years'].index(x)
        elif x in ('s', 'min', 'h', 'd', 'y'):
            return ['s', 'min', 'h', 'd', 'y'].index(x)
        else:
            return x

    def read_tokens(pos):
        # Raw words of line `pos`; an empty list past the end of the file.
        return linecache.getline(filename, pos).split()

    def get_line(pos):
        return list(map(replace_text, read_tokens(pos)))

    def reform_unit(unit, prefix=None):
        '''
        Strip the special characters of a unit: '[L/T]' --> 'LpT'
        '''
        reformed = ''.join([{
            '/': 'p',
            '.': '',
            '[': '',
            ']': ''
        }.get(i, i) for i in unit])
        if prefix is None:
            return reformed
        else:
            return prefix + reformed

    # Forget a stale cached copy, e.g. after the simulation was run again.
    linecache.checkcache(filename)

    # Headers and units are read raw: passing them through replace_text would
    # turn a column named 'h' or 'd' into a number.
    # Standalone '...' words are empty columns; a trailing '...' is trimmed.
    headers = [name.replace('...', '')
               for name in read_tokens(header_location) if name != '...']

    if units_location is not None:
        units = [reform_unit(_, '_') for _ in read_tokens(units_location)]
        headers = [name + (units[k] if k < len(units) else '')
                   for k, name in enumerate(headers)]

    # reading to the end of the table
    body = []
    i = data_location
    feed = get_line(i)
    while feed and feed[0] != 'end':
        body.append(feed)
        i += 1
        feed = get_line(i)

    if not body:
        return pd.DataFrame(columns=headers)
    return pd.DataFrame(np.array(body), columns=headers)

## Trying to create a combined function to get a table for 4 files

In [188]:
# First row of each of the three tables, read without units and then with the
# units line merged into the column names.
display(
    get_means_table(os.path.join(file_path, 'Cum_Q.out'), 11, 14).head(1),
    get_means_table(os.path.join(file_path, 'Cum_Q.out'), 11, 14, 12).head(1),
    get_means_table(os.path.join(file_path, 'h_Mean.out'), 4, 7).head(1),
    get_means_table(os.path.join(file_path, 'h_Mean.out'), 4, 7, 5).head(1),
    get_means_table(os.path.join(file_path, 'v_Mean.out'), 11, 14).head(1),
    get_means_table(os.path.join(file_path, 'v_Mean.out'), 11, 14, 12).head(1))

Unnamed: 0,Time,CumQAP,CumQRP,CumQA,CumQR,CumQ3,CumQ1,CumQS,CumQ5,CumQ6,CumQ7,CumQ8,CumQ9,CRunOff,cEvapor,cInfiltr
0,0.001,0.0,0.0,0.0,0.0,-0.04,0.0,0.0,0.0,9.34e-06,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,Time_T,CumQAP_L3,CumQRP_L3,CumQA_L3,CumQR_L3,CumQ3_L3,CumQ1_L3,CumQS_L3,CumQ5_L3,CumQ6_L3,CumQ7_L3,CumQ8_L3,CumQ9_L3,CRunOff_L3,cEvapor_L3,cInfiltr_L3
0,0.001,0.0,0.0,0.0,0.0,-0.04,0.0,0.0,0.0,9.34e-06,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,Time,hAtm,hRoot,hKode3,hKode1,hSeep,hKode5,hKode6,hKode7,hKode8,hKode9
0,0.001,-21.9,0.0,-21.0,0.0,0.0,0.0,-64.2,0.0,0.0,0.0


Unnamed: 0,Time_T,hAtm_L,hRoot_L,hKode3_L,hKode1_L,hSeep_L,hKode5_L,hKode6_L,hKode7_L,hKode8_L,hKode9_L
0,0.001,-21.9,0.0,-21.0,0.0,0.0,0.0,-64.2,0.0,0.0,0.0


Unnamed: 0,Time,rAtm,rRoot,vAtm,vRoot,vKode3,vKode1,vSeep,vKode5,vKode6,vKode7,vKode8,vKode9,RunOff,Evapor,Infiltr,SnowLayer
0,0.001,0.0,0.000278,0.0,0.0,-40.0,0.0,0.0,0.0,0.00934,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,Time_T,rAtm_LpT,rRoot_LpT,vAtm_LpT,vRoot_LpT,vKode3_L3pT,vKode1_L3pT,vSeep_L3pT,vKode5_L3pT,vKode6_L3pT,vKode7_L3pT,vKode8_L3pT,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L
0,0.001,0.0,0.000278,0.0,0.0,-40.0,0.0,0.0,0.0,0.00934,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [133]:
def get_mean_outs_table(file_path):
    """
    Merge the `Cum_Q.out`, `h_Mean.out` and `v_Mean.out` tables of the folder
    `file_path` into one dataframe, joined on their 'Time_T' column.
    """
    # (file name, header line, first data line, units line)
    layouts = (
        ('Cum_Q.out', 11, 14, 12),
        ('h_Mean.out', 4, 7, 5),
        ('v_Mean.out', 11, 14, 12),
    )
    cum_q, h_mean, v_mean = [
        get_means_table(os.path.join(file_path, name), header, data, units)
        for name, header, data, units in layouts
    ]
    first_two = pd.merge(cum_q, h_mean, on='Time_T')
    return pd.merge(first_two, v_mean, on='Time_T')
    

In [164]:
# Test: merge the three mean tables of the project and display the result.
result = get_mean_outs_table(file_path)
result

Unnamed: 0,Time_T,CumQAP_LpT,CumQRP_LpT,CumQA_LpT,CumQR_LpT,CumQ3_L3pT,CumQ1_L3pT,CumQS_L3pT,CumQ5_L3pT,CumQ6_L3pT,...,vSeep_L3pT,vKode5_L3pT,vKode6_L3pT,vKode7_L3pT,vKode8_L3pT,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L
0,0.0010,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.400E-01,0.000E+00,0.000E+00,0.000E+00,0.934E-05,...,0.000E+00,0.000E+00,0.934E-02,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
1,0.0020,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.800E-01,0.000E+00,0.000E+00,0.000E+00,0.187E-04,...,0.000E+00,0.000E+00,0.932E-02,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
2,0.0033,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.132E+00,0.000E+00,0.000E+00,0.000E+00,0.307E-04,...,0.000E+00,0.000E+00,0.929E-02,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
3,0.0050,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.200E+00,0.000E+00,0.000E+00,0.000E+00,0.464E-04,...,0.000E+00,0.000E+00,0.925E-02,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
4,0.0072,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.287E+00,0.000E+00,0.000E+00,0.000E+00,0.666E-04,...,0.000E+00,0.000E+00,0.921E-02,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,980.9964,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.240E+04,0.000E+00,0.000E+00,0.000E+00,0.585E+02,...,0.000E+00,0.000E+00,0.155E-01,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
561,1095.7473,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.240E+04,0.000E+00,0.000E+00,0.000E+00,0.601E+02,...,0.000E+00,0.000E+00,0.142E-01,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
562,1210.4982,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.240E+04,0.000E+00,0.000E+00,0.000E+00,0.616E+02,...,0.000E+00,0.000E+00,0.128E-01,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00
563,1325.2491,0.000E+00,0.000E+00,0.000E+00,0.000E+00,-0.240E+04,0.000E+00,0.000E+00,0.000E+00,0.630E+02,...,0.000E+00,0.000E+00,0.121E-01,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00,0.000E+00


In [143]:
# See the columns of the dataframe, laid out three per row.
# The 14 x 3 shape is hard-coded for the 42 columns of this table and raises
# if the number of columns changes.
np.array(list(result)).reshape(14, 3)

array([['Time_T', 'CumQAP_LpT', 'CumQRP_LpT'],
       ['CumQA_LpT', 'CumQR_LpT', 'CumQ3_L3pT'],
       ['CumQ1_L3pT', 'CumQS_L3pT', 'CumQ5_L3pT'],
       ['CumQ6_L3pT', 'CumQ7_L3pT', 'CumQ8_L3pT'],
       ['CumQ9_L3pT', 'CRunOff_LpT', 'cEvapor_LpT'],
       ['cInfiltr_LpT', 'hAtm_LpT', 'hRoot_LpT'],
       ['hKode3_LpT', 'hKode1_LpT', 'hSeep_L3pT'],
       ['hKode5_L3pT', 'hKode6_L3pT', 'hKode7_L3pT'],
       ['hKode8_L3pT', 'hKode9_L3pT', 'rAtm_LpT'],
       ['rRoot_LpT', 'vAtm_LpT', 'vRoot_LpT'],
       ['vKode3_L3pT', 'vKode1_L3pT', 'vSeep_L3pT'],
       ['vKode5_L3pT', 'vKode6_L3pT', 'vKode7_L3pT'],
       ['vKode8_L3pT', 'vKode9_L3pT', 'RunOff_LpT'],
       ['Evapor_LpT', 'Infiltr_LpT', 'SnowLayer_L']], dtype='<U12')

## Getting information from the  `Run_inf.out` file

In [165]:
# Same reader for the run-time information: column names on line 3, data from
# line 5, no units line.
rnf = get_means_table(os.path.join(file_path, 'Run_inf.out'), 3, 5, None)
rnf

Unnamed: 0,TLevel,Time,dt,Iter,ItCum
0,1,0.10000000E-02,0.10000E-02,2,2
1,2,0.20000001E-02,0.10000E-02,2,4
2,3,0.33000001E-02,0.13000E-02,3,7
3,4,0.49900000E-02,0.16900E-02,3,10
4,5,0.71870000E-02,0.21970E-02,3,13
...,...,...,...,...,...
560,561,0.98099644E+03,0.76501E+02,3,2201
561,562,0.10957473E+04,0.11475E+03,4,2205
562,563,0.12104982E+04,0.11475E+03,3,2208
563,564,0.13252491E+04,0.11475E+03,3,2211


In [148]:
# See if the time columns of the two tables are equal.
# The two bare column expressions are not displayed; only the comparison on
# the last line is.
rnf.Time
result.Time_T
rnf.Time == result.Time_T

0      False
1      False
2      False
3      False
4      False
       ...  
560    False
561    False
562    False
563    False
564    False
Length: 565, dtype: bool

In [153]:
# The times look equal, so why does the comparison fail? Inspect the dtypes:
# both frames still hold text (object), so the same number written in two
# different formats never compares equal.
rnf.info(), result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 565 entries, 0 to 564
Data columns (total 5 columns):
TLevel    565 non-null object
Time      565 non-null object
dt        565 non-null object
Iter      565 non-null object
ItCum     565 non-null object
dtypes: object(5)
memory usage: 22.2+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 565 entries, 0 to 564
Data columns (total 42 columns):
Time_T          565 non-null object
CumQAP_LpT      565 non-null object
CumQRP_LpT      565 non-null object
CumQA_LpT       565 non-null object
CumQR_LpT       565 non-null object
CumQ3_L3pT      565 non-null object
CumQ1_L3pT      565 non-null object
CumQS_L3pT      565 non-null object
CumQ5_L3pT      565 non-null object
CumQ6_L3pT      565 non-null object
CumQ7_L3pT      565 non-null object
CumQ8_L3pT      565 non-null object
CumQ9_L3pT      565 non-null object
CRunOff_LpT     565 non-null object
cEvapor_LpT     565 non-null object
cInfiltr_LpT    565 non-null object
hAtm_LpT        565 non-

(None, None)

In [169]:
# Converting both dataframes to numeric; errors='coerce' turns any value that
# cannot be parsed into NaN instead of raising.
rnf2 = rnf.apply(pd.to_numeric, errors='coerce')
result2 = result.apply(pd.to_numeric, errors='coerce')#.info()

In [170]:
# Compare the time columns again, now as numbers.
rnf2.Time == result2.Time_T

0       True
1      False
2      False
3      False
4      False
       ...  
560    False
561     True
562     True
563     True
564     True
Length: 565, dtype: bool

**Mostly OK: the remaining mismatches are rounding (approximation) errors, because `Time_T` is printed with fewer digits than `Time`.**

In [173]:
# Put the two tables side by side (axis=1); rows are matched on their index
# labels, not on the time values.
final_table = pd.concat([result2, rnf2], sort=False, axis=1)
final_table

Unnamed: 0,Time_T,CumQAP_LpT,CumQRP_LpT,CumQA_LpT,CumQR_LpT,CumQ3_L3pT,CumQ1_L3pT,CumQS_L3pT,CumQ5_L3pT,CumQ6_L3pT,...,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L,TLevel,Time,dt,Iter,ItCum
0,0.0010,0.0,0.0,0.0,0.0,-0.040,0.0,0.0,0.0,0.000009,...,0.0,0.0,0.0,0.0,0.0,1,0.001000,0.001000,2,2
1,0.0020,0.0,0.0,0.0,0.0,-0.080,0.0,0.0,0.0,0.000019,...,0.0,0.0,0.0,0.0,0.0,2,0.002000,0.001000,2,4
2,0.0033,0.0,0.0,0.0,0.0,-0.132,0.0,0.0,0.0,0.000031,...,0.0,0.0,0.0,0.0,0.0,3,0.003300,0.001300,3,7
3,0.0050,0.0,0.0,0.0,0.0,-0.200,0.0,0.0,0.0,0.000046,...,0.0,0.0,0.0,0.0,0.0,4,0.004990,0.001690,3,10
4,0.0072,0.0,0.0,0.0,0.0,-0.287,0.0,0.0,0.0,0.000067,...,0.0,0.0,0.0,0.0,0.0,5,0.007187,0.002197,3,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,980.9964,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,58.500000,...,0.0,0.0,0.0,0.0,0.0,561,980.996440,76.501000,3,2201
561,1095.7473,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,60.100000,...,0.0,0.0,0.0,0.0,0.0,562,1095.747300,114.750000,4,2205
562,1210.4982,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,61.600000,...,0.0,0.0,0.0,0.0,0.0,563,1210.498200,114.750000,3,2208
563,1325.2491,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,63.000000,...,0.0,0.0,0.0,0.0,0.0,564,1325.249100,114.750000,3,2211


**Drop `TLevel`, `Time`, keep `dt`, `Iter`, `ItCum`**

In [175]:
# drop() returns a new frame for display; `final_table` itself keeps both columns.
final_table.drop(['TLevel', 'Time'], axis=1)

Unnamed: 0,Time_T,CumQAP_LpT,CumQRP_LpT,CumQA_LpT,CumQR_LpT,CumQ3_L3pT,CumQ1_L3pT,CumQS_L3pT,CumQ5_L3pT,CumQ6_L3pT,...,vKode7_L3pT,vKode8_L3pT,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L,dt,Iter,ItCum
0,0.0010,0.0,0.0,0.0,0.0,-0.040,0.0,0.0,0.0,0.000009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001000,2,2
1,0.0020,0.0,0.0,0.0,0.0,-0.080,0.0,0.0,0.0,0.000019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001000,2,4
2,0.0033,0.0,0.0,0.0,0.0,-0.132,0.0,0.0,0.0,0.000031,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001300,3,7
3,0.0050,0.0,0.0,0.0,0.0,-0.200,0.0,0.0,0.0,0.000046,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001690,3,10
4,0.0072,0.0,0.0,0.0,0.0,-0.287,0.0,0.0,0.0,0.000067,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002197,3,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,980.9964,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,58.500000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76.501000,3,2201
561,1095.7473,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,60.100000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,4,2205
562,1210.4982,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,61.600000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,3,2208
563,1325.2491,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,63.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,3,2211


## Creating a combined function for the four files

In [178]:
def get_mean_outs_table(file_path):
    '''
    Returns a dataframe of the data in four files:
    'Cum_Q.out', 'h_Mean.out', 'v_Mean.out', 'Run_inf.out'
    The input is the path of the HYDRUS working folder that contains the files

    The three mean tables are joined on their 'Time_T' column; the run-time
    table is then attached side by side (matched on the row index) and its
    'TLevel' and 'Time' columns are dropped, keeping 'dt', 'Iter', 'ItCum'.
    All values are converted to numbers; anything unparsable becomes NaN.
    '''
    # (file name, header line, first data line, units line)
    layouts = (
        ('Cum_Q.out', 11, 14, 12),
        ('h_Mean.out', 4, 7, 5),
        ('v_Mean.out', 11, 14, 12),
    )
    cum_q, h_mean, v_mean = [
        get_means_table(os.path.join(file_path, name), header, data, units)
        for name, header, data, units in layouts
    ]

    # The final combined table of means
    merged = pd.merge(pd.merge(cum_q, h_mean, on='Time_T'), v_mean, on='Time_T')

    # Runtime info table. This notebook spells the file both 'Run_Inf.out'
    # and 'Run_inf.out'; use whichever exists so that the function also works
    # on case-sensitive file systems (falls back to the original spelling).
    run_inf_path = os.path.join(file_path, 'Run_inf.out')
    for candidate in ('Run_Inf.out', 'Run_inf.out'):
        candidate_path = os.path.join(file_path, candidate)
        if os.path.isfile(candidate_path):
            run_inf_path = candidate_path
            break
    rnf = get_means_table(run_inf_path, 3, 5, None)

    # Convert all into numeric
    rnf = rnf.apply(pd.to_numeric, errors='coerce')
    merged = merged.apply(pd.to_numeric, errors='coerce')

    # NOTE(review): the concat assumes that both tables list the same time
    # steps in the same order — confirm for projects other than the sample.
    merged = pd.concat([merged, rnf], sort=False, axis=1)
    return merged.drop(['TLevel', 'Time'], axis=1)

In [179]:
# Build the combined table of the four files and display it.
result = get_mean_outs_table(file_path)
result

Unnamed: 0,Time_T,CumQAP_LpT,CumQRP_LpT,CumQA_LpT,CumQR_LpT,CumQ3_L3pT,CumQ1_L3pT,CumQS_L3pT,CumQ5_L3pT,CumQ6_L3pT,...,vKode7_L3pT,vKode8_L3pT,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L,dt,Iter,ItCum
0,0.0010,0.0,0.0,0.0,0.0,-0.040,0.0,0.0,0.0,0.000009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001000,2,2
1,0.0020,0.0,0.0,0.0,0.0,-0.080,0.0,0.0,0.0,0.000019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001000,2,4
2,0.0033,0.0,0.0,0.0,0.0,-0.132,0.0,0.0,0.0,0.000031,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001300,3,7
3,0.0050,0.0,0.0,0.0,0.0,-0.200,0.0,0.0,0.0,0.000046,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001690,3,10
4,0.0072,0.0,0.0,0.0,0.0,-0.287,0.0,0.0,0.0,0.000067,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002197,3,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,980.9964,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,58.500000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76.501000,3,2201
561,1095.7473,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,60.100000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,4,2205
562,1210.4982,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,61.600000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,3,2208
563,1325.2491,0.0,0.0,0.0,0.0,-2400.000,0.0,0.0,0.0,63.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.750000,3,2211


In [180]:
# Check the column dtypes after the numeric conversion.
result.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 565 entries, 0 to 564
Data columns (total 45 columns):
Time_T          565 non-null float64
CumQAP_LpT      565 non-null float64
CumQRP_LpT      565 non-null float64
CumQA_LpT       565 non-null float64
CumQR_LpT       565 non-null float64
CumQ3_L3pT      565 non-null float64
CumQ1_L3pT      565 non-null float64
CumQS_L3pT      565 non-null float64
CumQ5_L3pT      565 non-null float64
CumQ6_L3pT      565 non-null float64
CumQ7_L3pT      565 non-null float64
CumQ8_L3pT      565 non-null float64
CumQ9_L3pT      565 non-null float64
CRunOff_LpT     565 non-null float64
cEvapor_LpT     565 non-null float64
cInfiltr_LpT    565 non-null float64
hAtm_LpT        565 non-null float64
hRoot_LpT       565 non-null float64
hKode3_LpT      565 non-null float64
hKode1_LpT      565 non-null float64
hSeep_L3pT      565 non-null float64
hKode5_L3pT     565 non-null float64
hKode6_L3pT     565 non-null float64
hKode7_L3pT     565 non-null float64
hKode8_

In [182]:
# Summary statistics of every column.
result.describe()

Unnamed: 0,Time_T,CumQAP_LpT,CumQRP_LpT,CumQA_LpT,CumQR_LpT,CumQ3_L3pT,CumQ1_L3pT,CumQS_L3pT,CumQ5_L3pT,CumQ6_L3pT,...,vKode7_L3pT,vKode8_L3pT,vKode9_L3pT,RunOff_LpT,Evapor_LpT,Infiltr_LpT,SnowLayer_L,dt,Iter,ItCum
count,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0,...,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0,565.0
mean,57.399914,0.0,0.0,0.0,0.0,-1099.367483,0.0,0.0,0.0,2.579352,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.548664,3.918584,1114.474336
std,148.179961,0.0,0.0,0.0,0.0,954.269579,0.0,0.0,0.0,10.312379,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.251031,0.509232,651.585318
min,0.001,0.0,0.0,0.0,0.0,-2400.0,0.0,0.0,0.0,9e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,2.0,2.0
25%,4.0906,0.0,0.0,0.0,0.0,-2240.0,0.0,0.0,0.0,0.0107,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066542,4.0,549.0
50%,21.0436,0.0,0.0,0.0,0.0,-842.0,0.0,0.0,0.0,0.0211,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19005,4.0,1108.0
75%,55.9418,0.0,0.0,0.0,0.0,-164.0,0.0,0.0,0.0,0.0279,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.24706,4.0,1708.0
max,1440.0,0.0,0.0,0.0,0.0,-0.04,0.0,0.0,0.0,64.3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.75,6.0,2214.0
