In [1]:
# Path of the station output file that the parsing cell opens and reads
station_file = "/net/babaracus/home/benr/wqmodels/SSMC_Share/SSM_2014_v2.7hyak_chk/WQM/SSM_2014_DO_Ph_T52/outputs/ssm_station.out"
# Destination of the gzip-compressed CSV holding the full parsed table
dest = "model_results/stations.csv.gz"

# Variable count passed to read_block for every layer of a station except the
# last one; columns beyond this count are filled with NaN for those layers
water_vars = 52
# Variable count passed to read_block for the last (bottom) layer of a station
bottom_vars = 104

from collections import deque
from pathlib import Path

import numpy as np
import pandas as pd
from IPython.display import clear_output

In [2]:
with open(station_file) as fp:
    # The first line is just a label
    next(fp)
    # The second line holds the station count and the layer count
    nstation, nlayer = np.loadtxt([next(fp)]).astype(int)

    def get_list_of_variable_names_from_line(line):
        """Return the column names listed on a `Variables="a","b",...` line."""
        return line.replace("Variables=", "").replace("\"", "").rstrip().split(",")

    variables_list = get_list_of_variable_names_from_line(next(fp))
    # Time is not a column in the file; it comes from each timestep header line
    variables_list.insert(0, "Time")

    # One list per column; every completed record appends one value to each
    data = {v: [] for v in variables_list}
    times = []

    def read_block(varct, t):
        """Read one station/layer record from `fp` and append it to `data`.

        Parameters
        ----------
        varct : int
            Number of variables present in this record. Columns whose index
            in `variables_list` is `varct + 3` or greater are filled with NaN.
        t : float
            Time of the current timestep, stored in the "Time" column.

        Raises
        ------
        StopIteration
            If the file ends before the record is complete. Nothing is added
            to `data` in that case, so all columns keep the same length.
        """
        row = []
        pending = deque()
        for i, v in enumerate(variables_list):
            if v == 'Time':
                row.append(t)
                continue
            # The extra three is for the station, node, and layer
            if i >= varct + 3:
                # Fill in empty data that's not applicable to this layer
                row.append(np.nan)
                continue
            if not pending:
                # A record may span several lines; pull the next one on demand.
                # atleast_1d keeps a line holding a single value iterable
                # (genfromtxt returns a 0-d array for it).
                pending = deque(np.atleast_1d(
                    np.genfromtxt([next(fp)], missing_values='*************')))
            row.append(pending.popleft())
        # Commit only after the whole record was read successfully
        for v, value in zip(variables_list, row):
            data[v].append(value)

    timestep_complete = True
    try:
        while True:
            # Read the number of stations/layers and the time
            istation, ilayers, t = np.loadtxt([next(fp)])
            timestep_complete = False
            istation = int(istation)
            ilayers = int(ilayers)
            times.append(t)
            clear_output(wait=True)
            print("TIME", t)
            for _ in range(istation):
                # Every layer but the last is read with the water variable
                # count; the last one is read with the bottom variable count
                for _ in range(ilayers - 1):
                    read_block(water_vars, t)
                read_block(bottom_vars, t)
            timestep_complete = True
    except StopIteration:
        # Running out of lines at a timestep header is the normal end of file;
        # running out anywhere else means the file was cut short.
        if not timestep_complete:
            print("WARNING: file ended partway through time", t,
                  "- the last timestep is incomplete")

TIME 6.75


In [3]:
# Turn the per-column lists collected by the parser into one table
df = pd.DataFrame.from_dict(data)
df.head()

Unnamed: 0,Time,StationID,Node,Layer,depth(m),DO,NO3,NH4,Alg1,Alg2,...,POC23,PON21,PON22,PON23,POP21,POP22,POP23,POS2,H1,BEN_STR
0,0.000463,1.0,6151.0,1.0,0.323122,9.74991,0.442787,0.018428,0.003604,0.003,...,,,,,,,,,,
1,0.000463,1.0,6151.0,2.0,1.23705,9.71216,0.441843,0.018393,0.003599,0.003,...,,,,,,,,,,
2,0.000463,1.0,6151.0,3.0,2.59292,9.39384,0.438833,0.0164,0.0035,0.003,...,,,,,,,,,,
3,0.000463,1.0,6151.0,4.0,4.26397,8.84907,0.434198,0.013402,0.003201,0.003,...,,,,,,,,,,
4,0.000463,1.0,6151.0,5.0,6.19759,8.03486,0.429173,0.011206,0.003002,0.003,...,,,,,,,,,,


In [4]:
# Keep a StationID -> Node lookup (first node recorded for each station) as its
# own Series, then remove the StationID column from the main table
station_nodes = df.groupby("StationID")["Node"].first()
df.drop(columns="StationID", inplace=True)
station_nodes.head()

StationID
1.0     6151.0
2.0     7786.0
3.0    11793.0
4.0     4040.0
5.0     5112.0
Name: Node, dtype: float64

In [5]:
# Node and Layer were parsed as floats; convert them to integers
df = df.astype({"Node": int, "Layer": int})
# Move Time/Node/Layer out of the columns and into a three-level row index
df = df.set_index(["Time", "Node", "Layer"])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,depth(m),DO,NO3,NH4,Alg1,Alg2,LDOC,RDOC,LPOC,RPOC,...,POC23,PON21,PON22,PON23,POP21,POP22,POP23,POS2,H1,BEN_STR
Time,Node,Layer,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0.000463,6151,1,0.323122,9.74991,0.442787,0.018428,0.003604,0.003,0.289448,0.25179,0.0002,0.000798,...,,,,,,,,,,
0.000463,6151,2,1.23705,9.71216,0.441843,0.018393,0.003599,0.003,0.288896,0.25131,0.0002,0.0008,...,,,,,,,,,,
0.000463,6151,3,2.59292,9.39384,0.438833,0.0164,0.0035,0.003,0.285019,0.247718,0.0004,0.0014,...,,,,,,,,,,
0.000463,6151,4,4.26397,8.84907,0.434198,0.013402,0.003201,0.003,0.281161,0.244453,0.0007,0.0023,...,,,,,,,,,,
0.000463,6151,5,6.19759,8.03486,0.429173,0.011206,0.003002,0.003,0.274869,0.237044,0.001201,0.003903,...,,,,,,,,,,


Save all of the output to a CSV, compressed.
There is probably too much data for Excel to handle it as one gigantic file; see below if you want to get a spreadsheet of just one variable for one station.

In [6]:
# to_csv does not create missing folders, so make sure the output directory
# exists before writing the gzip-compressed CSV
Path(dest).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(dest, compression="gzip")

Example of how to save one variable at one station node to Excel

In [7]:
node_to_save = 40
var_to_save = "DO"

# Take the cross-section of the table for this one node (every time and every
# layer is kept, and the Node level is dropped from the index), then pick out
# the single variable of interest
df.xs(node_to_save, level="Node")[var_to_save]

Time      Layer
0.000463  1        8.88914
          2        8.04703
          3        7.08813
          4        5.59414
          5        5.03795
                    ...   
6.750000  6        4.34445
          7        4.30333
          8        4.24084
          9        4.11616
          10       3.96165
Name: DO, Length: 280, dtype: float64

In [8]:
# Derive the workbook name from the selection so that changing node_to_save or
# var_to_save cannot silently overwrite a file named after a different choice
excel_dest = Path("notebook_outs") / "node{}_{}.xlsx".format(node_to_save, var_to_save.lower())
# to_excel does not create missing folders
excel_dest.parent.mkdir(parents=True, exist_ok=True)
df.loc[:, node_to_save, :][var_to_save].to_excel(excel_dest)

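Extract one state variable from all nodes in a single layer (here `DO` in layer 1), with one row per time and one column per node. The `dropna` call removes nodes whose values are all NaN, so requesting a bottom-layer-only state variable for any other layer yields a DataFrame with no columns.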

In [9]:
# Layer 1 only -> DO series indexed by (Time, Node) -> nodes spread across the
# columns -> discard nodes that have no data at all
(
    df.xs(1, level="Layer")["DO"]
    .unstack(level="Node")
    .dropna(axis="columns", how="all")
)

Node,40,1424,4040,5112,5308,6151,6231,7294,7786,7796,...,11959,12166,13264,13789,14271,14885,15199,15490,15903,15967
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.000463,8.88914,9.14636,8.63955,9.18824,9.07107,9.74991,8.15863,9.64832,9.64691,10.7043,...,10.0591,9.65705,9.78704,10.4212,9.41646,9.84068,9.55779,10.7193,10.2573,10.7892
0.25,8.8713,9.07574,8.53041,8.74934,9.89064,9.37986,9.32571,9.57541,9.53682,10.2321,...,9.86172,9.53881,9.82993,10.6173,9.54964,9.63023,9.29671,10.4173,10.9967,10.8698
0.5,8.63889,8.85504,7.79561,8.52855,9.47001,9.95057,7.77888,9.45214,9.1634,9.52499,...,10.3325,9.45617,9.9114,9.51471,9.36187,9.50742,9.30183,10.1812,9.91834,10.4818
0.75,8.71468,9.03552,7.76934,9.0353,8.25365,10.3327,9.01649,9.6303,9.06807,7.92512,...,10.4671,9.40975,9.89702,9.11084,9.36636,9.18367,9.49366,10.5887,9.39015,10.3364
1.0,8.6193,8.90543,7.55615,9.15116,8.30097,10.0613,9.01233,9.5115,9.26795,9.14635,...,10.457,9.31066,9.78804,8.38609,9.46486,9.2895,9.54329,10.3873,9.67648,10.4139
1.25,8.35042,8.61955,7.28585,8.50084,7.49276,9.65071,7.64612,9.55977,9.21367,8.83923,...,10.3275,9.22635,9.56974,8.50355,9.19299,8.90442,9.12326,9.95631,9.24615,9.15752
1.5,8.32351,8.02203,7.12841,8.54889,7.25524,8.74623,7.89922,9.46803,8.11143,8.30162,...,10.0767,8.94167,8.90907,8.21879,8.59956,8.81776,8.98322,9.4206,9.19993,9.55977
1.75,8.3427,8.17748,7.28917,9.39997,7.6743,8.49504,8.19422,9.16735,8.23364,9.43222,...,9.7353,8.94402,8.88661,8.95248,8.48288,8.64,8.93304,9.56404,9.46787,9.42363
2.0,8.38471,8.79249,7.58468,9.17462,7.76546,8.13759,8.64629,8.98421,8.10123,9.9225,...,9.65342,9.12748,9.14438,8.97913,8.38037,8.86621,9.17109,9.04909,9.87346,9.94183
2.25,8.3579,8.76385,8.20808,9.13733,8.14755,9.73276,8.24358,9.06128,8.77159,9.49875,...,9.57151,9.25375,9.69057,9.37932,8.72809,8.67545,8.94461,9.22267,9.40553,10.1719
