In [10]:
import glob
import os
import pandas as pd
from collections import defaultdict
import netCDF4
import re

### Initialize nested dictionary

In [11]:
def nested_dict():
    """Return a defaultdict whose missing keys are themselves nested defaultdicts."""
    return defaultdict(nested_dict)


# Arbitrarily deep mapping; any chain of keys can be assigned to without
# creating the intermediate levels first.
var_dict = nested_dict()

### Control runs

In [12]:
# Root directory of the control runs; each version lives in its own sub-directory.
ctrl_dir = '/g/data1/v14/coupled_model/'

ctrl_vers = ['v0','v1','v2','v3'] #next(os.walk(ctrl_dir))[1]
ext = '/OUTPUT/'

for ctrl_ver in ctrl_vers:
    path = ctrl_dir + ctrl_ver + ext
    # Fail loudly on a missing directory (as os.chdir used to), but glob with
    # the full path so the notebook's working directory is left untouched.
    if not os.path.isdir(path):
        raise FileNotFoundError(path)
    # Sorted so that the representative file chosen per prefix is deterministic.
    files = sorted(glob.glob(os.path.join(path, '*.nc')))

    # Group files by the part of the name preceding the first digit -----
    # The single character directly before the digit is dropped as well
    # (presumably a separator such as '_' -- confirm against the file naming).
    # Files without any digit, or with nothing left once the digits are cut
    # off, are ignored. The first file seen for each prefix is remembered so
    # that exactly that file is opened below.
    realm_files = {}
    for file in files:
        name = os.path.basename(file)
        ind_str = re.search(r'\d', name)
        if ind_str is None:
            continue
        realm_freq = name[:max(ind_str.start() - 1, 0)]
        if realm_freq:
            realm_files.setdefault(realm_freq, file)

    # Fill dictionary -----
    for realm_freq, file in realm_files.items():
        # Read the variable names from one representative file and close it
        # again straight away.
        try:
            with netCDF4.Dataset(file) as dataset:
                variables = list(dataset.variables.keys())
        except PermissionError:
            variables = ['No permission']
        var_dict['Control runs'][ctrl_ver][realm_freq]['variables'] = ', '.join(variables)

### Forecast runs

In [13]:
# Root directory of the forecast runs; each version lives in its own sub-directory.
fcst_dir = '/g/data1/v14/forecast/'

fcst_vers = ['v0','v1'] #next(os.walk(fcst_dir))[1]
# Only this one forecast output directory is inspected for every version.
ext = '/yr2010/mn1/OUTPUT.1/'

for fcst_ver in fcst_vers:
    path = fcst_dir + fcst_ver + ext
    # Fail loudly on a missing directory (as os.chdir used to), but glob with
    # the full path so the notebook's working directory is left untouched.
    if not os.path.isdir(path):
        raise FileNotFoundError(path)
    # Sorted so that the representative file chosen per prefix is deterministic.
    files = sorted(glob.glob(os.path.join(path, '*.nc')))

    # Group files by the part of the name preceding the first digit -----
    # The single character directly before the digit is dropped as well
    # (presumably a separator such as '_' -- confirm against the file naming).
    # Files without any digit, or with nothing left once the digits are cut
    # off, are ignored. The first file seen for each prefix is remembered so
    # that exactly that file is opened below.
    realm_files = {}
    for file in files:
        name = os.path.basename(file)
        ind_str = re.search(r'\d', name)
        if ind_str is None:
            continue
        realm_freq = name[:max(ind_str.start() - 1, 0)]
        if realm_freq:
            realm_files.setdefault(realm_freq, file)

    # Fill dictionary -----
    for realm_freq, file in realm_files.items():
        # Read the variable names from one representative file and close it
        # again straight away.
        try:
            with netCDF4.Dataset(file) as dataset:
                variables = list(dataset.variables.keys())
        except PermissionError:
            variables = ['No permission']
        var_dict['Forecast runs'][fcst_ver][realm_freq]['variables'] = ', '.join(variables)

### Build into pd.DataFrame

In [34]:
from IPython.display import display, HTML

# Flatten the three outer levels of var_dict into tuple keys so that each
# innermost mapping becomes one row of the resulting frame.
table = pd.DataFrame.from_dict(
    {
        (run_type, version, realm_freq): entry
        for run_type, versions in var_dict.items()
        for version, realms in versions.items()
        for realm_freq, entry in realms.items()
    },
    orient='index',
)
table

Unnamed: 0,Unnamed: 1,Unnamed: 2,variables
Control runs,v0,atmos_annual,"lon, lat, phalf, pfull, time, time_bnds, drag_..."
Control runs,v0,atmos_daily,"lon, lonb, lat, latb, time, nv, t_ref, u_ref, ..."
Control runs,v0,atmos_month,"lon, lonb, lat, latb, time, nv, phalf, pfull, ..."
Control runs,v0,ice_month,"xt, xb, yt, yb, time, nv, ct, xv, yv, CELL_ARE..."
Control runs,v0,land_month,"lon, lonb, lat, latb, time, nv, scalar_axis, z..."
Control runs,v0,ocean_bgc_month,"xt_ocean, yt_ocean, st_ocean, st_edges_ocean, ..."
Control runs,v0,ocean_daily,"xu_ocean, yu_ocean, time, nv, xt_ocean, yt_oce..."
Control runs,v0,ocean_instant,"xt_ocean, yt_ocean, st_ocean, st_edges_ocean, ..."
Control runs,v0,ocean_month,"xt_ocean, yt_ocean, time, nv, xu_ocean, yu_oce..."
Control runs,v0,ocean_scalar,"scalar_axis, time, nv, total_ocean_river, tota..."


In [33]:
# Move into the support directory, then write the table there under a
# relative file name.
os.chdir('/home/599/ds0092/Documents/pylatte/support')
table.to_csv(path_or_buf='CAFE_run_variables.csv')