### Process CLUSTER CDF files, extract necessary information, output as netCDF

In [2]:
import pandas as pd
import numpy as np
import cdflib
from datetime import datetime
import xarray as xr
import re
import os

### Loading variable and attribute info

In [7]:
dir_path = "./5vbf"
files = os.listdir(dir_path)

file_in = cdflib.CDF(os.path.join(dir_path, files[0]))

file_info = file_in.cdf_info()
zvarinfo = file_info.zVariables

znum = []
zvar = []
zdim = []
zfill = []
zdtype = []

for i in zvarinfo:
    ivar = file_in.varinq(i)
    znum.append(ivar.Num)
    zvar.append(ivar.Variable)
    zdim.append(ivar.Dim_Sizes)
    zdtype.append(ivar.Data_Type_Description)
    try:
        zfill.append(file_in.varattsget(i)['FILLVAL'])
    except KeyError:
        zfill.append('NaN')
        continue

pd.DataFrame({'zvar': zvar, 'zdim': zdim, 'ztype': zdtype, 'zfill': zfill})

Unnamed: 0,zvar,zdim,ztype,zfill
0,time_tags__C2_CP_FGM_5VPS,[],CDF_EPOCH,315569519999000.0
1,half_interval__C2_CP_FGM_5VPS,[],CDF_FLOAT,-1.0000000150474662e+30
2,B_vec_xyz_gse__C2_CP_FGM_5VPS,[3],CDF_FLOAT,-1.0000000150474662e+30
3,B_mag__C2_CP_FGM_5VPS,[],CDF_FLOAT,-1.0000000150474662e+30
4,sc_pos_xyz_gse__C2_CP_FGM_5VPS,[3],CDF_FLOAT,-1.0000000150474662e+30
5,range__C2_CP_FGM_5VPS,[],CDF_INT4,-9.0
6,tm__C2_CP_FGM_5VPS,[],CDF_INT4,-99.0
7,B_vec_xyz_gse__C2_CP_FGM_5VPS_REPRESENTATION_1,[3],CDF_CHAR,
8,B_vec_xyz_gse__C2_CP_FGM_5VPS_LABEL_1,[3],CDF_CHAR,
9,sc_pos_xyz_gse__C2_CP_FGM_5VPS_REPRESENTATION_1,[3],CDF_CHAR,


Need to change data['example-z-var']

In [4]:
%%time

bvec = np.zeros((1,3), dtype='object')
dt = np.zeros((1,), dtype='object')

i = 0
for file in files:
    if file.endswith(".cdf"):
        file_path = os.path.join(dir_path, file)
        data_store = cdflib.CDF(file_path)
        time_store = data_store[zvar[0]]
        dt_store = pd.to_datetime(cdflib.cdfepoch.to_datetime(time_store))
        dt = np.append(dt, dt_store, axis=0)
        bvec_store = data_store[zvar[2]]
        bvec = np.append(bvec, bvec_store, axis=0)

dt = dt[1:]
dt[dt == float(zfill[0])] = 'NaN'

bvec = bvec[1:]
bvec[bvec == float(zfill[2])] = 'NaN'

out = xr.DataArray(data = bvec, dims = ("t","x"), coords = {"t": dt, "x": ['x', 'y', 'z']}, name = 'bvec')
out.to_netcdf('fgm_bvec.nc')

CPU times: total: 1h 34min 18s
Wall time: 1h 53min 54s
