### Process CLUSTER CDF files, extract necessary information, output as netCDF

In [2]:
import pandas as pd
import numpy as np
import cdflib
from datetime import datetime
import xarray as xr
import re
import os

### Loading variable and attribute info

In [3]:
dir_path = "./5vbf"
files = os.listdir(dir_path)

file_in = cdflib.CDF(os.path.join(dir_path, files[0]))

file_info = file_in.cdf_info()
zvarinfo = file_info.zVariables

znum = []
zvar = []
zdim = []
zfill = []
zdtype = []
zunits = []

for i in zvarinfo:
    ivar = file_in.varinq(i)
    znum.append(ivar.Num)
    zvar.append(ivar.Variable)
    zdim.append(ivar.Dim_Sizes)
    zdtype.append(ivar.Data_Type_Description)
    try:
        zfill.append(file_in.varattsget(i)['FILLVAL'])
        zunits.append(file_in.varattsget(i)['UNITS'])
    except KeyError:
        zfill.append('NaN')
        zunits.append('NaN')
        continue

pd.DataFrame({'zvar': zvar, 'zdim': zdim, 'ztype': zdtype, 'zfill': zfill, 'zunits': zunits})

Unnamed: 0,zvar,zdim,ztype,zfill,zunits
0,time_tags__C2_CP_FGM_5VPS,[],CDF_EPOCH,315569519999000.0,ms
1,half_interval__C2_CP_FGM_5VPS,[],CDF_FLOAT,-1.0000000150474662e+30,s
2,B_vec_xyz_gse__C2_CP_FGM_5VPS,[3],CDF_FLOAT,-1.0000000150474662e+30,nT
3,B_mag__C2_CP_FGM_5VPS,[],CDF_FLOAT,-1.0000000150474662e+30,nT
4,sc_pos_xyz_gse__C2_CP_FGM_5VPS,[3],CDF_FLOAT,-1.0000000150474662e+30,km
5,range__C2_CP_FGM_5VPS,[],CDF_INT4,-9.0,Unitless
6,tm__C2_CP_FGM_5VPS,[],CDF_INT4,-99.0,Unitless
7,B_vec_xyz_gse__C2_CP_FGM_5VPS_REPRESENTATION_1,[3],CDF_CHAR,,
8,B_vec_xyz_gse__C2_CP_FGM_5VPS_LABEL_1,[3],CDF_CHAR,,
9,sc_pos_xyz_gse__C2_CP_FGM_5VPS_REPRESENTATION_1,[3],CDF_CHAR,,


In [4]:
%%time

dt = np.zeros((1,), dtype='object')
bvec = np.zeros((1,3), dtype='object')
bmag = np.zeros((1,), dtype='object')
posgse = np.zeros((1,3), dtype='object')

for file in files:
    if file.endswith(".cdf"):
        file_path = os.path.join(dir_path, file)
        data_store = cdflib.CDF(file_path)
        time_store = data_store[zvar[0]]
        dt_store = pd.to_datetime(cdflib.cdfepoch.to_datetime(time_store))
        dt = np.append(dt, dt_store, axis=0)
        bvec_store = data_store[zvar[2]]
        bvec = np.append(bvec, bvec_store, axis=0)
        bmag_store = data_store[zvar[3]]
        bmag = np.append(bmag, bmag_store, axis=0)
        posgse_store = data_store[zvar[4]]
        posgse = np.append(posgse, posgse_store, axis=0)

dt = dt[1:]
dt[dt == float(zfill[0])] = 'NaN'
bvec[bvec == float(zfill[2])] = 'NaN'
bmag[bmag == float(zfill[3])] = 'NaN'
posgse[posgse == float(zfill[4])] = 'NaN'

data = np.hstack((bvec, bmag, posgse))[1:]

out = xr.DataArray(data = data, dims = ("t","x"), coords = {"t": dt, "x": ['b_x', 'b_y', 'b_z', 'b_mag', 'pos_x', 'pos_y', 'pos_z']}, name = 'value')
out.to_netcdf('fgm_data.nc')

CPU times: total: 1h 34min 18s
Wall time: 1h 53min 54s
