In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import re
import glob
import sys

In [2]:
# Create a dataarray for each species
species = np.loadtxt("species.txt", dtype='str')

files = glob.glob("Species/*")
files_string = '|'.join(files)                
print(files_string)

zeros = np.full([36, 72, 9, 12, 243], np.nan)

da = xr.DataArray(zeros, coords={
    "latitude": np.arange(-90, 90, 5),
    "longitude": np.arange(-180, 180, 5),
    "level": [962, 861, 759, 658, 556, 454, 353, 251, 150.5],
    "month": np.arange(0,12),
    "species": species,
})

print(da)


# Create numpy array that encapsulates all data for a particular species (month and level indexed)
for s in species:
    print(s)
    
    for month in np.arange(0, 12):
        for lvl_idx, level in enumerate([962, 861, 759, 658, 556, 454, 353, 251, 150.5]):
            if re.search("Species/" + s, files_string):
                da.loc[:,:,level, month, s] = np.loadtxt("Species/" + s + "/" + s + "_MONTH_" + str(month + 1) + "_LEVEL_" + str(lvl_idx + 1) + ".csv", delimiter=",")
            else:
                print("No files found for " + s)


Species/NPROPOL|Species/RU14OOH|Species/DHPR12OOH|Species/BPINENE|Species/RN15AO2|Species/RN16O2|Species/NRTX28OOH|Species/MEK|Species/UDCARB14|Species/HOC2H4OOH|Species/RA19OOH|Species/TDICLETH|Species/RTN24OOH|Species/RN10NO3|Species/NRU14O2|Species/C2H5CHO|Species/CCARB12|Species/RA13NO3|Species/RN13AO2|Species/RN9NO3|Species/RU12NO3|Species/CH3CL|Species/IPROPOL|Species/UCARB10|Species/SA|Species/HOC2H4NO3|Species/TOLUENE|Species/RN18AO2|Species/NRU12O2|Species/HCOOH|Species/RTX22O2|Species/SO2|Species/P3612|Species/UCARB12|Species/RN14OOH|Species/CH3CCL3|Species/NO2|Species/CH3CO2H|Species/P2635|Species/HOCH2CO3|Species/IEPOX|Species/C2H2|Species/P2630|Species/UDCARB11|Species/OH|Species/CH3COCH3|Species/UDCARB8|Species/RAROH17|Species/RN8OOH|Species/RN13O2|Species/P3613|Species/NRN6OOH|Species/C2H5CO3H|Species/CARB16|Species/O1D|Species/RU10O2|Species/HONO|Species/P2604|Species/AROH17|Species/HO2|Species/NRTN28O2|Species/CARB14|Species/RTX28O2|Species/TNCARB10|Species/DHCARB9|Spe

In [3]:
da.sel(species="CO", month=11, level=962)

In [4]:
has_nans = xr.where(da.isnull(), True, False)

In [5]:
# Print the result
print(f"Does the DataArray have NaN values? {has_nans.any().values}")

Does the DataArray have NaN values? False


In [6]:
da.to_netcdf('species.nc')

In [7]:
sys.getsizeof(da)

96