# pysat demo on creating netCDF4 files

In [1]:
import datetime as dt
import os

import pysat
import netCDF4

In [2]:
# Record which pysat ecosystem packages (and versions) are installed in this
# environment, so the outputs shown in this notebook can be tied to a version.
!pip list | grep pysat

pysat                         3.2.0
pysatCDAAC                    0.0.4
pysatCDF                      0.3.2
pysatMadrigal                 0.2.0
pysatMissions                 0.3.4
pysatNASA                     0.0.5
pysatSeasons                  0.2.0
pysatSpaceWeather             0.1.0


In [3]:
# Set a pysat data directory, but only if the user hasn't already set one.
# An empty list or ['.'] is treated as "not configured" by the check below.
print(f"old: {pysat.params['data_dirs']}")
if len(pysat.params['data_dirs']) == 0 or pysat.params['data_dirs'] == ['.']:
    default_data_dir = '/home/jovyan/scratch_space/.pysat/'

    # Make sure the directory exists before handing it to pysat.
    # NOTE(review): pysat is understood to reject `data_dirs` entries that are
    # not existing directories; creating it first keeps a fresh machine working.
    os.makedirs(default_data_dir, exist_ok=True)

    # Set a directory for pysat to use for data
    pysat.params['data_dirs'] = default_data_dir
else:
    print('pysat directory has been set previously. Leaving unchanged.')

print(f"new: {pysat.params['data_dirs']}")

old: ['/home/jovyan/scratch_space/.pysat']
pysat directory has been set previously. Leaving unchanged.
new: ['/home/jovyan/scratch_space/.pysat']


In [4]:
# Filename template for the demo output file. The `year` and `day` fields are
# filled in later, when the file is written and when it is searched for.
filename = 'demo_test_file_{year:04d}{day:03d}.nc'

# Date of the data used throughout this demo.
date = dt.datetime(year=2009, month=1, day=1)

In [5]:
# Choose the test Instrument used for the rest of the demo. Exactly one of
# the options below should be active; the others are left as alternatives.

# Instrument with a variety of 1D variables.
inst = pysat.Instrument(platform='pysat', name='testing')

# Instrument with a variety of 1D and 2D variables.
# inst = pysat.Instrument('pysat', 'testing2D')

# Instrument with xarray data, mixed data dimensionality.
# inst = pysat.Instrument('pysat', 'testmodel')

# Instrument with xarray data, mixed data dimensionality.
# inst = pysat.Instrument('pysat', 'testing2D_xarray')

In [6]:
# Load the data for `date` into the Instrument; this populates the data and
# metadata that are written to file later in the notebook.
inst.load(date=date)

In [7]:
# Options used when writing the file and when loading it back.
# Each option is set to its default; commented alternatives follow.


# --- Translate metadata labels to something new ---

# Metadata labels in the file may be different than those used by the
# Instrument object. `meta_translation` is applied when writing the file and
# `inv_translation` is applied when loading it back.

# Default behavior
meta_translation = None
inv_translation = None

# Made up translation #1: map existing labels to multiple labels in the file.
# meta_translation = {inst.meta.labels.units: ['funny_units', 'serious_units'],
#                    inst.meta.labels.fill_val: ['funny_fill', 'fill_serious']}
# inv_translation = {'funny_units': inst.meta.labels.units,
#                    'serious_units': inst.meta.labels.units,
#                    'funny_fill': inst.meta.labels.fill_val,
#                    'fill_serious': inst.meta.labels.fill_val}

# Arbitrary processing of metadata is also supported when writing/loading files.
# See pysat documentation for more on the `meta_processor` keyword.


# --- Add additional metadata ---

# Default behavior
export_nan = None

# `new_label` will only appear in the file for 'mlt' since values for other
# variables are NaN. To include metadata with NaN values, use the `export_nan`
# keyword. It will, by default, include fill, and the min and max values.
# Note that adding a new metadata type to meta will also add it to `meta.labels`.

# drop_label = 'new_label'
# inst.meta['mlt'] = {drop_label: 1.}
# inst.meta.data

# The new label is looked up on `meta.labels` by the name stored in `drop_label`.
# export_nan = [inst.meta.labels.fill_val, inst.meta.labels.max_val,
#               inst.meta.labels.min_val, getattr(inst.meta.labels, drop_label)]


# --- Drop metadata when loading ---

# By setting the `drop_meta_labels` keyword, users can prevent metadata
# information from being loaded.

# Keep all metadata
drop_labels = []

# Drop newly added label
# drop_labels = ['new_label']

In [8]:
# Add custom information to `inst.meta.header`, which is written to the file.
# The attribute name chosen here shows up among the file's global attributes
# when they are listed after writing.
inst.meta.header.demo_thang_pysat_style = 'Yes'

In [9]:

# Format the output filename before writing. `inst.files.data_path` is already
# an absolute directory (it is printed below), so it is joined with the filename
# directly; any component placed before an absolute path in `os.path.join` is
# discarded, which made a leading `pysat.params['data_dirs'][0]` a no-op.
form_filename = os.path.join(inst.files.data_path,
                             filename.format(year=inst.yr, day=inst.doy))
print(inst.files.data_path)

# Write file using `pysat.utils.io`, applying the options chosen earlier.
pysat.utils.io.inst_to_netcdf(inst, form_filename,
                              meta_translation=meta_translation,
                              export_nan=export_nan)

/home/jovyan/scratch_space/.pysat/pysat/testing/


  pysat.utils.io.inst_to_netcdf(inst, form_filename, meta_translation=meta_translation, export_nan=export_nan)


In [10]:
# Print every global (file-level) attribute stored in the written file.
# The handle is named `nc_file` so it is not confused with the loaded `data`
# used later in the notebook.
with netCDF4.Dataset(form_filename) as nc_file:
    print('Global File Attributes\n')
    for attr_name in nc_file.ncattrs():
        attr_value = nc_file.getncattr(attr_name)
        print('\n', attr_name, ': ', attr_value)

Global File Attributes


 demo_thang_pysat_style :  Yes

 new_thing :  1

 test_clean_kwarg :  

 test_init_kwarg :  

 test_preprocess_kwarg :  

 pysat_version :  3.2.0

 Conventions :  pysat-simplified SPDF ISTP/IACG for NetCDF

 Text_Supplement :  

 platform :  pysat

 name :  testing

 tag :  

 inst_id :  

 acknowledgements :  Test instruments provided through the pysat project. https://www.github.com/pysat/pysat

 references :  Stoneback, Russell, et al. (2023). pysat/pysat v3.1 (Version v3.1). Zenodo. http://doi.org/10.5281/zenodo.1199703


 Date_End :  Thu, 01 Jan 2009,  2009-01-01T23:59:59.000 UTC

 Date_Start :  Thu, 01 Jan 2009,  2009-01-01T00:00:00.000 UTC

 File :  ['/home/jovyan/scratch_space/.pysat/pysat/testing', 'demo_test_file_2009001.nc']

 File_Date :  Thu, 01 Jan 2009,  2009-01-01T23:59:59.000 UTC

 Generation_Date :  20240520

 Logical_File_ID :  demo_test_file_2009001


In [11]:
# Print the metadata attributes stored with each variable in the file.
with netCDF4.Dataset(form_filename) as nc_file:
    print('File Variable Attributes\n')
    for var_name, variable in nc_file.variables.items():
        print('File Variable: ', var_name)

        # Underline that grows with the variable name in the heading above.
        print('----------------' + '-' * len(var_name))

        for nc_key in variable.ncattrs():
            print(nc_key, ': ', variable.getncattr(nc_key))

        print('\n')


File Variable Attributes

File Variable:  Epoch
---------------------
units :  Milliseconds since 1970-1-1 00:00:00
long_name :  Epoch
calendar :  standard
Format :  i8
Var_Type :  data
Time_Base :  Milliseconds since 1970-1-1 00:00:00
Time_Scale :  UTC
MonoTon :  increase


File Variable:  uts
-------------------
units :  s
long_name :  Universal Time
notes :  
desc :  Number of seconds since mindight UT
value_min :  0.0
value_max :  86400.0
_FillValue :  nan
FillVal :  nan
fill :  nan
Depend_0 :  Epoch
Display_Type :  Time Series
Var_Type :  data
Format :  f8


File Variable:  mlt
-------------------
units :  hours
long_name :  Magnetic Local Time
notes :  
desc :  Local time at magnetic field line at equator.
value_min :  0.0
value_max :  24.0
_FillValue :  nan
FillVal :  nan
fill :  nan
Depend_0 :  Epoch
Display_Type :  Time Series
Var_Type :  data
Format :  f8


File Variable:  slt
-------------------
units :  hours
long_name :  Solar Local Time
notes :  Example of notes.
desc :  

In [12]:
# Load data and metadata back from the file using `pysat.utils.io`, applying
# the inverse label translation and the list of metadata labels to drop.
data, meta = pysat.utils.io.load_netcdf(
    form_filename,
    pandas_format=inst.pandas_format,
    meta_translation=inv_translation,
    drop_meta_labels=drop_labels,
)

# Display the loaded metadata.
meta.data

Unnamed: 0,units,long_name,notes,desc,value_min,value_max,fill
uts,s,Universal Time,,Number of seconds since mindight UT,0.0,86400.0,
mlt,hours,Magnetic Local Time,,Local time at magnetic field line at equator.,0.0,24.0,
slt,hours,Solar Local Time,Example of notes.,Mean solar time.,0.0,24.0,
longitude,degrees,Longitude,,Geographic Longitude,0.0,360.0,
latitude,degrees,Latitude,,Geographic Latituce,-90.0,90.0,
altitude,km,Altitude,,Height above mean Earth.,0.0,inf,
orbit_num,,Orbit Number,Number of orbits since the start of the missio...,Orbit Number,0.0,25000.0,-1.0
dummy1,,dummy1,,,0.0,24.0,-1.0
dummy2,,dummy2,,,0.0,24.0,-1.0
dummy3,,dummy3,,,0.0,24024.0,-1.0


In [13]:
# Display the data returned by `load_netcdf` as the cell's output.
data

Unnamed: 0_level_0,altitude,dummy1,dummy2,dummy3,dummy4,int16_dummy,int32_dummy,int64_dummy,int8_dummy,latitude,longitude,mlt,orbit_num,slt,string_dummy,unicode_dummy,uts
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2009-01-01 00:00:00,400.0,0,0,0.0,0.0,1,1,1,1,90.000000,0.000000,0.000000,5433,0.082474,test,test,0.0
2009-01-01 00:00:01,400.0,0,0,0.0,1.0,1,1,1,1,89.999948,0.057692,0.004124,5433,0.086598,test,test,1.0
2009-01-01 00:00:02,400.0,0,0,0.0,2.0,1,1,1,1,89.999790,0.115385,0.008247,5433,0.090722,test,test,2.0
2009-01-01 00:00:03,400.0,0,0,0.0,3.0,1,1,1,1,89.999528,0.173077,0.012371,5433,0.094845,test,test,3.0
2009-01-01 00:00:04,400.0,0,0,0.0,4.0,1,1,1,1,89.999161,0.230769,0.016495,5433,0.098969,test,test,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009-01-01 23:59:55,400.0,20,20,20020.0,86395.0,1,1,1,1,50.354209,304.326923,20.268041,5448,20.350515,test,test,86395.0
2009-01-01 23:59:56,400.0,20,20,20020.0,86396.0,1,1,1,1,50.434711,304.384615,20.272165,5448,20.354639,test,test,86396.0
2009-01-01 23:59:57,400.0,20,20,20020.0,86397.0,1,1,1,1,50.515155,304.442308,20.276289,5448,20.358763,test,test,86397.0
2009-01-01 23:59:58,400.0,20,20,20020.0,86398.0,1,1,1,1,50.595540,304.500000,20.280412,5448,20.362887,test,test,86398.0


In [14]:
# Load data using pysat.Instrument. Presumes that current `inst` created NetCDF file.
# First, define a general pysat.Instrument that searches `inst`'s data directory
# for files matching the `filename` template.
load_inst = pysat.Instrument(
    'pysat', 'netcdf',
    pandas_format=inst.pandas_format,
    update_files=True,
    file_format=filename,
    data_dir=inst.files.data_path,
    meta_translation=inv_translation,
    drop_meta_labels=drop_labels,
)

# Load the same day that was written. Reusing `date` keeps this cell in step
# with the earlier load instead of repeating the year and day of year.
load_inst.load(date=date)

# Display the loaded data.
load_inst.data

Unnamed: 0_level_0,altitude,dummy1,dummy2,dummy3,dummy4,int16_dummy,int32_dummy,int64_dummy,int8_dummy,latitude,longitude,mlt,orbit_num,slt,string_dummy,unicode_dummy,uts
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2009-01-01 00:00:00,400.0,0,0,0.0,0.0,1,1,1,1,90.000000,0.000000,0.000000,5433,0.082474,test,test,0.0
2009-01-01 00:00:01,400.0,0,0,0.0,1.0,1,1,1,1,89.999948,0.057692,0.004124,5433,0.086598,test,test,1.0
2009-01-01 00:00:02,400.0,0,0,0.0,2.0,1,1,1,1,89.999790,0.115385,0.008247,5433,0.090722,test,test,2.0
2009-01-01 00:00:03,400.0,0,0,0.0,3.0,1,1,1,1,89.999528,0.173077,0.012371,5433,0.094845,test,test,3.0
2009-01-01 00:00:04,400.0,0,0,0.0,4.0,1,1,1,1,89.999161,0.230769,0.016495,5433,0.098969,test,test,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009-01-01 23:59:55,400.0,20,20,20020.0,86395.0,1,1,1,1,50.354209,304.326923,20.268041,5448,20.350515,test,test,86395.0
2009-01-01 23:59:56,400.0,20,20,20020.0,86396.0,1,1,1,1,50.434711,304.384615,20.272165,5448,20.354639,test,test,86396.0
2009-01-01 23:59:57,400.0,20,20,20020.0,86397.0,1,1,1,1,50.515155,304.442308,20.276289,5448,20.358763,test,test,86397.0
2009-01-01 23:59:58,400.0,20,20,20020.0,86398.0,1,1,1,1,50.595540,304.500000,20.280412,5448,20.362887,test,test,86398.0


In [15]:
# Display the metadata held by `load_inst` as the cell's output.
load_inst.meta.data

Unnamed: 0,units,long_name,notes,desc,value_min,value_max,fill
uts,s,Universal Time,,Number of seconds since mindight UT,0.0,86400.0,
mlt,hours,Magnetic Local Time,,Local time at magnetic field line at equator.,0.0,24.0,
slt,hours,Solar Local Time,Example of notes.,Mean solar time.,0.0,24.0,
longitude,degrees,Longitude,,Geographic Longitude,0.0,360.0,
latitude,degrees,Latitude,,Geographic Latituce,-90.0,90.0,
altitude,km,Altitude,,Height above mean Earth.,0.0,inf,
orbit_num,,Orbit Number,Number of orbits since the start of the missio...,Orbit Number,0.0,25000.0,-1.0
dummy1,,dummy1,,,0.0,24.0,-1.0
dummy2,,dummy2,,,0.0,24.0,-1.0
dummy3,,dummy3,,,0.0,24024.0,-1.0
