In [None]:
# default_exp nc4_convert

# NASA NC4 Loader

This module allows you to:
- load Level 2 NC4 files from disk
- read the data with the netCDF4 library
- save them to CSV

In a recursive way.

In [None]:
#hide
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
#%matplotlib inline
# Display the value of every top-level expression of a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
#export
import os
import glob
import re
import sys
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from tqdm import tqdm

## Get the content of a directory
A filter lets you select only some of the files, which is useful to process a single month, for example.

In [None]:
#export
def get_file_list(nc4_directory, pattern='*.nc4'):
    '''
    Return the sorted list of the files of `nc4_directory` matching `pattern`.

    nc4_directory: directory to look into; it is resolved with os.path.realpath
                   and its sub-directories are not searched.
    pattern: glob pattern applied to the file names ('*.nc4' by default).
    Returns a list of absolute paths, empty when nothing matches.
    '''
    nc4_directory = os.path.realpath(nc4_directory)
    # Only `pattern` is a wildcard: escape the directory so that glob
    # metacharacters in its name ('[', '*', '?') are matched literally.
    search_path = os.path.join(glob.escape(nc4_directory), pattern)
    # glob gives no guarantee on ordering; sort so that the result (and any
    # "first"/"last" file picked from it) is reproducible.
    return sorted(glob.glob(search_path))

In [None]:
# Keep the last entry of the returned list as the sample file used by the examples below
last_nc4 = get_file_list('/media/NAS-Divers/dev/datasets/OCO2/nc4-v10/')[-1]
last_nc4

'/media/NAS-Divers/dev/datasets/OCO2/nc4-v10/oco2_LtCO2_20200407_B10offline.nc4'

## Open the file with netCDF4

In [None]:
#export
def get_nc4_raw_content(one_file):
    '''
    Open `one_file` with netCDF4 in 'r' mode and return the resulting Dataset.
    The Dataset is returned open; this function does not close it.
    '''
    raw_dataset = Dataset(one_file, 'r')
    return raw_dataset

This allows you to explore the content of the file yourself.

In [None]:
# Show the 'Sounding' group of the sample file (the Dataset opened here is not closed)
get_nc4_raw_content(last_nc4).groups['Sounding']

<class 'netCDF4._netCDF4.Group'>
group /Sounding:
    dimensions(sizes): 
    variables(dimensions): float32 solar_azimuth_angle(sounding_id), float32 sensor_azimuth_angle(sounding_id), float32 polarization_angle(sounding_id), int8 land_fraction(sounding_id), float32 glint_angle(sounding_id), float32 airmass(sounding_id), float32 snr_o2a(sounding_id), float32 snr_wco2(sounding_id), float32 snr_sco2(sounding_id), int32 l1b_type(sounding_id), int8 operation_mode(sounding_id), int32 orbit(sounding_id), uint8 path(sounding_id), int8 footprint(sounding_id), int8 land_water_indicator(sounding_id), float32 altitude(sounding_id), float32 altitude_stddev(sounding_id)
    groups: 

To get the version of the NASA pipeline :

In [None]:
# Read the BuildId attribute of the sample file
get_nc4_raw_content(last_nc4).BuildId

'B10.0.04'

## Extract the useful information

In [None]:
#export
columns=['flag','sounding_id', 'latitude', 'longitude', 'xco2', 'xco2_uncert', 'orbit', 'windspeed_u', 'windspeed_v',
    'surface_pressure_apriori', 'surface_pressure', 'altitude', 'land_water_indicator', 'land_fraction']
def get_np_table(one_file):
    '''
    Read one NC4 file and return its useful variables as a 2-D numpy table.

    one_file: path of the NC4 file to read.
    Returns an array with one column per entry of `columns`, in the same order.
    When the file cannot be opened, an error is printed and an empty table of
    shape (0, len(columns)) is returned, so that a batch can carry on.
    '''
    # Open the file
    try:
        file_nc = Dataset(one_file, 'r')
    except Exception as exc:
        # Best effort on purpose: one unreadable file must not stop a whole batch.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print('ERROR reading', one_file, '-', exc)
        return np.empty((0,len(columns)))
    # Documentation of data : https://docserver.gesdisc.eosdis.nasa.gov/public/project/OCO/OCO2_DUG.V9.pdf
    # The context manager closes the file once the values are copied into the table.
    with file_nc:
        sounding = file_nc.groups['Sounding']
        meteorology = file_nc.groups['Meteorology']
        retrieval = file_nc.groups['Retrieval']
        # Files whose BuildId starts with 'B10' name the a-priori surface pressure differently.
        if file_nc.BuildId[0:3] == 'B10':
            psurf_apriori_name = 'psurf_apriori_o2a'
        else:
            psurf_apriori_name = 'psurf_apriori'
        # Order must match `columns`.
        np_table = np.column_stack((
            file_nc.variables['xco2_quality_flag'],
            file_nc.variables['sounding_id'],
            file_nc.variables['latitude'],
            file_nc.variables['longitude'],
            file_nc.variables['xco2'],
            file_nc.variables['xco2_uncertainty'],
            sounding.variables['orbit'],
            meteorology['windspeed_u_met'],
            meteorology['windspeed_v_met'],
            meteorology[psurf_apriori_name],
            retrieval['psurf'],
            sounding['altitude'],
            sounding['land_water_indicator'],
            sounding['land_fraction'],
        ))
    return np_table

In [None]:
# Extract the sample file and check the size of the table (rows, columns)
np_table =  get_np_table(last_nc4)
np_table.shape

(202617, 14)

## Convert them to Pandas
This function takes a list of files as argument and loads them all into a single Pandas DataFrame.

In [None]:
#export
def get_dataframe(nc4_list):
    '''
    Load every file of `nc4_list` into a single Pandas DataFrame.

    nc4_list: list of NC4 file paths, each one read with get_np_table.
    Returns a DataFrame holding only the rows whose 'flag' is 0, without the
    'flag' column, with 'sounding_id' and 'orbit' converted to int.
    When no data could be read, an empty DataFrame with all the names of the
    module-level `columns` list (including 'flag') is returned.
    '''
    # Start from an empty float table so that an empty file list still
    # concatenates to a (0, len(columns)) array.
    tables = [np.empty((0, len(columns)))]
    # Loop over the files
    for one_file in tqdm(nc4_list):
        tables.append(get_np_table(one_file))
    # Concatenate once at the end rather than re-copying the data at each file.
    month_data = np.concatenate(tables, axis=0)
    if month_data.size == 0:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(month_data, columns=columns)
    # using dictionary to convert specific columns (https://www.geeksforgeeks.org/change-data-type-for-one-or-more-columns-in-pandas-dataframe/)
    convert_dict = {'sounding_id': int,
                    'orbit': int
                }
    df = df.astype(convert_dict)
    # Remove bad quality, then remove the flag column. Done without inplace so
    # that no filtered slice of `df` is mutated.
    return df.loc[df['flag'] == 0].drop(columns=['flag'])

In [None]:
# Build the DataFrame from the single sample file and preview its first rows
df = get_dataframe([last_nc4])
df.head(3)

100%|██████████| 1/1 [00:00<00:00, 13.03it/s]


Unnamed: 0,sounding_id,latitude,longitude,xco2,xco2_uncert,orbit,windspeed_u,windspeed_v,surface_pressure_apriori,surface_pressure,altitude,land_water_indicator,land_fraction
7,2020040700492638,-42.680946,-163.926727,409.077057,0.494898,30665,-8.192998,-1.161757,1005.479065,996.547852,0.0,1.0,0.0
13,2020040700492703,-42.659702,-163.818771,409.052002,0.489938,30665,-7.951118,-1.333183,1005.495911,996.65625,0.0,1.0,0.0
16,2020040700492707,-42.648354,-163.912247,408.720642,0.471686,30665,-8.057251,-1.286553,1005.430115,997.372009,0.0,1.0,0.0


## Do everything with two lines of code

In [None]:
# export
def get_pattern_yearmonth(first_year=14, last_year=20):
    '''
    Generate a list of every YYMM from `first_year` to `last_year`, both included.
    With the default values: from 2014 to 2020.

    first_year, last_year: years given as their last two digits (14 for 2014).
    Returns a list of 4-character strings such as '1409', in chronological order.
    '''
    return [
        # Pad the year as well, so that a single-digit year still gives 4 characters.
        str(year).zfill(2) + str(month).zfill(2)
        for year in range(first_year, last_year + 1)
        for month in range(1, 12 + 1)
    ]

def process_files(input_dir, output_dir, patterns):
    '''
    Process all NC4 file corresponding to the patterns list.

    input_dir: directory holding the NC4 files.
    output_dir: directory receiving the CSV files (with or without a trailing separator).
    patterns: list of strings; for each one, the files named '*<pattern>*.nc4'
              are loaded and saved as 'oco2_<pattern>.csv' (';' separated, no index).
    '''
    for pattern in patterns:
        print('Loading ', pattern)
        # Get the file list in directory
        nc4_list = get_file_list(input_dir, pattern='*'+pattern+"*.nc4")
        df = get_dataframe(nc4_list)
        print('Saving to disk...')
        # os.path.join keeps the path right whether or not output_dir ends with a separator.
        csv_path = os.path.join(output_dir, 'oco2_'+pattern+'.csv')
        df.to_csv(csv_path, sep=';', index=False)
        # Release the DataFrame before the next pattern is loaded.
        del df

In [None]:
years_months = get_pattern_yearmonth()
print(years_months)
# Directory holding the NC4 files, and directory receiving the CSV files
input_dir = r'/media/NAS-Divers/dev/datasets/OCO2/nc4-v10/'
output_dir = r'../../../datasets/OCO2/csv-v10/'
# Replace ['1409'] with years_months to process every file.
process_files(input_dir, output_dir, ['1409'])

  0%|          | 0/21 [00:00<?, ?it/s]

['1401', '1402', '1403', '1404', '1405', '1406', '1407', '1408', '1409', '1410', '1411', '1412', '1501', '1502', '1503', '1504', '1505', '1506', '1507', '1508', '1509', '1510', '1511', '1512', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1701', '1702', '1703', '1704', '1705', '1706', '1707', '1708', '1709', '1710', '1711', '1712', '1801', '1802', '1803', '1804', '1805', '1806', '1807', '1808', '1809', '1810', '1811', '1812', '1901', '1902', '1903', '1904', '1905', '1906', '1907', '1908', '1909', '1910', '1911', '1912', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012']
Loading  1409


100%|██████████| 21/21 [00:06<00:00,  3.38it/s]


Saving to disk...


In [None]:
# Run nbdev's notebook2script; the output below lists the converted notebooks
from nbdev.export import *
notebook2script()

Converted 03_25_OCO2_Data_Exploration.ipynb.
Converted 04_01_OCO2_Work_Base.ipynb.
Converted 04_04_OCO2_China_Peaks.ipynb.
Converted 04_15_OCO2_Laiwu_Peak_Detection.ipynb.
Converted CO2_emissions_Inventory_data.ipynb.
Converted Find_Peaks_with_LSTM_autoencoders.ipynb.
Converted Laiwu_Plume-more_data.ipynb.
Converted Laiwu_Plume-more_data_CD_exploration_selection_peaks.ipynb.
Converted Laiwu_Plume.ipynb.
Converted WIP_OCO2_Capture.ipynb.
Converted WIP_OCO2_Peaks_Wind.ipynb.
Converted WIP_OCO2_Peaks_Wind_Visualization.ipynb.
Converted index.ipynb.
Converted oco2peak-datasets.ipynb.
Converted oco2peak-find_peak.ipynb.
Converted oco2peak-map.ipynb.
Converted oco2peak-nc4_convert.ipynb.
Converted oco2peak-swift_utils.ipynb.
Converted show_map.ipynb.
Converted view_peak.ipynb.
