# Cleaning IMPACTS P-3 aircraft meteorological/navigation data

##### The following read routine is adapted from the IMPACTS GitHub repository (https://github.com/joefinlon/impacts_tools.git), written for the IMPACTS P-3 meteorological/navigation datasets. Unpacking and cleaning of the dataset are included in the read routine, where the output is an xarray.Dataset. This notebook saves the xarray.Dataset to data/clean/ as a NetCDF file.

In [1]:
# Import necessary packages
import glob
import xarray as xr
import numpy as np
import pandas as pd

In [2]:
class P3():
    """
    A class to represent the P-3 aircraft during the IMPACTS field campaign.

    Attributes
    ----------
    name : str
        Label of the dataset ('P-3 Met-Nav').
    data : xarray.Dataset
        P-3 meteorological and navigation variables and attributes, as
        returned by ``readfile``.

        Dimensions:
            - time: np.array(np.datetime64[ns]) - The UTC time stamp
        Coordinates:
            - time (time): np.array(np.datetime64[ns]) - The UTC time stamp
        Variables (units are those written to each variable's ``units`` attribute):
            - lat (time): xarray.DataArray(float) - Latitude (degrees)
            - lon (time): xarray.DataArray(float) - Longitude (degrees)
            - alt_gps (time) : xarray.DataArray(float) - Aircraft GPS altitude (m above mean sea level)
            - alt_pres (time) : xarray.DataArray(float) - Aircraft pressure altitude (ft)
            - alt_radar (time) : xarray.DataArray(float) - Aircraft radar altitude (ft)
            - grnd_spd (time) : xarray.DataArray(float) - Aircraft ground speed (m/s)
            - tas (time) : xarray.DataArray(float) - Aircraft true air speed (m/s)
            - ias (time) : xarray.DataArray(float) - Aircraft indicated air speed
              (labelled 'kts'; only the header scale factor is applied to it)
            - mach (time) : xarray.DataArray(float) - Aircraft mach number
            - zvel_P3 (time) : xarray.DataArray(float) - Aircraft vertical speed (m/s)
            - heading (time) : xarray.DataArray(float) - Aircraft true heading (deg clockwise from +y)
            - track (time) : xarray.DataArray(float) - Aircraft track angle (deg clockwise from +y)
            - drift (time) : xarray.DataArray(float) - Aircraft drift angle (deg clockwise from +y)
            - pitch (time) : xarray.DataArray(float) - Aircraft pitch angle (deg, positive is up)
            - roll (time) : xarray.DataArray(float) - Aircraft roll angle (deg, positive is right turn)
            - temp (time) : xarray.DataArray(float) - Static (ambient) air temperature (deg C)
            - temp_total (time) : xarray.DataArray(float) - Total air temperature (deg C)
            - temp_ir (time) : xarray.DataArray(float) - Infrared surface temperature (deg C, AMES only)
            - temp_pot (time) : xarray.DataArray(float) - Potential temperature (K, AMES only)
            - dwpt (time) : xarray.DataArray(float) - Dew point temperature (deg C)
            - pres_static (time) : xarray.DataArray(float) - Static air pressure (hPa)
            - pres_cabin (time) : xarray.DataArray(float) - Cabin air pressure (hPa)
            - wspd (time) : xarray.DataArray(float) - Horizontal wind speed (m/s, limited to where roll <= 5 degrees)
            - wdir (time) : xarray.DataArray(float) - Horizontal wind direction (deg clockwise from +y)
            - uwnd (time) : xarray.DataArray(float) - Horizontal U-component wind speed
              (m/s, fully masked when the file has no 'U' and 'V' columns)
            - vwnd (time) : xarray.DataArray(float) - Horizontal V-component wind speed
              (m/s, fully masked when the file has no 'U' and 'V' columns)
            - mixrat (time) : xarray.DataArray(float) - Mixing ratio (g/kg, AMES only)
            - pres_vapor (time) : xarray.DataArray(float) - Partial pressure (hPa) with respect to water vapor (AMES only)
            - svp_h2o (time) : xarray.DataArray(float) - Saturation vapor pressure (hPa) with respect to water (AMES only)
            - svp_ice (time) : xarray.DataArray(float) - Saturation vapor pressure (hPa) with respect to ice (AMES only)
            - rh (time) : xarray.DataArray(float) - Relative humidity with respect to water (percent, AMES only)
            - zenith (time) : xarray.DataArray(float) - Solar zenith angle (deg)
            - sun_elev_P3 (time) : xarray.DataArray(float) - Aircraft sun elevation (deg)
            - sun_az (time) : xarray.DataArray(float) - Sun azimuth (deg)
            - sun_az_P3 (time) : xarray.DataArray(float) - Aircraft sun azimuth (deg)

        "AMES only" variables are fully masked placeholders when ``fmt='iwg'``.
    """

    # Global attributes attached to every dataset produced by this reader.
    _BASE_ATTRS = {
        'Experiment': 'IMPACTS',
        'Platform': 'P-3',
        'Mission PI': 'Lynn McMurdie (lynnm@uw.edu)'
    }

    # Output variables in the order they are stored in the dataset:
    # (dataset name, source column name, description, units, availability).
    # availability: 'all'  -> present in both formats,
    #               'ames' -> only in NASA AMES files (masked placeholder for IWG),
    #               'uv'   -> only when the AMES file carries both 'U' and 'V'.
    _VARIABLES = (
        ('lon', 'Longitude', 'Aircraft longitude', 'degrees_east', 'all'),
        ('lat', 'Latitude', 'Aircraft latitude', 'degrees_north', 'all'),
        ('alt_gps', 'GPS_Altitude', 'Aircraft GPS altitude (mean sea level)', 'meters', 'all'),
        ('alt_pres', 'Pressure_Altitude', 'Aircraft pressure altitude', 'feet', 'all'),
        ('alt_radar', 'Radar_Altitude', 'Aircraft radar altitude', 'feet', 'all'),
        ('grnd_spd', 'Ground_Speed', 'Aircraft ground speed', 'm/s', 'all'),
        ('tas', 'True_Air_Speed', 'Aircraft true air speed', 'm/s', 'all'),
        ('ias', 'Indicated_Air_Speed', 'Aircraft indicated air speed', 'kts', 'all'),
        ('mach', 'Mach_Number', 'Aircraft mach number', 'mach', 'all'),
        ('zvel_P3', 'Vertical_Speed', 'Aircraft vertical speed', 'm/s', 'all'),
        ('heading', 'True_Heading', 'Aircraft true heading (clockwise from +y)', 'degrees', 'all'),
        ('track', 'Track_Angle', 'Aircraft track angle (clockwise from +y)', 'degrees', 'all'),
        ('drift', 'Drift_Angle', 'Aircraft drift angle (clockwise from +y)', 'degrees', 'all'),
        ('pitch', 'Pitch_Angle', 'Aircraft pitch angle (positive is up)', 'degrees', 'all'),
        ('roll', 'Roll_Angle', 'Aircraft roll angle (positive is right turn)', 'degrees', 'all'),
        ('temp', 'Static_Air_Temp', 'Static (ambient) air temperature', 'degrees_Celsius', 'all'),
        ('temp_total', 'Total_Air_Temp', 'Total air temperature', 'degrees_Celsius', 'all'),
        ('temp_ir', 'IR_Surf_Temp', 'Infrared surface temperature', 'degrees_Celsius', 'ames'),
        ('temp_pot', 'Potential_Temp', 'Potential temperature', 'degrees_Kelvin', 'ames'),
        ('dwpt', 'Dew_Point', 'Dew point temperature', 'degrees_Celsius', 'all'),
        ('pres_static', 'Static_Pressure', 'Static air pressure', 'hPa', 'all'),
        ('pres_cabin', 'Cabin_Pressure', 'Cabin air pressure', 'hPa', 'all'),
        ('wspd', 'Wind_Speed',
         'Horizontal wind speed (limited to where roll <= 5 degrees)', 'm/s', 'all'),
        ('wdir', 'Wind_Direction', 'Horizontal wind direction (clockwise from +y)', 'degrees', 'all'),
        ('uwnd', 'U', 'Horizontal U-component wind speed', 'm/s', 'uv'),
        ('vwnd', 'V', 'Horizontal V-component wind speed', 'm/s', 'uv'),
        ('mixrat', 'Mixing_Ratio', 'Mixing ratio', 'g/kg', 'ames'),
        ('pres_vapor', 'Part_Press_Water_Vapor',
         'Partial pressure with respect to water vapor', 'hPa', 'ames'),
        ('svp_h2o', 'Sat_Vapor_Press_H2O',
         'Saturation vapor pressure with respect to water', 'hPa', 'ames'),
        ('svp_ice', 'Sat_Vapor_Press_Ice',
         'Saturation vapor pressure with respect to ice', 'hPa', 'ames'),
        ('rh', 'Relative_Humidity', 'Relative humidity with respect to water', 'percent', 'ames'),
        ('zenith', 'Solar_Zenith_Angle', 'Solar zenith angle', 'degrees', 'all'),
        ('sun_elev_P3', 'Aircraft_Sun_Elevation', 'Aircraft sun elevation', 'degrees', 'all'),
        ('sun_az', 'Sun_Azimuth', 'Sun azimuth', 'degrees', 'all'),
        ('sun_az_P3', 'Aircraft_Sun_Azimuth', 'Aircraft sun azimuth', 'degrees', 'all'),
    )

    def __init__(self, filepath, date, start_time=None, end_time=None, tres='1s', fmt='ames'):
        """
        Read a P-3 Met-Nav file and store the result in ``self.data``.

        All arguments are forwarded unchanged to ``readfile``; see that
        method for their meaning.
        """
        self.name = 'P-3 Met-Nav'

        # read the raw data
        self.data = self.readfile(filepath, date, start_time, end_time, tres, fmt)

    def parse_header(self, f, date):
        '''
        Parse the header of a NASA AMES FFI 1001 file into a dictionary.

        Parameters
        ----------
        f : file-like object
            Open text handle positioned at the first line of the file.
            It is always closed before this method returns or raises.
        date : str
            Flight start date; used only to build the unit string of the
            time column ('seconds since <date>').

        Returns
        -------
        hdr : dict or None
            Header fields listed below, or None (after printing a notice)
            when the file does not declare FFI 1001.

        Header keys
        -----------
        NLHEAD : Number of header lines
        FFI : NASA AMES FFI format number
        ONAME : Originator/PI Name
        ORG : Name of organization
        SNAME : Instrument/platform name
        MNAME : Project/mission name
        IVOL : Current volume number (almost always 1)
        NVOL : Number of volumes for data (almost always 1)
        DATE : YYYY MM DD UTC begin date
        RDATE : Reduction/revision UTC date
        DX : Interval between successive values (data rate)
        XNAME : Name/Description of DX variable above
        NV : Number of primary variables in file
        VSCAL : Scaling factor for each variable column (time column prepended)
        VMISS : Missing value for each variable column (time column prepended)
        VNAME : Variable names, starting with 'time'
        VUNIT : Unit text following each variable name
        NSCOML : Number of special comment lines within header
        SCOM : Special comments about file/data, etc.
        NNCOML : Number of normal comment lines within header
        NCOM : Normal comments
        '''
        def next_line():
            # One header line with its trailing newline removed.
            return f.readline().rstrip('\n')

        try:
            hdr = {}
            hdr['NLHEAD'], hdr['FFI'] = next_line().split(',')
            hdr['NLHEAD'] = int(hdr['NLHEAD'])

            # Check that the file is indeed NASA AMES 1001
            if hdr['FFI'].replace(' ', '') != '1001':
                print("Check file type, looks like it's not FFI 1001")
                return None

            for key in ('ONAME', 'ORG', 'SNAME', 'MNAME'):
                hdr[key] = next_line()
            hdr['IVOL'], hdr['NVOL'] = next_line().split(',')
            yy1, mm1, dd1, yy2, mm2, dd2 = next_line().split(',')
            hdr['DATE'] = (int(yy1), int(mm1), int(dd1))
            hdr['RDATE'] = (int(yy2), int(mm2), int(dd2))
            hdr['DX'] = next_line()
            hdr['XNAME'] = next_line()
            hdr['NV'] = int(next_line())
            hdr['VSCAL'] = [float(x) for x in next_line().split(',')]
            hdr['VMISS'] = [float(x) for x in next_line().split(',')]

            # The independent (time) variable is not listed among the NV
            # variable lines, so it is added by hand as the first entry.
            hdr['VNAME'] = ['time']
            hdr['VUNIT'] = ['seconds since ' + date]
            for _ in range(hdr['NV']):
                var_name, var_rest = next_line().split(',', 1)
                hdr['VNAME'].append(var_name)
                hdr['VUNIT'].append(var_rest[1:])  # drop the space after the comma

            hdr['NSCOML'] = int(next_line())
            hdr['SCOM'] = [next_line() for _ in range(hdr['NSCOML'])]
            hdr['NNCOML'] = int(next_line())
            hdr['NCOM'] = [next_line() for _ in range(hdr['NNCOML'])]

            # Insert elements to account for time column
            hdr['VSCAL'].insert(0, 1)
            hdr['VMISS'].insert(0, np.nan)
            return hdr
        finally:
            # Close on every path (success, wrong FFI, malformed header).
            f.close()

    def _read_ames(self, filepath, date):
        """
        Read a NASA AMES file into (fields, time, attrs).

        fields maps each header variable name to a float array with the
        header scale factor applied and missing-value flags set to NaN.
        """
        # get header info following the NASA AMES format
        with open(filepath, 'r') as header_file:
            header = self.parse_header(header_file, date)
        if header is None:
            raise ValueError(
                f"{filepath} is not a NASA AMES FFI 1001 file; cannot read it with fmt='ames'."
            )

        # parse the data
        data_raw = np.genfromtxt(
            filepath, delimiter=',', skip_header=header['NLHEAD'],
            missing_values=header['VMISS'], usemask=True, filling_values=np.nan
        )

        names = header['VNAME']
        scales = header['VSCAL']
        missing = header['VMISS']
        units = header['VUNIT']

        if len(names) != len(scales):
            print(
                'ALL variables must be read in this type of file. '
                'Please check name_map to make sure it is the correct length.'
            )

        # keep only the unit token (text before the first comma)
        for jj, unit in enumerate(units):
            units[jj] = unit.split(',')[0]

        fields = {}
        for jj, name in enumerate(names):
            # fix scaling and missing data flags for some vars
            if name in ('True_Air_Speed', 'Indicated_Air_Speed', 'Mach_Number'):
                missing[jj] = -8888.
            if name == 'True_Air_Speed' and units[jj] == 'kts':  # [m/s]
                # The flag is compared after scaling, so it is scaled too.
                missing[jj] = -8888. * 0.51
                scales[jj] = 0.51
                units[jj] = 'm/s'
            values = np.array(data_raw[:, jj] * scales[jj])
            # turn missing values to nan
            values[values == missing[jj]] = np.nan
            fields[name] = values
        # wspd has two missing data flags
        fields['Wind_Speed'][fields['Wind_Speed'] == -8888.] = np.nan

        # compute time from seconds since the flight start date
        day_start = np.datetime64(date)
        time = np.array(
            [day_start + np.timedelta64(int(seconds), 's') for seconds in fields['time']],
            dtype='datetime64[ns]'
        )

        # populate dataset attributes from the normal comments; the last
        # comment line is skipped
        attrs = dict(self._BASE_ATTRS)
        instrum_info_counter = 1
        for comment in header['NCOM'][:-1]:
            # Split on the first colon only so values that themselves contain
            # ':' are kept whole.
            key, colon, value = comment.partition(':')
            if colon:
                attrs[key] = value[1:]
            else:  # handles multiple instrument info lines in *_R0.ict files
                instrum_info_counter += 1
                attrs['INSTRUMENT_INFO_' + str(instrum_info_counter)] = key[1:]

        return fields, time, attrs

    def _read_iwg(self, filepath):
        """Read a header-less IWG packet file into (fields, time, attrs)."""
        names = [
            'fmt', 'time', 'Latitude', 'Longitude', 'GPS_Altitude', 'WGS_84_Alt',
            'Pressure_Altitude', 'Radar_Altitude', 'Ground_Speed', 'True_Air_Speed',
            'Indicated_Air_Speed', 'Mach_Number', 'Vertical_Speed', 'True_Heading',
            'Track_Angle', 'Drift_Angle', 'Pitch_Angle', 'Roll_Angle', 'Side_slip',
            'Angle_of_Attack', 'Static_Air_Temp', 'Dew_Point', 'Total_Air_Temp',
            'Static_Pressure', 'Dynamic_Press', 'Cabin_Pressure', 'Wind_Speed',
            'Wind_Direction', 'Vert_Wind_Spd', 'Solar_Zenith_Angle',
            'Aircraft_Sun_Elevation', 'Sun_Azimuth', 'Aircraft_Sun_Azimuth'
        ]
        # packet label, timestamp, then one float per remaining column
        dtypes = [str, 'datetime64[ns]'] + [float] * (len(names) - 2)
        fields = np.genfromtxt(filepath, delimiter=',', names=names, dtype=dtypes)
        return fields, fields['time'], dict(self._BASE_ATTRS)

    @staticmethod
    def _data_array(values, time, description, units):
        """Wrap one variable as a time-indexed DataArray with description/units attrs."""
        return xr.DataArray(
            data=values, dims=['time'],
            coords=dict(time=time),
            attrs=dict(description=description, units=units)
        )

    def readfile(self, filepath, date, start_time=None, end_time=None, tres='1s', fmt='ames'):
        """
        Reads the P-3 Met-Nav data file and unpacks the fields into an xarray.Dataset

        Parameters
        ----------
        filepath : str
            Path to the data file
        date: str
            Flight start date in YYYY-mm-dd format
        start_time : np.datetime64 or None
            The initial time of interest
        end_time : np.datetime64 or None
            The final time of interest
        tres: str
            The time interval to average over (e.g., '5s' for 5 seconds)
        fmt: str
            ames - NASA Ames format; iwg - IWG packet format (no headers)

        Returns
        -------
        data : xarray.Dataset
            The unpacked dataset

        Raises
        ------
        ValueError
            If ``fmt`` is not 'ames' or 'iwg', or if an 'ames' file does not
            declare FFI 1001.
        """
        if fmt == 'ames':
            fields, time, p3_attrs = self._read_ames(filepath, date)
        elif fmt == 'iwg':
            fields, time, p3_attrs = self._read_iwg(filepath)
        else:
            raise ValueError(f"Unsupported fmt {fmt!r}; expected 'ames' or 'iwg'.")

        is_ames = fmt == 'ames'
        # U/V wind components exist only in some AMES files (e.g. 2022 data)
        has_uv = is_ames and ('U' in fields) and ('V' in fields)
        # Some AMES files name the dew point column 'Dew/Frost_Point'
        dew_source = 'Dew_Point'
        if is_ames and 'Dew_Point' not in fields:
            dew_source = 'Dew/Frost_Point'

        # populate data arrays
        data_vars = {}
        for out_name, source, description, units, availability in self._VARIABLES:
            if out_name == 'dwpt':
                source = dew_source
            unavailable = (
                (availability == 'ames' and not is_ames)
                or (availability == 'uv' and not has_uv)
            )
            if unavailable:
                # fully masked placeholder keeps the dataset schema identical
                values = np.ma.array(np.zeros(len(time)), mask=True)
            else:
                values = np.ma.masked_invalid(fields[source])
            data_vars[out_name] = self._data_array(values, time, description, units)

        # put everything together into an XArray Dataset
        ds = xr.Dataset(
            data_vars=data_vars,
            coords={
                'time': time
            },
            attrs=p3_attrs
        )

        if fmt == 'iwg': # remove duplicate times (bad data)
            ds = ds.drop_duplicates('time')

        tres_delta = pd.to_timedelta(tres)
        one_second = pd.to_timedelta('1s')

        # trim the dataset if needed
        if (start_time is not None) or (end_time is not None):
            if start_time is None:
                start_time = str(np.datetime_as_string(ds['time'][0]))
            if end_time is None:
                end_time = str(np.datetime_as_string(ds['time'][-1]))

            # remove 1 s from end_time if tres > 1 (for slice() function)
            if tres_delta > one_second:
                end_time = str(
                    np.datetime_as_string(
                        np.datetime64(end_time) - np.timedelta64(1, 's')
                    )
                )
            ds = ds.sel(time=slice(start_time, end_time))

        # resample (average) the dataset if needed
        if tres_delta > one_second:
            ds = ds.resample(time=tres).mean(skipna=True, keep_attrs=True)
        elif tres_delta < one_second:
            print('Upsampling data is not supported at this time.')

        return ds

In [3]:
# Flight dates (YYYY-mm-dd) whose P-3 Met-Nav files are cleaned and exported.
dates = ['2020-02-07', '2022-02-17', '2023-01-23', '2023-02-14'] # flight dates

# Project data directory: raw files are read from raw/, NetCDF output goes to clean/.
data_dir = '/home/disk/meso-home/vgarcia1/PHIPS_classification/MLGEO2024_Snowflake_Classification/data'

for date in dates:
    # read in the P-3 data
    datestr = date.replace('-', '')

    raw_path = f'{data_dir}/raw/IMPACTS_MetNav_P3B_{datestr}_R0.ict'
    matches = glob.glob(raw_path)
    if not matches:
        # Fail with the offending path instead of a bare IndexError.
        raise FileNotFoundError(f'No P-3 Met-Nav file found at {raw_path}')
    fname_p3 = matches[0]

    # '1s' (lowercase) avoids the pandas FutureWarning for the deprecated 'S' alias
    p3_nav = P3(fname_p3, date=date, tres='1s', fmt='ames').data

    print(p3_nav)

    # save the data as a NetCDF file
    p3_nav.to_netcdf(f'{data_dir}/clean/{date}_P3_MetNav.nc')

  if pd.to_timedelta(tres) > pd.to_timedelta('1s'):
  elif pd.to_timedelta(tres) < pd.to_timedelta('1s'):


<xarray.Dataset>
Dimensions:      (time: 21359)
Coordinates:
  * time         (time) datetime64[ns] 2020-02-07T14:05:47 ... 2020-02-07T20:...
Data variables: (12/35)
    lon          (time) float64 -75.46 -75.46 -75.46 ... -75.48 -75.48 -75.48
    lat          (time) float64 37.95 37.95 37.95 37.95 ... 37.94 37.94 37.94
    alt_gps      (time) float64 10.8 10.8 10.8 10.8 10.8 ... 12.2 12.3 12.2 12.3
    alt_pres     (time) float64 814.0 812.0 811.0 810.0 ... 591.0 587.0 588.0
    alt_radar    (time) float64 1.4 nan 0.0 0.8 0.3 0.2 ... 2.0 1.2 2.7 1.7 1.4
    grnd_spd     (time) float64 2.45 2.55 2.54 2.51 2.54 ... 5.83 6.11 6.32 6.58
    ...           ...
    svp_ice      (time) float64 20.42 20.36 20.13 20.0 ... 16.47 16.45 16.46
    rh           (time) float64 83.64 83.72 84.43 84.78 ... 46.33 46.39 46.24
    zenith       (time) float64 69.6 69.6 69.6 69.6 69.6 ... 65.7 65.7 65.7 65.7
    sun_elev_P3  (time) float64 21.1 20.9 20.9 20.7 20.6 ... 25.6 25.6 25.4 25.0
    sun_az       (t

  if pd.to_timedelta(tres) > pd.to_timedelta('1s'):
  elif pd.to_timedelta(tres) < pd.to_timedelta('1s'):


<xarray.Dataset>
Dimensions:      (time: 29478)
Coordinates:
  * time         (time) datetime64[ns] 2022-02-17T16:33:20 ... 2022-02-18T00:...
Data variables: (12/35)
    lon          (time) float64 -75.45 -75.45 -75.45 ... -75.47 -75.47 -75.47
    lat          (time) float64 37.95 37.95 37.95 37.95 ... 37.93 37.93 37.93
    alt_gps      (time) float64 -26.9 -26.9 -27.0 -27.0 ... -23.3 -23.3 -23.3
    alt_pres     (time) float64 -253.0 -240.0 -245.0 -247.0 ... 71.0 70.0 70.0
    alt_radar    (time) float64 1.7 1.7 1.7 0.8 2.6 1.2 ... 2.1 2.7 3.7 3.1 2.6
    grnd_spd     (time) float64 0.02 0.03 0.53 1.6 3.56 ... 4.96 4.94 4.94 4.92
    ...           ...
    svp_ice      (time) float64 22.78 22.65 22.36 22.15 ... 13.53 13.5 13.51
    rh           (time) float64 61.15 63.54 66.45 63.94 ... 96.97 96.47 95.15
    zenith       (time) float64 50.8 50.8 50.8 50.8 ... 114.4 114.4 114.4 114.4
    sun_elev_P3  (time) float64 39.4 39.4 39.4 39.4 ... -23.3 -23.2 -22.9 -22.9
    sun_az       (time) 

  if pd.to_timedelta(tres) > pd.to_timedelta('1s'):
  elif pd.to_timedelta(tres) < pd.to_timedelta('1s'):


<xarray.Dataset>
Dimensions:      (time: 29095)
Coordinates:
  * time         (time) datetime64[ns] 2023-01-23T11:43:36 ... 2023-01-23T19:...
Data variables: (12/35)
    lon          (time) float64 -75.46 -75.46 -75.46 ... -75.48 -75.48 -75.48
    lat          (time) float64 37.94 37.94 37.94 37.94 ... 37.94 37.94 37.94
    alt_gps      (time) float64 14.0 14.0 14.0 14.0 14.0 ... 14.0 14.0 14.0 13.0
    alt_pres     (time) float64 296.0 285.0 302.0 303.0 ... 194.0 198.0 192.0
    alt_radar    (time) float64 nan nan nan nan nan nan ... nan nan nan nan nan
    grnd_spd     (time) float64 0.03 0.04 0.04 1.2 3.88 ... 7.0 6.95 6.8 6.79
    ...           ...
    svp_ice      (time) float64 9.13 9.19 9.19 9.15 ... 12.55 12.56 12.51 12.57
    rh           (time) float64 96.69 96.08 96.02 96.35 ... 59.27 59.34 58.97
    zenith       (time) float64 96.4 96.4 96.4 96.4 96.4 ... 67.7 67.7 67.7 67.7
    sun_elev_P3  (time) float64 -7.8 -7.8 -7.7 -7.7 -7.5 ... 24.6 24.2 24.1 23.9
    sun_az       (t

  if pd.to_timedelta(tres) > pd.to_timedelta('1s'):
  elif pd.to_timedelta(tres) < pd.to_timedelta('1s'):


<xarray.Dataset>
Dimensions:      (time: 32007)
Coordinates:
  * time         (time) datetime64[ns] 2023-02-14T21:00:11 ... 2023-02-15T05:...
Data variables: (12/35)
    lon          (time) float64 -75.46 -75.46 -75.46 ... -75.47 -75.47 -75.47
    lat          (time) float64 37.95 37.95 37.95 37.95 ... 37.93 37.93 37.93
    alt_gps      (time) float64 12.0 12.0 12.0 12.0 12.0 ... 14.0 14.0 14.0 14.0
    alt_pres     (time) float64 -155.0 -156.0 -156.0 ... -195.0 -195.0 -195.0
    alt_radar    (time) float64 nan nan nan nan nan nan ... nan nan nan nan nan
    grnd_spd     (time) float64 4.37 4.41 4.34 4.35 4.83 ... 4.73 4.44 4.06 3.57
    ...           ...
    svp_ice      (time) float64 16.2 16.07 15.89 15.74 ... 12.35 12.25 12.25
    rh           (time) float64 39.03 39.01 39.4 39.08 ... 67.52 68.07 67.97
    zenith       (time) float64 72.8 72.8 72.8 72.8 ... 153.6 153.6 153.6 153.6
    sun_elev_P3  (time) float64 17.8 17.8 17.8 17.8 ... -64.8 -64.8 -64.8 -64.8
    sun_az       (time