### Load Packages

In [95]:
from __future__ import annotations

import getpass
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from zipfile import ZipFile

import pandas as pd
import requests

# Create the "icartt" working directory (no error if it already exists).
os.makedirs("icartt", exist_ok=True)

### Load ICT Converter

In [54]:
PathLike = Union[str, Path]

# Text decoding strategy shared by header and table reading: try UTF-8 first
# ("utf-8-sig" also swallows a leading byte-order mark) and fall back to
# latin-1, which maps every byte and therefore can never fail.
_PRIMARY_ENCODING = "utf-8-sig"
_FALLBACK_ENCODING = "latin-1"

# 0-based header positions used by the conventional FFI=1001 layout.
_N_DEPENDENT_IDX = 9    # file line 10: number of dependent variables
_MISSING_IDX = 11       # file line 12: per-variable missing indicators
_VAR_DEFS_START = 12    # file line 13: first dependent-variable definition


@dataclass(frozen=True)
class ICARTTInfo:
    """File-format facts taken from the first line of an ICARTT file."""

    path: Path          # file the information was read from
    header_length: int  # first token of line 1
    ffi: str            # second token of line 1 (file format index, e.g. "1001")


@dataclass(frozen=True)
class VariableDef:
    """One dependent-variable definition parsed from the header."""

    name: str
    unit: Optional[str] = None
    description: Optional[str] = None
    missing: Optional[float] = None  # per-variable missing, if known


class ICARTTReader:
    """
    General ICARTT/ICT reader.

    Goals:
      - Robustly read the data table (CSV-like) for typical ICT files.
      - Avoid file-specific assumptions (campaign/platform/column names).
      - Provide best-effort metadata parsing (especially for FFI=1001) but
        never let metadata parsing prevent data extraction.

    Notes:
      - Many airborne ICT files are FFI=1001 (1D time series), but other FFIs exist.
      - The header length is read from the first token of the first line, the
        FFI from the second token.
      - Text is decoded as UTF-8 when the bytes are valid UTF-8 and as latin-1
        otherwise, so non-ASCII characters (e.g. "µ" in units) are preserved
        instead of being silently dropped.
    """

    def __init__(self, path: PathLike):
        self.path = Path(path)
        self.info = self._read_info()

    # ----------------------------
    # Core: file format info
    # ----------------------------
    def _read_info(self) -> ICARTTInfo:
        """
        Parse line 1 into an ICARTTInfo.

        Raises ValueError when the line does not hold at least two tokens or
        the first token is not an integer.
        """
        # Only ASCII digits matter on this line, so undecodable bytes are
        # ignored; "utf-8-sig" drops a byte-order mark that would otherwise
        # be glued to the first token and break int().
        with open(self.path, "r", encoding=_PRIMARY_ENCODING, errors="ignore") as f:
            line1 = f.readline().strip()

        # Accept both "36, 1001" and the whitespace-delimited "36 1001".
        parts = line1.replace(",", " ").split()
        if len(parts) < 2:
            raise ValueError(f"Unexpected ICARTT first line format: {line1!r}")

        try:
            header_length = int(parts[0])
        except ValueError as err:
            raise ValueError(
                f"Unexpected ICARTT first line format: {line1!r}"
            ) from err

        return ICARTTInfo(path=self.path, header_length=header_length, ffi=parts[1])

    def _read_first_lines(self, n: int, encoding: str) -> List[str]:
        """Return up to `n` leading lines decoded strictly with `encoding`."""
        lines: List[str] = []
        with open(self.path, "r", encoding=encoding) as f:
            for _ in range(n):
                line = f.readline()
                if not line:  # file is shorter than the declared header
                    break
                lines.append(line.rstrip("\n"))
        return lines

    def read_header_lines(self) -> List[str]:
        """Return the raw header lines (including line 1), without newlines."""
        n = self.info.header_length
        try:
            return self._read_first_lines(n, _PRIMARY_ENCODING)
        except UnicodeError:
            # Not valid UTF-8: decode byte-for-byte rather than dropping characters.
            return self._read_first_lines(n, _FALLBACK_ENCODING)

    # ----------------------------
    # Minimal assumptions: table extraction
    # ----------------------------
    def read_table(
        self,
        *,
        na_values: Optional[List[Union[str, float, int]]] = None,
        strip_colnames: bool = True,
    ) -> pd.DataFrame:
        """
        Extract the data table.

        Strategy:
          - Most ICARTT files place the column header row at line `header_length`.
          - So we skip `header_length - 1` lines and let pandas treat the next line as header.

        Parameters:
          na_values: values to read as NaN. When None, they are guessed from
            the header via `_guess_missing_values()`.
          strip_colnames: strip surrounding whitespace from the column names.

        This is general and does not depend on campaign/platform.
        """
        skiprows = max(self.info.header_length - 1, 0)

        # Many ICT files use -9999, -99999, etc., but we won't assume; allow caller to pass.
        if na_values is None:
            na_values = self._guess_missing_values()

        def parse(encoding: str) -> pd.DataFrame:
            return pd.read_csv(
                self.path,
                skiprows=skiprows,
                sep=",",
                encoding=encoding,
                engine="python",
                na_values=na_values,
            )

        # Same decoding strategy as the header so both views of the file agree.
        try:
            df = parse(_PRIMARY_ENCODING)
        except UnicodeError:
            df = parse(_FALLBACK_ENCODING)

        if strip_colnames:
            df.columns = [str(c).strip() for c in df.columns]

        return df

    # ----------------------------
    # Best-effort metadata parsing
    # ----------------------------
    def read_metadata(self) -> Dict[str, str]:
        """
        Best-effort metadata extraction.

        Returns a dict of key metadata fields when the header matches common ICARTT layouts.
        Header positions are taken at face value (conventional FFI=1001 order);
        fields whose line is missing or blank are omitted.
        """
        lines = self.read_header_lines()

        def safe(i: int) -> str:
            return lines[i].strip() if 0 <= i < len(lines) else ""

        meta: Dict[str, str] = {
            "path": str(self.path),
            "header_length": str(self.info.header_length),
            "ffi": self.info.ffi,
            # These are common but not guaranteed. Keep them best-effort.
            "pi": safe(1),
            "organization": safe(2),
            "data_description": safe(3),
            "mission": safe(4),
            "volume_info": safe(5),
            "date_info": safe(6),
            "data_interval": safe(7),
            "independent_variable": safe(8),
            # Line 10 is what read_variable_defs() parses as the number of
            # dependent variables; expose it under an accurate key.
            "n_dependent_variables": safe(_N_DEPENDENT_IDX),
            # Historical (misleading) key for the same line, kept so existing
            # callers do not break.
            "seconds": safe(_N_DEPENDENT_IDX),
        }

        return {k: v for k, v in meta.items() if v}

    @staticmethod
    def _count_dependent(lines: List[str]) -> Optional[int]:
        """Return the positive dependent-variable count from line 10, else None."""
        if len(lines) < 11:
            return None
        try:
            n_dep = int(lines[_N_DEPENDENT_IDX].strip())
        except ValueError:
            return None
        return n_dep if n_dep > 0 else None

    def read_variable_defs(self) -> List[VariableDef]:
        """
        Best-effort variable definitions, primarily for common FFI=1001 layout:
          - line 10: number of dependent variables
          - line 13+: variable definition lines (often "NAME, UNIT, DESCRIPTION...")

        If layout doesn't match, returns empty list.
        """
        lines = self.read_header_lines()
        n_dep = self._count_dependent(lines)
        if n_dep is None:
            return []

        # Attach per-variable missing if we can infer it (optional)
        miss_map = self._guess_per_variable_missing()

        out: List[VariableDef] = []
        for ln in lines[_VAR_DEFS_START : _VAR_DEFS_START + n_dep]:
            parts = [p.strip() for p in ln.split(",")]
            name = parts[0]
            if not name:  # blank line inside the block: nothing to define
                continue
            unit = parts[1] if len(parts) > 1 else None
            desc = ",".join(parts[2:]).strip() if len(parts) > 2 else None
            out.append(
                VariableDef(
                    name=name,
                    unit=unit or None,
                    description=desc or None,
                    missing=miss_map.get(name),
                )
            )

        return out

    # ----------------------------
    # Missing-value handling
    # ----------------------------
    def _guess_missing_values(self) -> List[Union[str, float, int]]:
        """
        Heuristic: try to extract missing indicators from the header.
        Falls back to common sentinel values.

        Scans the first 200 header lines for explicitly signed numeric tokens
        with magnitude >= 999 (e.g. "-9999", "-99999", "-9999.00"). Integral
        values are returned as int, others as float, de-duplicated in order of
        first appearance.
        """
        found: Dict[Union[int, float], None] = {}  # dict keeps insertion order

        for ln in self.read_header_lines()[:200]:
            for tok in ln.replace(",", " ").split():
                if not tok.startswith(("-", "+")):
                    continue
                # Allow one decimal point so "-9999.00" is recognised too.
                if not tok[1:].replace(".", "", 1).isdigit():
                    continue
                try:
                    val: Union[int, float] = float(tok) if "." in tok else int(tok)
                except ValueError:
                    # isdigit() accepts a few characters int()/float() reject;
                    # a header oddity must never prevent data extraction.
                    continue
                if isinstance(val, float) and val.is_integer():
                    val = int(val)
                # common missing sentinels are large magnitude
                if abs(val) >= 999:
                    found.setdefault(val, None)

        # Add very common defaults if we found nothing
        return list(found) or [-9999, -99999, -8888, 9999, 99999]

    def _guess_per_variable_missing(self) -> Dict[str, float]:
        """
        Map variable name -> missing indicator for the conventional FFI=1001
        header, where line 12 lists one comma-separated indicator per
        dependent variable and the definitions start on line 13.

        Returns {} if nothing reliable is found: other FFIs, a header that is
        too short, a count mismatch, or a non-numeric indicator.
        """
        if self.info.ffi != "1001":
            return {}

        lines = self.read_header_lines()
        n_dep = self._count_dependent(lines)
        if n_dep is None or len(lines) < _VAR_DEFS_START + n_dep:
            return {}

        tokens = [t.strip() for t in lines[_MISSING_IDX].split(",")]
        if len(tokens) != n_dep:
            return {}
        try:
            indicators = [float(t) for t in tokens]
        except ValueError:
            return {}

        names = [
            ln.split(",")[0].strip()
            for ln in lines[_VAR_DEFS_START : _VAR_DEFS_START + n_dep]
        ]
        return {name: value for name, value in zip(names, indicators) if name}

    # ----------------------------
    # Exports
    # ----------------------------
    def _output_path(self, out: Optional[PathLike], suffix: str) -> Path:
        """Return `out` as a Path, or the source path with `suffix` when `out` is not given."""
        return Path(out) if out else self.path.with_suffix(suffix)

    def to_csv(
        self,
        out: Optional[PathLike] = None,
        *,
        na_values: Optional[List[Union[str, float, int]]] = None,
        strip_colnames: bool = True,
    ) -> Path:
        """Write the data table as CSV (no index column) and return the output path."""
        df = self.read_table(na_values=na_values, strip_colnames=strip_colnames)
        out_path = self._output_path(out, ".csv")
        df.to_csv(out_path, index=False)
        return out_path

    def to_parquet(
        self,
        out: Optional[PathLike] = None,
        *,
        na_values: Optional[List[Union[str, float, int]]] = None,
        strip_colnames: bool = True,
    ) -> Path:
        """Write the data table as Parquet (no index column) and return the output path."""
        df = self.read_table(na_values=na_values, strip_colnames=strip_colnames)
        out_path = self._output_path(out, ".parquet")
        df.to_parquet(out_path, index=False)
        return out_path

# Mark the "icartt" directory as a Python package. The marker file must be
# named "__init__.py" (double underscores on both sides); a file called
# "_init_.py" is just an ordinary module and is ignored by the import system.
Path("icartt").mkdir(exist_ok=True)  # make sure the directory exists first
Path("icartt", "__init__.py").write_text("")  # create/empty the marker file

### Log Into NASA Session

In [15]:
# Store Earthdata Login credentials in the user's netrc file and prepare a
# requests session for the ASDC SOOT API.
home = Path.home()
# NOTE(review): "_netrc" is the Windows spelling of the netrc file; requests
# is documented to look for it as well as ".netrc" - confirm for other tools.
netrc_path = home / "_netrc"

username = input("Enter username: ")
password = getpass.getpass("Enter password: ")

content = f"machine urs.earthdata.nasa.gov login {username} password {password}"

# Create the file owner-read/write only from the very start, so the password
# is never on disk with the default (possibly world-readable) permissions.
# NOTE: this replaces the whole file; any other entries in it are lost.
fd = os.open(netrc_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as f:
    f.write(content)

# The mode given to os.open() only applies to a newly created file, so also
# tighten the permissions of a file that already existed.
os.chmod(netrc_path, 0o600)

print(f".netrc file created at {netrc_path}")

# Empty cookie file in the home directory (created only if missing).
file_path = home / ".urs_cookies"
file_path.touch(exist_ok=True)
print(f"Created: {file_path}")

session = requests.Session()
session.auth = None  # requests will use your .netrc automatically

# Use HTTPS, like the API base URL used elsewhere in this notebook, so the
# credentials are never sent over an unencrypted connection.
auth_url = "https://asdc.larc.nasa.gov/soot-api/Authenticate/user"

Enter username: me.gonzalez674
Enter password: ········
.netrc file created at C:\Users\megon\_netrc
Created: C:\Users\megon\.urs_cookies


### Get List of Campaigns/File Names

In [90]:
# Interactive drill-down through the SOOT API:
# campaign -> year -> platform -> PI -> table of file names.

# requests has no default timeout; without one a stalled server hangs the cell.
REQUEST_TIMEOUT_S = 60


def _show_full(table):
    """Display *table* without truncating rows, columns or cell text."""
    # None is the supported "no limit" value for display.max_colwidth; the
    # old -1 spelling is deprecated and rejected by recent pandas versions.
    with pd.option_context(
        "display.max_rows", None,
        "display.max_columns", None,
        "display.max_colwidth", None,
    ):
        display(table)


def _position_of(table, column, choice):
    """
    Return the 0-based position of the first row of *table* whose *column*
    equals *choice*.

    Values are compared as stripped strings, so a typed "2019" also matches an
    integer 2019 returned by the API.

    Raises ValueError (listing the valid choices) when no row matches.
    """
    wanted = str(choice).strip()
    candidates = table[column].astype(str).str.strip()
    positions = [pos for pos, value in enumerate(candidates) if value == wanted]
    if not positions:
        raise ValueError(
            f"{choice!r} not found in column {column!r}; "
            f"valid choices: {sorted(set(candidates))}"
        )
    return positions[0]


# campaigns
base_url = "https://asdc.larc.nasa.gov/soot-api/campaigns"
response = requests.get(base_url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
campaign_json = response.json()
campaign_table = pd.DataFrame(campaign_json)

# choose a campaign from the full list
_show_full(campaign_table[["projectacronym", "description", "projecttitles"]])
campaign = input("Choose Campaign: ")
index = _position_of(campaign_table, "projectacronym", campaign)

# list of years for given campaign
campaign_specification = campaign_table.iloc[index]["projectacronym"] #choose campaign name
url = f'{base_url}/years/{campaign_specification}'
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
years_for_campaign_json = response.json()
years_for_campaign_table = pd.DataFrame(years_for_campaign_json)

# choose a year for the given campaign
_show_full(years_for_campaign_table)
year = input("Choose Year: ")
index = _position_of(years_for_campaign_table, "year", year)

# platforms for given year for given campaign
year_specification = years_for_campaign_table.iloc[index]["year"] #choose year
url = f'{base_url}/years/{campaign_specification}/{year_specification}'
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
platforms_for_year_json = response.json()
platforms_for_year_table = pd.DataFrame(platforms_for_year_json)

# choose a platform for the given year
_show_full(platforms_for_year_table)
platform = input("Choose Platform (name NOT platformtype - DO NOT CHOOSE SATELLITE): ")
index = _position_of(platforms_for_year_table, "name", platform)

# PI for given platform for given year for given campaign
platform_specification = platforms_for_year_table.iloc[index]["name"] #choose platform name
url = f'{base_url}/years/{campaign_specification}/{year_specification}/{platform_specification}'
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
pi_for_platform_json = response.json()
pi_for_platform_table = pd.DataFrame(pi_for_platform_json)

# choose a PI for the given platform
_show_full(pi_for_platform_table[["investigatorid", "firstname", "lastname"]])
pi = input("Choose PI Last Name (Copy Exactly): ")
index = _position_of(pi_for_platform_table, "lastname", pi)

# file names for given PI for given platform for given year for given campaign
pi_specification = pi_for_platform_table.iloc[index]["lastname"]
url = f'{base_url}/years/{campaign_specification}/{year_specification}/{platform_specification}/{pi_specification}'
response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
file_names_for_pi_json = response.json()
file_names_for_pi_table = pd.DataFrame(file_names_for_pi_json)

Unnamed: 0,projectacronym,description,projecttitles
0,ACCLIP,"The Asian Summer Monsoon Chemical & Climate Impact Project (ACCLIP) was an international, multi-organizational suborbital campaign that aimed to study aerosols and chemical transport that is associated with the Asian Summer Monsoon (ASM) in the Western Pacific region from 15 July 2022 to 31 August 2022. The ASM is the largest meteorological pattern in the Northern Hemisphere (NH) during the summer and is associated with persistent convection and large anticyclonic flow patterns in the upper troposphere and lower stratosphere (UTLS). This leads to significant enhancements in the UTLS of trace species that originate from pollution or biomass burning. Convection connected to the ASM occurs over South, Southeast, and East Asia, a region with complex and rapidly changing emissions due to its high population density and economic growth. Pollution that reaches the UTLS from this region can have significant effects on the climate and chemistry of the atmosphere, making it important to have an accurate representation and understanding of ASM transport, chemical, and microphysical processes for chemistry-climate models to characterize these interactions and for predicting future impacts on climate.\r\r The ACCLIP campaign was conducted by the National Aeronautics and Space Administration (NASA) and the National Center for Atmospheric Research (NCAR) with the primary goal of investigating the impacts of Asian gas and aerosol emissions on global chemistry and climate. The NASA WB-57 and NCAR G-V aircraft were outfitted with state-of-the-art sensors to accomplish this. ACCLIP addressed four scientific objectives related to its main goal. The first was to investigate the transport pathways of ASM uplifted air from inside of the anticyclone to the global UTLS. 
Another objective was sampling the chemical content of air processed in the ASM in order to quantify the role of the ASM in transporting chemically active species and short-lived climate forcing agents to the UTLS to determine their impact on stratospheric ozone chemistry and global climate. Third, information was obtained on aerosol size, mass, and chemical composition that is necessary for determining the radiative effects of the ASM to constrain models of aerosol formation and for contrasting the organic-rich ASM UTLS aerosol population with that of the background aerosols. Last, ACCLIP measured the water vapor distribution associated with the monsoon dynamical structure to evaluate transport across the tropopause and determine the role of the ASM in water vapor transport in the stratosphere.",[2022]
1,ACEPOL,"In order to improve our understanding of the effect of aerosols on climate and air quality, measurements of aerosol chemical composition, size distribution, height profile, and optical properties are of crucial importance. In terms of remotely sensed instrumentation, the most extensive set of aerosol properties can be obtained by combining passive multi-angle, multi-spectral measurements of intensity and polarization with active measurements performed by a High Spectral Resolution Lidar. During Fall 2017, the Aerosol Characterization from Polarimeter and Lidar (ACEPOL) campaign, jointly sponsored by NASA and the Netherlands Institute for Space Research (SRON), performed aerosol and cloud measurements over the United States from the NASA high altitude ER-2 aircraft. Six instruments were deployed on the aircraft. Four of these instruments were multi-angle polarimeters: the Airborne Hyper Angular Rainbow Polarimeter (AirHARP), the Airborne Multiangle SpectroPolarimetric Imager (AirMSPI), the Airborne Spectrometer for Planetary Exploration (SPEX Airborne) and the Research Scanning Polarimeter (RSP). The other two instruments were lidars: the High Spectral Resolution Lidar 2 (HSRL-2) and the Cloud Physics Lidar (CPL). The ACEPOL operation was based at NASA’s Armstrong Flight Research Center in Palmdale California, which enabled observations of a wide variety of scene types, including urban, desert, forest, coastal ocean and agricultural areas, with clear, cloudy, polluted and pristine atmospheric conditions. The primary goal of ACEPOL was to assess the capabilities of the different polarimeters for retrieval of aerosol and cloud microphysical and optical parameters, as well as their capabilities to derive aerosol layer height (near-UV polarimetry, O2 A-band). ACEPOL also focused on the development and evaluation of aerosol retrieval algorithms that combine data from both active (lidar) and passive (polarimeter) instruments. 
ACEPOL data are appropriate for algorithm development and testing, instrument intercomparison, and investigations of active and passive instrument data fusion, which make them valuable resources for remote sensing communities as they prepare for the next generation of spaceborne MAP and lidar missions.",[2017]
2,ACTIVATE,"Marine boundary layer clouds play a critical role in Earth’s energy balance and water cycle. These clouds cover more than 45% of the ocean surface and exert a net cooling effect. The Aerosol Cloud meTeorology Interactions oVer the western Atlantic Experiment (ACTIVATE) project is a five-year project that provides important globally-relevant data about changes in marine boundary layer cloud systems, atmospheric aerosols and multiple feedbacks that warm or cool the climate. ACTIVATE studies the atmosphere over the western North Atlantic and samples its broad range of aerosol, cloud and meteorological conditions using two aircraft, the UC-12 King Air and HU-25 Falcon. The UC-12 King Air will primarily be used for remote sensing measurements while the HU-25 Falcon will contain a comprehensive instrument payload for detailed in-situ measurements of aerosol, cloud properties, and atmospheric state. A few trace gas measurements will also be onboard the HU-25 Falcon for the measurements of pollution traces, which will contribute to airmass classification analysis. A total of 150 coordinated flights over the western North Atlantic are planned through 6 deployments from 2020-2022. The ACTIVATE science observing strategy intensively targets the shallow cumulus cloud regime and aims to collect sufficient statistics over a broad range of aerosol and weather conditions which enables robust characterization of aerosol-cloud-meteorology interactions. This strategy is implemented by two nominal flight patterns: Statistical Survey and Process Study. The statistical survey pattern involves close coordination between the remote sensing and in-situ aircraft to conduct near coincident sampling at and below cloud base as well as above and within cloud top. The process study pattern involves extensive vertical profiling to characterize the target cloud and surrounding aerosol and meteorological conditions.","[2020, 2021, 2022]"
3,AEOLUS,"NASA’s Aeolus Calibration/Validation (Cal/Val) field campaign was conducted to validate measurements from the European Space Agency’s (ESA’s) Atmospheric Dynamics Mission-Aeolus (ADM-Aeolus) satellite that uses Doppler lidar to measure wind profiles. In addition, Aeolus provides valuable information on aerosols and cloud layer vertical distribution and their optical properties. Based out of Palmdale, CA, the Aeolus Cal/Val campaign conducted five research flights from April 17-30, 2019 over the eastern Pacific Ocean. The goal of Aeolus was to demonstrate the performance of NASA’s Doppler Aerosol WiNd (DAWN) lidar and High Altitude Lidar Observatory (HALO), flown onboard NASA’s DC-8 aircraft, under a wide variety of weather and aerosol conditions and to perform initial comparisons with ADM-Aeolus level 2 measurements. Dropsondes were also deployed during these flights. DAWN is an airborne instrument that uses pulsed lasers at varying scan angles to detect the movement of atmospheric aerosols, such as dust or sea salt, and can profile wind vectors (both speed and direction). HALO uses a combined Differential Absorption Lidar (DIAL) and High Spectral Resolution Lidar (HSRL) to profile atmospheric aerosols and water vapor. Aerosol and wind measurements can be paired together to understand the capabilities of DAWN and what its strengths and weaknesses are, and to evaluate ADM-Aeolus’s aerosol and wind measurements.",[2019]
4,AJAX,"The Alpha Jet Atmospheric eXperiment (AJAX) is a partnership between NASA's Ames Research Center and H211, L.L.C., facilitating routine in-situ measurements over California, Nevada, and the coastal Pacific in support of satellite validation. The standard payload complement includes rigorously-calibrated ozone (O3), formaldehyde (HCHO), carbon dioxide (CO2), and methane (CH4) mixing ratios, as well as meteorological data including 3-D winds. Multiple vertical profiles (to ~8.5 km) can be accomplished in each 2-hr flight. The AJAX project has been collecting trace gas data on a regular basis in all seasons for over a decade, helping to assess satellite sensors' health and calibration over significant portions of their lifetimes, and complementing surface and tower-based observations collected elsewhere in the region.\r\rAJAX supports NASA's Orbiting Carbon Observatory (OCO-2/3) and Japan's GOSAT and GOSAT-2, and collaborates with many other research organizations (e.g. CARB, NOAA, USFS, EPA). AJAX celebrated its 200th science flight in 2016, and previous studies have investigated topics as varied as stratospheric-to-tropospheric transport, forest fire plumes, atmospheric river events, long-range transport of pollution from Asia to the western US, urban outflow, and emissions from gas leaks, oil fields, and dairies.","[2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]"
5,ARISE,"ARISE was NASA's first Arctic airborne campaign designed to take simultaneous measurements of ice, clouds and the levels of incoming and outgoing radiation, the balance of which determined the degree of climate warming. Over the past few decades, an increase in global temperatures led to decreased Arctic summer sea ice. Typically, Arctic sea ice reflects sunlight from the Earth. However, a loss of sea ice means there is more open water to absorb heat from the sun, enhancing warming in the region. More open water can also cause the release of more moisture into the atmosphere. This additional moisture could affect cloud formation and the exchange of heat from Earth’s surface to space. Conducted during the peak of summer ice melt (August 28, 2014-October 1, 2014), ARISE was designed to study and collect data on thinning sea ice, measure cloud and atmospheric properties in the Arctic, and to address questions about the relationship between retreating sea ice and the Arctic climate. During the campaign, instruments on NASA’s C-130 aircraft conducted measurements of spectral and broadband radiative flux profiles, quantified surface characteristics, cloud properties, and atmospheric state parameters under a variety of Arctic atmospheric and surface conditions (e.g. open water, sea ice, and land ice). When possible, C-130 flights were coordinated to fly under satellite overpasses. The primary aerial focus of ARISE was over Arctic sea ice and open water, with minor coverage over Greenland land ice. Through these efforts, the ARISE field campaign helped improve cloud and sea ice computer modeling in the Arctic.",[2014]
6,ASIAAQ,"The Airborne and Satellite Investigation of Asian Air Quality (ASIA-AQ) was an international cooperative field study designed to address local air quality challenges. Conducted from January-March 2024, ASIA-AQ deployed multiple aircraft to collect in situ and remote sensing measurements, along with numerous ground-based observations and modeling assessments. Data was collected over four countries including, the Philippines, Taiwan, South Korea and Thailand and flights were conducted in full partnership with local scientists and environmental agencies responsible for air quality monitoring and assessment. One of the primary goals of ASIA-AQ was to contribute improving integration of satellite observations with existing air quality ground monitoring and modeling efforts across Asia. Air quality observations from satellites are evolving with new capabilities from South Korea’s Geostationary Environment Monitoring Spectrometer (GEMS), which conducts hourly measurements to provide a new view of air quality conditions from space that complements and depends upon ground-based monitoring efforts of countries in its field of view. ASIA-AQ science goals focused on satellite validation and interpretation, emissions quantification and verification, model evaluation, aerosol chemistry, and ozone chemistry.",[2024]
7,CALIPSO-NVF,"The CALIPSO Night Validation Flights (CALIPSO-NVF) airborne deployment was conducted in August 2022 out of Bermuda. The goal was to conduct a series of nighttime underflights of the CALIPSO satellite with the NASA Langley High Spectral Resolution Lidar (HSRL-2). Airborne measurements from the NASA Langley HSRL-2 instrument are essential for verifying the calibration accuracy of the CALIPSO lidar and for acquiring information on aerosol optical properties used for its aerosol profile retrievals. By flying under the CALIPSO ground track, HSRL-2 provides an independent measurement of lidar attenuated backscatter with a higher signal-to-noise ratio. To obtain this important validation dataset, the HSRL-2 was flown on board the LaRC B-200 King Air as CALIPSO passed within range of the aircraft. The western Atlantic Ocean was selected for CALIPSO-NVF to allow unobstructed, 45-minute flights along the satellite ground track. Five nighttime underflights were executed in total – four in cloud-free skies on August 7, 10, 12, and 17th, yielding ideal data from both instruments for calibration validation. The fifth flight on August 18th targeted measurements beneath cirrus to assess the accuracy of CALIPSO aerosol retrievals through high clouds at night, an important but previously unexplored validation target. Total research flight time was 17.7 hours, sampling 2,200 km along the CALIPSO ground track.",[2022]
8,CAMP2EX,"The CAMP2Ex (Clouds, Aerosol and Monsoon Processes-Philippines Experiment) and PISTON (Propagation of Intra-Seasonal Tropical Oscillations) are two field studies conducted in Southeast Asia. While each study has its own set of science objectives, there are common and complementary instrument payloads between these two projects. Consequently, a synergistic partnership was established at the very beginning of the projects and a coordinated sampling strategy was developed to extend spatial coverage and obtain temporal context information, which benefits the analysis of both data sets for achieving the science objectives.\r\rThe CAMP2Ex was a NASA-funded field study with three main science objectives: aerosol effect on cloud microphysical and optical properties, aerosol and cloud influence on radiation as well as radiative feedback, and meteorology related effects on aerosol distribution and aerosol-cloud interactions. Research on these topics requires a comprehensive characterization of aerosol, cloud, and precipitation properties, as well as the associated meteorological and radiative parameters. Trace gas measurements are also needed for airmass type analysis to characterize the role of anthropogenic and natural aerosols. These CAMP2Ex observations were delivered by NASA P-3B aircraft and SPEC Learjet 35A. The sampling strategy designed for CAMP2Ex allowed for coordinated flights for both aircraft to maximize the science return. The P-3B was used primarily to conduct remote sensing measurements of cloud and precipitation structure and aerosol layers and vertical profiles of aerosol and atmospheric state variable, while the Learjet flew below the P-3B to obtain the detailed cloud microphysical properties. During the 2019 field deployment (August 20-October 10), the P-3B conducted 19 science flights and the SPEC Learjet conducted 11 flights in the vicinity of the Philippines. 
Three NASA P3B and 7 SPEC Lear 35A flights were also flown in coordination with PISTON shipboard measurements. Ground-based aerosol observations were also recorded in 2018 and 2019. CAMP2Ex was completed in partnership with Philippine research and operational weather communities. Measurements completed during CAMP2EX provide a 4-D observational view of the environment of the Philippines and its neighboring waters in terms of microphysical, hydrological, dynamical, thermodynamical, and radiative properties of the environment. The data target the environment of shallow cumulus and cumulus congestus clouds.","[2018, 2019]"
9,CPEXAW,"The Convective Processes Experiment – Aerosols & Winds (CPEX-AW) campaign was a joint effort between the US National Aeronautics and Space Administration (NASA) and the European Space Agency (ESA) with the primary goal of conducting a post-launch calibration and validation activities of the Atmospheric Dynamics Mission-Aeolus (ADM-AEOLUS) Earth observation wind Lidar satellite in St. Croix. CPEX-AW is a follow-on to the Convective Processes Experiment (CPEX) field campaign which took place in the summer of 2017 (https://cpex.jpl.nasa.gov/). In addition to joint calibration/validation of ADM-AEOLUS, CPEX-AW studied the dynamics related to the Saharan Air Layer, African Easterly Waves and Jets, Tropical Easterly Jet, and deep convection in the InterTropical Convergence Zone (ITCZ). CPEX-AW science goals include:\r • Better understanding interactions of convective cloud systems and tropospheric winds as part of the joint NASA-ESA Aeolus Cal/Val effort over the tropical Atlantic;\r • Observing the vertical structure and variability of the marine boundary layer in relation to initiation and lifecycle of the convective cloud systems, convective processes (e.g., cold pools), and environmental conditions within and across the ITCZ;\r • Investigating how the African easterly waves and dry air and dust associated with Sahara Air Layer control the convectively suppressed and active periods of the ITCZ; \r • Investigating interactions of wind, aerosol, clouds, and precipitation and effects on long range dust transport and air quality over the western Atlantic.\r\rIn order to successfully achieve the objectives of the campaign, NASA deployed its DC-8 aircraft equipped with an Airborne Third Generation Precipitation Radar (APR-3), Doppler Aerosol WiNd Lidar (DAWN), High Altitude Lidar Observatory (HALO), High Altitude Monolithic Microwave Integrated Circuit (MMIC) Sounding Radiometer (HAMSR), and dropsondes. 
This campaign aims to provide useful material to atmospheric scientists, meteorologists, lidar experts, air quality experts, professors, and students.\r\rThe Atmospheric Science Data Center (ASDC) archives the Dropsonde, HALO, ADM-Aeolus, and DAWN data products for CPEX-AW. For additional archived data products from CPEX-AW, please visit the project page on<a href='https://www.earthdata.nasa.gov/data/catalog?data_center_h[]=Global%20Hydrology%20Resource%20Center%20%28GHRC%29&keyword=CPEX-AW' target='_blank'> EarthData.</a>",[2021]


Choose Campaign: DISCOVERAQ


Unnamed: 0,year,projectacronym
0,2011-MD,DISCOVERAQ
1,2013-CA,DISCOVERAQ
2,2013-TX,DISCOVERAQ
3,2014-CO,DISCOVERAQ


Choose Year: 2014-CO


Unnamed: 0,name,platformtype
0,ANALYSIS,Other
1,B200,Aircraft
2,FALCON,Aircraft
3,GROUND-AURORA-EAST,Ground Station
4,GROUND-BAO-TOWER,Ground Station
5,GROUND-BOULDER,Ground Station
6,GROUND-CAMP,Ground Station
7,GROUND-CHATFIELD-PARK,Ground Station
8,GROUND-DENVER-LACASA,Ground Station
9,GROUND-FORT-COLLINS,Ground Station


Choose Platform (name NOT platformtype - DO NOT CHOOSE SATELLITE): SONDES


Unnamed: 0,investigatorid,firstname,lastname
0,41,ANNE,THOMPSON


Choose PI Last Name (Copy Exactly): THOMPSON


### Download Files for Selected PI/Platform/Year/Campaign

In [96]:
# Download, unzip and clean up one archive per file listed for the chosen PI.
session = requests.Session()
session.auth = None  # requests will use your .netrc automatically

# NOTE(review): both endpoints are plain http; confirm whether https is available
# before sending credentials over this connection.
auth_url = "http://asdc.larc.nasa.gov/soot-api/Authenticate/user"

for file in file_names_for_pi_table['filename']:
    get_file_url = f"http://asdc.larc.nasa.gov/soot-api/data_files/downloadFiles?filenames={file}"
    zip_file_name = get_file_url.split('=')[-1].split('.ict')[0] #get last part of filename
    # One relative path is used for writing, extracting and deleting, so the
    # cell no longer depends on a Windows-style "\\" separator.
    zip_path = f"{zip_file_name}.zip"

    response = session.get(auth_url)
    if response.status_code != 200:
        print('ERROR: User not authorized')
        continue  # nothing was downloaded, so there is nothing to unzip

    response = session.get(get_file_url)
    if response.status_code != 200:
        print(f'ERROR: Unable to download files. Response code {response.status_code}')
        continue  # nothing was written to disk for this file

    with open(zip_path, 'wb') as f:
        f.write(response.content)

    try:
        #unzip the file you downloaded
        with ZipFile(zip_path, 'r') as zObject:
            zObject.extractall()
    finally:
        #delete the zip file, only keep the unzipped files
        # (also runs when extraction fails, so no stray archive is left behind)
        os.remove(zip_path)

### Convert Downloaded Files to CSVs and Delete ICT Files

In [112]:
import re  # used only by this cell, for case-insensitive column renaming

folder = os.getcwd()

# NOTE: every *.ict file in the working directory is converted and later
# deleted, not only the files fetched by the download cell.
# Sorted so the row order of the combined CSV is reproducible.
file_paths = sorted(
    entry.path
    for entry in os.scandir(folder)
    if entry.is_file() and entry.name.endswith(".ict")
)

# Named date_format (not `format`) so the builtin format() is not shadowed.
date_format = '%Y,%m,%d'


def _datetime_column_name(col, token):
    """Return *col* with each case-insensitive occurrence of *token* replaced by "Datetime"."""
    return re.sub(re.escape(token), "Datetime", col, flags=re.IGNORECASE)


frames = []
for file in file_paths:
    r = ICARTTReader(file)
    df = r.read_table()
    meta = r.read_metadata()
    vars_ = r.read_variable_defs()  # not used below; kept available for inspection

    #find start date/time (only first 3 values)
    s = meta.get("date_info").split(',')
    s = ','.join(s[0:3])
    s = s.replace(" ", "")

    start_date = datetime.strptime(s, date_format)
    # NOTE(review): meta["seconds"] is added as an offset to the start date;
    # confirm against read_metadata() what this value represents.
    start_time = timedelta(seconds=int(meta.get("seconds")))
    start_datetime = start_date + start_time

    #find columns that have UTC seconds (Start_UTC, Seconds_UTC, etc.)
    time_columns = [col for col in df.columns if "UTC" in col.upper()]

    #create new column with full date listed
    for col in time_columns:
        # Columns are matched case-insensitively, so the rename must be too;
        # otherwise e.g. "Time_utc" keeps its name and is overwritten.
        new_col_name = _datetime_column_name(col, "UTC")
        df[new_col_name] = start_datetime + pd.to_timedelta(df[col], unit="s")

    #some data types have Time instead of UTC
    new_time_columns = []
    if not time_columns:
        new_time_columns = [col for col in df.columns if "TIME" in col.upper()]

    for col in new_time_columns:
        # title() is kept so names that already worked keep their output form;
        # the case-insensitive replace also handles names such as "MidTime",
        # which title() turns into "Midtime".
        new_col_name = _datetime_column_name(col.title(), "Time")
        df[new_col_name] = start_datetime + pd.to_timedelta(df[col], unit="s")

    frames.append(df)

#combine dataframes (one concat instead of re-copying the result every iteration)
combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

out_path = Path(folder) / f"{campaign}_{year}_{platform}_{pi}.csv"
combined_df.to_csv(out_path)
# to_csv() returns None when given a path, so expose the written path instead.
csv_path = out_path

#remove ICT files, only csvs left
for file in file_paths:
    os.remove(file)