In [2]:
import os
from getpass import getpass
from netrc import netrc
from platform import system
from subprocess import Popen

In [3]:
urs = "urs.earthdata.nasa.gov"  # Earthdata URL endpoint for authentication
prompts = ["Enter NASA Earthdata Login Username: ", "Enter NASA Earthdata Login Password: "]

# Determine the OS (Windows machines usually use an '_netrc' file)
netrc_name = "_netrc" if system() == "Windows" else ".netrc"

# Determine if netrc file exists, and if so, if it includes NASA Earthdata Login Credentials
try:
    netrcDir = os.path.expanduser(f"~/{netrc_name}")
    netrc(netrcDir).authenticators(urs)[0]

# Below, create a netrc file and prompt user for NASA Earthdata Login Username and Password
except FileNotFoundError:
    homeDir = os.path.expanduser("~")
    Popen(
        "touch {0}{2} | echo machine {1} >> {0}{2}".format(homeDir + os.sep, urs, netrc_name),
        shell=True,
    )
    Popen(
        "echo login {} >> {}{}".format(getpass(prompt=prompts[0]), homeDir + os.sep, netrc_name),
        shell=True,
    )
    Popen(
        "echo 'password {} '>> {}{}".format(
            getpass(prompt=prompts[1]), homeDir + os.sep, netrc_name
        ),
        shell=True,
    )
    # Set restrictive permissions
    Popen("chmod 0600 {0}{1}".format(homeDir + os.sep, netrc_name), shell=True)

    # Determine OS and edit netrc file if it exists but is not set up for NASA Earthdata Login
except TypeError:
    homeDir = os.path.expanduser("~")
    Popen("echo machine {1} >> {0}{2}".format(homeDir + os.sep, urs, netrc_name), shell=True)
    Popen(
        "echo login {} >> {}{}".format(getpass(prompt=prompts[0]), homeDir + os.sep, netrc_name),
        shell=True,
    )
    Popen(
        "echo 'password {} '>> {}{}".format(
            getpass(prompt=prompts[1]), homeDir + os.sep, netrc_name
        ),
        shell=True,
    )

In [6]:
import boto3
import requests
import xarray as xr

%matplotlib inline

In [7]:
s3_cred_endpoint = "https://data.asdc.earthdata.nasa.gov/s3credentials"

In [8]:
def get_temp_creds():
    temp_creds_url = s3_cred_endpoint
    return requests.get(temp_creds_url).json()

In [9]:
temp_creds_req = get_temp_creds()

In [10]:
s3_client = boto3.client(
    "s3",
    aws_access_key_id=temp_creds_req["accessKeyId"],
    aws_secret_access_key=temp_creds_req["secretAccessKey"],
    aws_session_token=temp_creds_req["sessionToken"],
)



In [11]:
bucket = "asdc-prod-protected"
local_dir = "./no2"
os.makedirs(local_dir, exist_ok=True)

In [12]:
# cell 2:  read the list you saved earlier
with open("AWS_NO2.txt") as f:
    s3_urls = [line.strip() for line in f if line.strip()]

In [15]:
# cell 3:  loop and download
count = 1
for s3_url in s3_urls:
    key = s3_url.replace("s3://asdc-prod-protected/", "")
    file_name = key.split("/")[-1]
    local_path = os.path.join(local_dir, file_name)

    if os.path.exists(local_path):
        print("skip ", file_name)
        continue

    print(count,"-","downloading", file_name)
    s3_client.download_file(
        bucket,
        key,
        local_path,
        ExtraArgs={"RequestPayer": "requester"}
    )
    count+=1


1 - downloading TEMPO_NO2_L3_V03_20250131T230228Z_S013.nc
2 - downloading TEMPO_NO2_L3_V03_20250131T222220Z_S012.nc
3 - downloading TEMPO_NO2_L3_V03_20250131T212220Z_S011.nc
4 - downloading TEMPO_NO2_L3_V03_20250131T202220Z_S010.nc
5 - downloading TEMPO_NO2_L3_V03_20250131T192220Z_S009.nc
6 - downloading TEMPO_NO2_L3_V03_20250131T182220Z_S008.nc
7 - downloading TEMPO_NO2_L3_V03_20250131T172220Z_S007.nc
8 - downloading TEMPO_NO2_L3_V03_20250131T162220Z_S006.nc
9 - downloading TEMPO_NO2_L3_V03_20250131T152220Z_S005.nc
10 - downloading TEMPO_NO2_L3_V03_20250131T142220Z_S004.nc
11 - downloading TEMPO_NO2_L3_V03_20250131T134212Z_S003.nc
12 - downloading TEMPO_NO2_L3_V03_20250131T130204Z_S002.nc
13 - downloading TEMPO_NO2_L3_V03_20250130T230218Z_S013.nc
14 - downloading TEMPO_NO2_L3_V03_20250130T222210Z_S012.nc
15 - downloading TEMPO_NO2_L3_V03_20250130T212210Z_S011.nc
16 - downloading TEMPO_NO2_L3_V03_20250130T202210Z_S010.nc
17 - downloading TEMPO_NO2_L3_V03_20250130T192210Z_S009.nc
18 - d

In [4]:
import xarray as xr

In [5]:
df = xr.open_dataset("/content/no2/TEMPO_NO2_L3_V03_20250123T172043Z_S007.nc")
df.dims



In [1]:
import xarray as xr, pandas as pd, glob, os, numpy as np ,tqdm
from datetime import datetime


In [3]:
folder = "/content/no2"               # wherever you downloaded
files  = sorted(glob.glob(os.path.join(folder, "*.nc")))
epoch = pd.Timestamp('1980-01-06')
print(f"{len(files)} files found")

100 files found


In [4]:
def one_file_to_hourly(path):
    """return 24-row dataframe (hour, mean trop, mean strat, mean flag)"""
    # 1. real scan time
    with xr.open_dataset(path) as root:
        dt = epoch + pd.Timedelta(seconds=float(root.time)/1e9)
    date = dt.date()

    # 2. open product group
    with xr.open_dataset(path, group='product') as prod:
        trop  = prod.vertical_column_troposphere.squeeze()
        strat = prod.vertical_column_stratosphere.squeeze()
        flag  = prod.main_data_quality_flag.squeeze()

    # 3. spatial mean per hour (already 1 file = 1 hour)
    hour = dt.hour
    return pd.DataFrame({
        'date': date,
        'hour': hour,
        'latitude_centre': trop.latitude.mean().item(),
        'longitude_centre': trop.longitude.mean().item(),
        'vertical_column_troposphere': trop.mean().item(),
        'vertical_column_stratosphere': strat.mean().item(),
        'main_data_quality_flag': flag.mean().item()
    }, index=[0])

# 4. collect all
df_all = []
for f in tqdm.tqdm(files, desc='nc → 24-row'):
    df_all.append(one_file_to_hourly(f))

# 5. pivot to 24 rows per calendar day
big = pd.concat(df_all, ignore_index=True)
big = big.sort_values(['date','hour'])
big.to_csv('TEMPO_NO2_24hour_perDay.csv', index=False)
print("done → TEMPO_NO2_24hour_perDay.csv")

nc → 24-row: 100%|██████████| 100/100 [04:36<00:00,  2.77s/it]

done → TEMPO_NO2_24hour_perDay.csv





In [4]:
# open ONLY the science_data group, keep the two column densities
ds = xr.open_mfdataset(
        files,
        group='product',          # <-- key line
        combine='nested',
        concat_dim='time',
        chunks={'time': 100}
     )
[['vertical_column_troposphere', 'vertical_column_stratosphere','main_data_quality_flag','time','latitude','longitude']]

[['vertical_column_troposphere',
  'vertical_column_stratosphere',
  'main_data_quality_flag',
  'time',
  'latitude',
  'longitude']]

In [6]:
ds.time

In [26]:
# TEMPO files carry a scalar ‘time’ – turn it into a coordinate
ds = ds.assign_coords(time=pd.to_datetime(ds.time))

In [27]:
hourly = ds.groupby('time.hour').mean('time')   # or .sum(), .first(), etc.
# if you prefer a true datetime index instead of ‘hour’ 0…23:
# hourly = ds.resample(time='1H').mean()

In [30]:
print("original time coord:", ds.time.dtype, ds.time.shape)
print("sample times:", ds.time.isel(time=slice(0,3)).values)

original time coord: datetime64[ns] (100,)
sample times: ['1970-01-01T00:00:00.000000000' '1970-01-01T00:00:00.000000001'
 '1970-01-01T00:00:00.000000002']
