In [46]:
from tatc import utils
from tatc.schemas import PointedInstrument, Satellite, Instrument, TwoLineElements  

# Geometry inputs shared by every MSI field-of-view computation below.
# NOTE(review): 394e3 is presumably the orbit altitude in metres -- confirm against tatc.utils.
ORBIT_ALTITUDE = 394e3
MSI_ROLL_ANGLE = 5.760868  # degrees

# The CPR instrument is currently disabled; its draft definition is kept for reference:
# PointedInstrument(
#     name = "CPR",
#     #field_of_regard=utils.swath_width_to_field_of_regard(394e3,650),
#     cross_track_field_of_view = utils.swath_width_to_field_of_regard(394e3, 6500),
#     along_track_field_of_view = utils.swath_width_to_field_of_regard(394e3,10000)
# ),

# MSI: rectangular footprint derived from a 150e3 cross-track and 50000
# along-track swath width, rolled by MSI_ROLL_ANGLE.
msi = PointedInstrument(
    name="MSI",
    # cross_track_field_of_view + 2*roll angle
    field_of_regard=(
        utils.swath_width_to_field_of_regard(ORBIT_ALTITUDE, 150e3)
        + 2 * MSI_ROLL_ANGLE
    ),
    cross_track_field_of_view=utils.swath_width_to_field_of_view(
        ORBIT_ALTITUDE, 150e3, MSI_ROLL_ANGLE
    ),
    along_track_field_of_view=utils.swath_width_to_field_of_view(
        ORBIT_ALTITUDE, 50000, MSI_ROLL_ANGLE
    ),
    roll_angle=MSI_ROLL_ANGLE,
    is_rectangular=True,
)

# Orbit of the satellite, given as a two-line element set.
earthcare_orbit = TwoLineElements(
    tle=[
        "1 59908U 24101A   25200.34125573  .00010433  00000+0  14571-3 0  9999",
        "2 59908  97.0168 326.4971 0001222 108.6708 251.4681 15.57041891 64775",
    ]
)

earthcare = Satellite(name="EarthCare", orbit=earthcare_orbit, instruments=[msi])

satellites = [earthcare]

In [47]:
from datetime import datetime, timezone, timedelta
from tatc.analysis import compute_ground_track
import pandas as pd
from joblib import Parallel, delayed

# Simulation window and sampling configuration shared by the cells below.
startdate = datetime(2025,7,19,15,4, tzinfo = timezone.utc)  # initial date (UTC); get_clusters overrides year/month/day when querying the G5NR dataset
duration = timedelta(hours = 6)  # total simulated time span
step = timedelta(seconds = 1)  # sampling interval, passed as time_step to compute_instrument_ground_track
batch_duration = timedelta(minutes=10)  # length of one parallel batch; also reused as the animation frame duration

def compute_instrument_ground_track(satellite, start, duration, batch_duration, time_step):
    """Compute a satellite's ground track over a time span, in parallel batches.

    The span ``[start, start + duration)`` is cut into consecutive windows of
    ``batch_duration``; each window is sampled every ``time_step`` and handed
    to ``compute_ground_track`` in its own joblib task. The per-batch results
    are concatenated in chronological order.

    Args:
        satellite: Satellite passed through to ``compute_ground_track``.
        start: First sample time (datetime).
        duration: Total time span to cover (timedelta).
        batch_duration: Length of each parallel batch (timedelta).
        time_step: Sampling interval inside a batch (timedelta).

    Returns:
        The concatenated ground-track table with a fresh integer index.

    Raises:
        ValueError: From ``pd.concat`` when ``duration`` yields no batches.
    """
    end = start + duration
    # Ceiling division: a trailing partial batch must not be silently dropped
    # when `duration` is not an exact multiple of `batch_duration`.
    n_batches = -(-duration // batch_duration)
    batch_times = [
        pd.date_range(
            start + i * batch_duration,
            # Clip the last window so no sample falls past the requested span.
            min(start + (i + 1) * batch_duration, end),
            freq=time_step,
            inclusive="left",
        )
        for i in range(n_batches)
    ]
    tracks = Parallel(-1)(
        delayed(compute_ground_track)(satellite, times, crs="spice")
        for times in batch_times
    )
    return pd.concat(tracks, ignore_index=True)



In [48]:
from copy import deepcopy

# CPR processing is currently disabled; the original steps are kept for reference:
# earthcare_cpr = deepcopy(earthcare)
# earthcare_cpr.instruments = [inst for inst in earthcare.instruments if inst.name == "CPR"]
# ground_tracks_cpr = compute_instrument_ground_track(
#     earthcare_cpr, startdate, duration, batch_duration, step
# )

# Build a copy of the satellite that carries only the MSI instrument, so the
# ground track is computed for that instrument alone.
msi_instruments = [inst for inst in earthcare.instruments if inst.name == "MSI"]
earthcare_msi = deepcopy(earthcare)
earthcare_msi.instruments = msi_instruments

ground_tracks_msi = compute_instrument_ground_track(
    earthcare_msi,
    startdate,
    duration,
    batch_duration,
    step,
)


In [49]:
# Quick sanity check of the MSI ground-track table: first rows, then column names.
print(ground_tracks_msi.head())
print(ground_tracks_msi.columns)


                                            geometry  \
0  POLYGON Z ((-21.38835 -24.35425 0, -21.40002 -...   
1  POLYGON Z ((-33.11564 -62.67654 0, -33.14941 -...   
2  POLYGON Z ((-163.57706 -81.02084 0, -163.59047...   
3  MULTIPOLYGON Z (((-178.25186 -76.9185 0, -178....   
4  POLYGON Z ((150.55866 -5.59381 0, 150.55325 -5...   

                       time  satellite instrument  valid_obs  
0 2025-07-19 15:04:00+00:00  EarthCare        MSI       True  
1 2025-07-19 15:14:00+00:00  EarthCare        MSI       True  
2 2025-07-19 15:24:00+00:00  EarthCare        MSI       True  
3 2025-07-19 15:34:00+00:00  EarthCare        MSI       True  
4 2025-07-19 15:44:00+00:00  EarthCare        MSI       True  
Index(['geometry', 'time', 'satellite', 'instrument', 'valid_obs'], dtype='object')


In [50]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from cartopy import crs as ccrs
from IPython.display import HTML
from matplotlib.patches import Patch

# Single map axis in a Plate Carree projection; animate() clears and redraws it for every frame.
fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})

# Each animation frame covers one ground-track batch window.
frame_duration = batch_duration

def animate(frame):
    """Redraw the map for one animation frame.

    Plots every instrument footprint whose timestamp falls inside
    ``[startdate + frame*frame_duration, startdate + (frame+1)*frame_duration)``.
    MSI footprints are drawn in red; CPR footprints, when available, are drawn
    in blue on top.

    Args:
        frame: Zero-based frame index supplied by ``FuncAnimation``.
    """
    ax.clear()
    window_start = startdate + frame * frame_duration
    window_end = window_start + frame_duration

    # (tracks, colour) in draw order. The cell that computes
    # `ground_tracks_cpr` is commented out, so that table is looked up
    # dynamically and skipped when absent instead of raising NameError.
    layers = [
        (ground_tracks_msi, "r"),
        (globals().get("ground_tracks_cpr"), "b"),
    ]
    for tracks, color in layers:
        if tracks is None:
            continue
        in_window = tracks[
            (window_start <= tracks.time) & (tracks.time < window_end)
        ]
        if not in_window.empty:
            in_window.plot(ax=ax, color=color, transform=ccrs.PlateCarree())

    #ax.set_global()
    ax.set_aspect("equal")
    ax.coastlines()
    ax.set_title(window_start)
    fig.tight_layout()

# One frame per `frame_duration` window across the whole simulation span,
# with a 100 ms delay between frames and full redraws (no blitting).
ani = animation.FuncAnimation(fig, animate, frames=duration // frame_duration, interval=100, blit=False)

# Embed the animation as an interactive JavaScript player, then close the
# current figure.
display(HTML(ani.to_jshtml()))
plt.close()

In [7]:
# Side length of one grid-cell box built in get_clusters (coordinates are EPSG:4326, i.e. degrees).
grid_size = 0.0625
# Time advance between consecutive cluster frames in get_clusters.
g5nr_frame_duration = timedelta(hours=1)

In [8]:
import rioxarray
import xarray as xr

# Remote (OPeNDAP) endpoint of the G5NR dataset read by get_clusters.
G5NR_MET_URL = "https://opendap.nccs.nasa.gov/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/tavg/tavg30mn_2d_met3_Nx"

dataset = xr.open_dataset(G5NR_MET_URL, decode_times=True)

# Offline alternative (download once, then read the local copy):
#xr.open_dataset('https://opendap.nccs.nasa.gov/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/tavg/tavg30mn_2d_met3_Nx').to_netcdf('dataset.nc')
#dataset = xr.open_dataset('dataset.nc', decode_times=True)

# Tag the dataset as geographic lon/lat so the rioxarray accessor can be used on it.
dataset.rio.write_crs("epsg:4326", inplace=True)
dataset.rio.set_spatial_dims("lon", "lat", inplace=True)


  ref_date = _ensure_padded_year(ref_date)


In [9]:
# Disabled cell: the code below is wrapped in a string literal, so nothing is
# executed and `constant` is never defined.
# NOTE(review): the commented-out cache lines inside write 'constant.nc' but
# read back 'dataset.nc' -- likely a copy-paste slip; fix before re-enabling.
'''
constant = xr.open_dataset(
    "http://opendap.nccs.nasa.gov:80/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/const/const_2d_asm_Nx",
    decode_times=True,
)

#xr.open_dataset('http://opendap.nccs.nasa.gov:80/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/const/const_2d_asm_Nx').to_netcdf('constant.nc')
#constant = xr.open_dataset('dataset.nc', decode_times=True)
constant.rio.write_crs("epsg:4326", inplace=True)
constant.rio.set_spatial_dims("lon", "lat", inplace=True)
'''

'\nconstant = xr.open_dataset(\n    "http://opendap.nccs.nasa.gov:80/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/const/const_2d_asm_Nx",\n    decode_times=True,\n)\n\n#xr.open_dataset(\'http://opendap.nccs.nasa.gov:80/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/const/const_2d_asm_Nx\').to_netcdf(\'constant.nc\')\n#constant = xr.open_dataset(\'dataset.nc\', decode_times=True)\nconstant.rio.write_crs("epsg:4326", inplace=True)\nconstant.rio.set_spatial_dims("lon", "lat", inplace=True)\n'

In [10]:
import numpy as np
import geopandas as gpd
from shapely.geometry import box
import scipy.ndimage as ndi

def compute_grid_cell_area(lat, lon):
    """Return the area of every cell of a regular lat/lon grid, in km^2.

    Each cell is centred on its (lat, lon) coordinate and spans the grid
    spacing taken from the first two coordinates of each axis. Its area on a
    sphere of radius 6371 km is ``R^2 * dlon * (sin(north) - sin(south))``.
    Cell edges are clipped to the poles, so rows centred on +/-90 degrees get
    the area of the half-cell polar cap instead of a value that collapses to
    about zero (or slightly negative) from rounding.

    Args:
        lat: 1-D sequence of cell-centre latitudes in degrees.
        lon: 1-D sequence of cell-centre longitudes in degrees.

    Returns:
        ``float64`` array of shape ``(len(lat), len(lon))``. The area depends
        only on latitude, so every column of a row holds the same value.
    """
    R = 6371.0  # Earth radius in km
    d2r = np.pi / 180.0
    half_pi = np.pi / 2.0

    # Accept plain sequences as well as numpy arrays.
    lat = np.asarray(lat, dtype=np.float64)
    lon = np.asarray(lon, dtype=np.float64)
    ny, nx = len(lat), len(lon)

    # Grid spacing; a single-point axis falls back to a 1-degree cell.
    dlat = abs(lat[1] - lat[0]) if ny > 1 else 1.0
    dlon = abs(lon[1] - lon[0]) if nx > 1 else 1.0

    lat_rad = lat * d2r
    dlat_rad = dlat * d2r
    dlon_rad = dlon * d2r

    # Northern/southern cell edges, clipped so they never extend past a pole.
    north = np.clip(lat_rad + dlat_rad / 2, -half_pi, half_pi)
    south = np.clip(lat_rad - dlat_rad / 2, -half_pi, half_pi)

    # One area per latitude row, then repeated across all longitudes.
    band_area = (R**2) * dlon_rad * (np.sin(north) - np.sin(south))
    return np.repeat(band_area[:, np.newaxis], nx, axis=1)


In [None]:
import scipy.ndimage as ndi
import numpy as np
import geopandas as gpd
from shapely.geometry import box

threshold = 220  # brightness temperature threshold in Kelvin

def get_clusters(frame):
    """Find connected cold-cloud clusters in one G5NR snapshot.

    Reads the module-level ``dataset``, ``startdate``, ``g5nr_frame_duration``,
    ``grid_size`` and ``threshold``. The first timestep at or after the frame's
    start is converted from ``lwtup`` to an effective temperature; grid cells
    colder than ``threshold`` are grouped into connected components, and each
    component is dissolved into one geometry with summary statistics.

    Args:
        frame: Zero-based frame index; frame ``k`` starts at
            ``startdate + k * g5nr_frame_duration``.

    Returns:
        GeoDataFrame indexed by ``(time, cluster)`` with the dissolved cluster
        geometry and the columns ``count``, ``area``, ``tot_prectot``,
        ``avg_prectot``, ``max_prectot``, ``tot_tautot``, ``avg_tautot`` and
        ``max_tautot``.
    """
    frame_time = startdate + frame * g5nr_frame_duration

    # Map the simulation clock onto the dataset's time axis: only the time of
    # day is kept, the date is forced to 2006-07-20.
    # NOTE(review): the upper bound is forced to 2007-01-20, i.e. months later.
    # Only the first timestep of the slice is used below, so the bound just
    # needs to be late enough; confirm this (and day=20 vs. startdate's day)
    # is intentional.
    window_start = frame_time.replace(day=20, month=7, year=2006, tzinfo=None)
    window_end = (frame_time + g5nr_frame_duration).replace(
        day=20, month=1, year=2007, tzinfo=None
    )
    ds = dataset.sel(time=slice(window_start, window_end), lat=slice(-90, 90))

    # Select the snapshot once and pull the three fields from it.
    snapshot = ds.isel(time=0)
    lwtup = snapshot["lwtup"].rio.set_spatial_dims("lon", "lat")
    prectot = snapshot["prectot"].rio.set_spatial_dims("lon", "lat")
    tautot = snapshot["tautot"].rio.set_spatial_dims("lon", "lat")

    lat = lwtup["lat"].values
    lon = lwtup["lon"].values
    area_2d = compute_grid_cell_area(lat, lon)
    lon2d, lat2d = np.meshgrid(lon, lat)

    # Effective temperature from radiation (Stefan-Boltzmann law), then label
    # connected regions colder than the threshold.
    tb = np.sqrt(np.sqrt(lwtup / 5.67037e-8))
    labels, _ = ndi.label((tb < threshold).values)

    # Index every per-cell array with the same boolean mask so the columns are
    # aligned by construction. Filtering NaNs out of a flattened full grid
    # (as before) breaks the alignment as soon as a field has a NaN inside a
    # cluster.
    mask = labels > 0
    lat_vals = lat2d[mask]
    lon_vals = lon2d[mask]

    # One row per cold cell.
    # NOTE(review): each box is anchored with (lon, lat) as its lower-left
    # corner, whereas compute_grid_cell_area treats the coordinates as cell
    # centres -- confirm which convention is intended.
    cells = gpd.GeoDataFrame(
        {
            "count": 1,
            "cluster": labels[mask],
            "lat": lat_vals,
            "lon": lon_vals,
            "time": frame_time,
            "prectot": prectot.values[mask],
            "tautot": tautot.values[mask],
            "area": area_2d[mask],
        },
        # Build boxes only for the masked cells instead of iterating every
        # grid cell of the full field.
        geometry=[
            box(x, y, x + grid_size, y + grid_size)
            for x, y in zip(lon_vals, lat_vals)
        ],
        crs="EPSG:4326",
    )

    # Duplicate the per-cell values so each can be aggregated differently.
    for field in ("prectot", "tautot"):
        for stat in ("tot", "avg", "max"):
            cells[f"{stat}_{field}"] = cells[field]

    return cells.dissolve(
        by=["time", "cluster"],
        aggfunc={
            "count": "sum",
            "area": "sum",
            "tot_prectot": "sum",
            "avg_prectot": "mean",
            "max_prectot": "max",
            "tot_tautot": "sum",
            "avg_tautot": "mean",
            "max_tautot": "max",
        },
    )

# Run get_clusters for every frame in the simulation window on all available
# cores and stack the per-frame results; reset_index() turns the
# (time, cluster) index produced by dissolve back into ordinary columns.
# NOTE(review): the recorded output of this cell shows repeated
# "DAP DATADDS packet is apparently too short" errors -- presumably from
# concurrent requests against the remote dataset; consider a smaller n_jobs
# or a local copy of the data.
clusters = pd.concat(
    Parallel(n_jobs=-1)(
        delayed(get_clusters)(
            frame,
        )
        for frame in range(duration // g5nr_frame_duration)
    ),
).reset_index()

# Disabled sequential fallback (string literal, not executed): processes the
# frames one by one and reports any frame that fails instead of aborting.
'''results = []
for frame_df in range(duration // g5nr_frame_duration):
    try:
        clusters = get_clusters(frame_df)
        results.append(clusters)
    except Exception as e:
        print(f"Frame {frame_df} failed: {e}")
clusters = pd.concat(results).reset_index()'''

display(clusters)

Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short
Error:DAP DATADDS packet is apparently too short


In [11]:
# Total number of cluster rows across all frames.
print(len(clusters))

8799


In [31]:
# Disabled cell: the export script below is wrapped in a string literal and is
# not executed.
# NOTE(review) before re-enabling:
#   - `netcdf` is never imported or defined in the script;
#   - `geojson.point` is presumably meant to be `geojson.Point` -- confirm;
#   - `input_dir` is a hardcoded absolute user path; prefer a configurable,
#     relative data directory;
#   - `output_dir` actually names the output file, not a directory.
'''import geojson
import glob
import os

input_dir = '/Users/shashwatraj/Documents/Github/Code-Lab_RL_PriorityObs/Geos5datasets/'
output_dir = 'Geos5datasets.geojson'


features=[]

for file in (glob.glob(os.path.join(input_dir,"*.nc4"))):
    timestamp = file.split('_')[-1].replace('.nc4', '')

    with netcdf(file,'r') as nc:
        tbmin = nc.variables['tbmin'][:]
        areas = nc.variables['area'][:]
        lat = nc.variables['lat'][:]
        lon = nc.variables['lon'][:]
        albedo = nc.variables['albedo'][:]
        swgdn = nc.variables['swgdn'][:]
        preccon = nc.variables['preccon'][:]
        precanv = nc.variables['precanv'][:]
        preclsc = nc.variables['preclsc'][:]
        tauthgh = nc.variables['tauthgh'][:]
        tautmid = nc.variables['tautmid'][:]
        tautlow = nc.variables['tautlow'][:]
        tauttot = nc.variables['tauttot'][:]
        prectot = nc.variables['prectot'][:]

        point = geojson.point(float(lon), float(lat))

        properties = {
            'timestamp': timestamp,
            'area': float(areas),
            'tbmin': float(tbmin),
            'albedo': float(albedo),
            'swgdn': float(swgdn),
            'preccon': float(preccon),
            'precanv': float(precanv),
            'preclsc': float(preclsc),
            'tauthgh': float(tauthgh),
            'tautmid': float(tautmid),
            'tautlow': float(tautlow),
            'tauttot': float(tauttot),
            'prectot': float(prectot)
        }

        feature = geojson.Feature(geometry=point, properties=properties)
        features.append(feature)

feature_collection = geojson.FeatureCollection(features)

with open(output_dir, 'w') as f:
    geojson.dump(feature_collection, f)

print(f"GeoJSON file created: {output_dir}")
'''


GeoJSON file created: Geos5datasets.geojson
