In [2]:
import rioxarray
import xarray as xr

# Remote OPeNDAP endpoint; the dataset is opened lazily over the network.
G5NR_URL = "https://opendap.nccs.nasa.gov/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/tavg/tavg30mn_2d_met3_Nx"

dataset = xr.open_dataset(G5NR_URL, decode_times=True)

# Tag the dataset (in place) with a geographic CRS and tell rioxarray which
# dimensions hold x (longitude) and y (latitude).
dataset.rio.write_crs("epsg:4326", inplace=True)
dataset.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)

  ref_date = _ensure_padded_year(ref_date)


In [16]:
# Print the plain-text summary (dims, coords, attrs) of the 'preccon' variable;
# only metadata is shown, the values themselves are not loaded here.
print(dataset['preccon'])

<xarray.DataArray 'preccon' (time: 36576, lat: 2881, lon: 5760)> Size: 2TB
[606962626560 values with dtype=float32]
Coordinates:
  * time         (time) datetime64[ns] 293kB 2005-05-15T21:15:00 ... 2007-06-...
  * lat          (lat) float64 23kB -90.0 -89.94 -89.88 ... 89.88 89.94 90.0
  * lon          (lon) float64 46kB -180.0 -179.9 -179.9 ... 179.8 179.9 179.9
    spatial_ref  int64 8B 0
Attributes:
    units:      kg m-2 s-1
    long_name:  convective_precipitation 


In [None]:
# Disabled draft: the entire cell body below is wrapped in a triple-quoted
# string literal, so none of it executes. The draft `get_clusters` only selects
# a time/lat/lon window and pulls individual variables; it returns nothing yet.
'''
import geopandas as gpd
from datetime import datetime, timezone, timedelta

grid_size = 0.0625
g5nr_frame_duration = timedelta(hours=1)
frame_duration = timedelta(minutes=10)
duration = timedelta(days=1)
start = datetime(year=2025, month=4, day=1, tzinfo=timezone.utc)

def get_clusters(frame):
    ds = dataset.sel(
        time=slice(
            (start+frame*g5nr_frame_duration).replace(day=7, month=1, year=2005, tzinfo=timezone.utc),
            (start+(frame+1)*g5nr_frame_duration).replace(day=7, month=1, year=2005, tzinfo=timezone.utc)
        ),
        lon=slice(-179, 179),
        lat=slice(-89,89)
    )

    #adding prospective features
    albedo = ds.isel(time=0)["albedo"].rio.set_spatial_dims("lon", "lat")
    preccon = ds.isel(time=0)["preccon"].rio.set_spatial_dims("lon", "lat")
    swgdn = ds.isel(time=0)["swgdn"].rio.set_spatial_dims("lon", "lat")
    prectot = ds.isel(time=0)["prectot"].rio.set_spatial_dims("lon", "lat")
    tauthgh = ds.isel(time=0)["tauthgh"].rio.set_spatial_dims("lon", "lat")
    tautmid = ds.isel(time=0)["tautmid"].rio.set_spatial_dims("lon", "lat")
    tautlow = ds.isel(time=0)["tautlow"].rio.set_spatial_dims("lon", "lat")
    tauttot = ds.isel(time=0)["tauttot"].rio.set_spatial_dims("lon", "lat")
    lwtup = ds.isel(time=0)["lwtup"].rio.set_spatial_dims("lon", "lat")
    precanv = ds.isel(time=0)["precanv"].rio.set_spatial_dims("lon", "lat")
    preclsc = ds.isel(time=0)["preclsc"].rio.set_spatial_dims("lon", "lat")

'''

In [None]:
import numpy as np
from netCDF4 import Dataset as netcdf
import scipy.ndimage as ndi
import datetime as dt

utc = dt.timezone.utc

def datetime_to_index(date):
    """Convert a datetime into an index along the dataset's time axis.

    The index is the number of whole 30-minute frames elapsed since
    2005-05-15T21:30:00Z, truncated toward zero.

    Parameters
    ----------
    date : datetime.datetime
        Moment to look up. A naive datetime is interpreted as UTC so the
        result does not depend on the machine's local timezone.

    Returns
    -------
    int
        Time index. Dates earlier than the origin give zero or a negative
        value; callers must not use those to index the dataset.
    """
    # 1116192600 s after the Unix epoch == 2005-05-15T21:30:00Z.
    # NOTE(review): the notebook prints the dataset's first time coordinate as
    # 2005-05-15T21:15:00, i.e. 15 minutes before this origin -- confirm that
    # the resulting frame alignment is the intended one.
    origin_epoch_seconds = 1116192600
    frame_seconds = 1800

    if date.tzinfo is None:
        # Without this, timestamp() would assume the local timezone.
        date = date.replace(tzinfo=utc)

    return int((date.timestamp() - origin_epoch_seconds) / frame_seconds)

def lat_to_index(lat):
    """Return the grid row for latitude ``lat`` (degrees).

    Rows are counted from -90 degrees in steps of 0.0625 degrees; a latitude
    that falls between two rows is truncated toward zero.
    """
    degrees_from_south_edge = lat + 90
    return int(degrees_from_south_edge / 0.0625)

def lon_to_index(lon):
    """Return the grid column for longitude ``lon`` (degrees).

    Columns are counted from -180 degrees in steps of 0.0625 degrees; a
    longitude that falls between two columns is truncated toward zero.
    """
    degrees_from_west_edge = lon + 180
    return int(degrees_from_west_edge / 0.0625)


# --- Input -------------------------------------------------------------
# Remote dataset, opened read-only; variables are fetched on demand.
geosdir = 'https://opendap.nccs.nasa.gov/dods/OSSE/G5NR/Ganymed/7km/0.0625_deg/tavg/tavg30mn_2d_met3_Nx'
data = netcdf(geosdir, 'r')

# --- Output ------------------------------------------------------------
# `case` is embedded in every output file name; `outdir` must end with a
# path separator because file names are appended by string concatenation.
case = 'c1440_NR'
outdir = '/Users/shashwatraj/Documents/Github/Code-Lab_RL_PriorityObs/Geos5datasets/'

# --- Physics -----------------------------------------------------------
sigma = 5.67037e-8   # Stefan-Boltzmann constant used to turn OLR into Tb
tbthresh = 220       # brightness temperature threshold

# --- Time range --------------------------------------------------------
startdate = dt.datetime(year=2005, month=6, day=15, tzinfo=utc)  # initial date
enddate = dt.datetime(year=2007, month=6, day=15, tzinfo=utc)
step = dt.timedelta(minutes=10)

    
#=== Domain Limits ===
lat1 = -89.
lat2 = 89.
lon1 = -179.8
lon2 = 179.8

lat = data.variables['lat'][:]
lon = data.variables['lon'][:]

# select within lat/lon limits (masked coordinate entries count as outside)
latind = np.ma.filled(np.logical_and(lat >= lat1, lat <= lat2), False)
lonind = np.ma.filled(np.logical_and(lon >= lon1, lon <= lon2), False)

if not latind.any() or not lonind.any():
    raise ValueError('domain limits select no grid points')

# Derive the slice bounds from the very masks that select the coordinates.
# Computing them separately by truncating (limit - origin) / 0.0625 picks the
# column just *below* lon1 = -179.8 (column 3 at -179.8125) while the mask
# starts at column 4, so the sliced fields ended up one column wider than
# `area`, `lat2d` and `lon2d`.
lat_rows = np.flatnonzero(latind)
lon_cols = np.flatnonzero(lonind)
lat_start, lat_stop = int(lat_rows[0]), int(lat_rows[-1]) + 1
lon_start, lon_stop = int(lon_cols[0]), int(lon_cols[-1]) + 1

lat = lat[latind]
lon = lon[lonind]
ny = lat.shape[0]
nx = lon.shape[0]

# The start:stop slices only describe the selection if it is one contiguous run.
if (lat_stop - lat_start, lon_stop - lon_start) != (ny, nx):
    raise ValueError('selected lat/lon points are not contiguous; '
                     'cannot express the domain as index slices')

C = 2.*np.pi*6371  # earth circumference [km]
d2r = np.pi/180  # degrees to radians
dlon = float(lon[2]-lon[1])  # grid spacing [deg]; also used as the latitude spacing below

lat_values = np.ma.getdata(lat).astype(float)
lon_values = np.ma.getdata(lon).astype(float)

# Cell area [km^2]: east-west width shrinks with cos(latitude), north-south
# height is constant. Every column of a row has the same area.
row_area = (C*np.cos(lat_values*d2r)*dlon/360.)*(C*dlon/360.)
area = np.tile(row_area[:, np.newaxis], (1, nx))

# 2-D latitude / longitude of every cell, shape (ny, nx).
lat2d, lon2d = np.meshgrid(lat_values, lon_values, indexing='ij')

# Fields whose per-cluster mean is stored in the output files.
mean_fields = ('preccon', 'precanv', 'preclsc', 'albedo', 'swgdn',
               'tauthgh', 'tautmid', 'tautlow')

# Order in which the per-cluster variables are created in each output file.
output_fields = ('area', 'tbmin', 'lat', 'lon', 'albedo', 'swgdn',
                 'preccon', 'precanv', 'preclsc',
                 'tauthgh', 'tautmid', 'tautlow')

# Number of latitude rows fetched per remote request.
# NOTE(review): the recorded run of this cell stopped at the first read with
# "DAP DATADDS packet is apparently too short" while asking for a whole
# domain-wide field in one request. Splitting the read into bands is meant as
# a mitigation -- confirm against the server and tune this value if needed.
rows_per_request = 256


def _read_field(name, time_index):
    """Fetch variable `name` at `time_index` over the domain, one latitude band at a time.

    Returns a 2-D masked array covering rows lat_start:lat_stop and columns
    lon_start:lon_stop.
    """
    variable = data.variables[name]
    bands = [
        variable[time_index,
                 row:min(row + rows_per_request, lat_stop),
                 lon_start:lon_stop]
        for row in range(lat_start, lat_stop, rows_per_request)
    ]
    return np.ma.concatenate(bands, axis=0)


def _cluster_stats(time_index):
    """Label cold-cloud clusters in one frame and reduce every field per cluster.

    Returns `(cnum, stats)` where `cnum` is the number of clusters and `stats`
    maps each name in `output_fields` to an array with one entry per cluster.
    """
    #======= Load TB ========
    # Estimate from OLR assuming cloudtops radiate as blackbody
    olr = _read_field('lwtup', time_index)
    if olr.shape != area.shape:
        raise ValueError('field shape %s does not match the grid geometry %s; '
                         'check the domain index bounds'
                         % (olr.shape, area.shape))
    tb = np.sqrt(np.sqrt(olr/sigma))   # stefan-boltzmann

    #=== define cloud mask as Tb below the threshold ===
    cloudy = tb < tbthresh

    #============ Cluster statistics ===============
    # `labels` assigns every cloudy cell a cluster number 1..cnum (0 = clear).
    labels, cnum = ndi.label(cloudy)
    cluster_numbers = np.arange(1, cnum+1)

    stats = {
        'area': ndi.sum(area, labels, index=cluster_numbers),
        'tbmin': ndi.minimum(tb, labels, index=cluster_numbers),
        'lat': ndi.mean(lat2d, labels, index=cluster_numbers),
        'lon': ndi.mean(lon2d, labels, index=cluster_numbers),
    }
    # One field at a time, so only a single full field is held in memory.
    for name in mean_fields:
        stats[name] = ndi.mean(_read_field(name, time_index), labels,
                               index=cluster_numbers)
    return cnum, stats


def _write_cluster_file(path, cnum, stats):
    """Write one frame of cluster statistics to a NETCDF4 file at `path`."""
    # The context manager closes the file even if a write fails part-way.
    with netcdf(path, 'w', format='NETCDF4') as ncwrite_id:
        ncwrite_id.createDimension('time', None)
        ncwrite_id.createDimension('cluster', cnum)

        clusterid = ncwrite_id.createVariable('cluster', 'f4', ('time', 'cluster',))
        clusterid[0, :] = np.arange(cnum)

        for name in output_fields:
            variable = ncwrite_id.createVariable(name, 'f4', ('time', 'cluster',))
            variable[0, :] = stats[name][:cnum]


date = startdate
cached_index = None   # time index whose statistics are currently held in cnum/stats

print('Starting Loops')
# Loop through time
while date < enddate:

    datestr = date.strftime('%Y%m%d_%H%M')
    time_index = datetime_to_index(date)

    # Several consecutive steps can map to the same frame; download and label
    # it only once and reuse the result for the following steps.
    if time_index != cached_index:
        cnum, stats = _cluster_stats(time_index)
        cached_index = time_index

    print("cluster count: "+str(cnum))

    #===== Save as netcdf =====
    outname = 'cluster_stats_'+case+'_tb'+str(tbthresh)+'K_'+datestr+'.nc4'
    print(" Saving: "+outdir+outname)
    _write_cluster_file(outdir+outname, cnum, stats)
    print('loop complete')

    date = date + step

Starting Loops


Error:DAP DATADDS packet is apparently too short


In [1]:
import geojson
import glob
import os

input_dir = '/Users/shashwatraj/Documents/Github/Code-Lab_RL_PriorityObs/Geos5datasets/'
output_dir = 'Geos5datasets.geojson'   # despite the name, this is the output *file* path

# Per-cluster variables copied into each feature's properties. A variable that
# is absent from a file is skipped for that file instead of raising KeyError
# (the cluster files written earlier in this notebook contain no 'tauttot' or
# 'prectot').
property_names = ('area', 'tbmin', 'albedo', 'swgdn', 'preccon', 'precanv',
                  'preclsc', 'tauthgh', 'tautmid', 'tautlow', 'tauttot', 'prectot')

features=[]

for file in sorted(glob.glob(os.path.join(input_dir,"*.nc4"))):
    # File names end in '..._YYYYMMDD_HHMM.nc4'; keep both the date and the
    # time token (taking only the last token would drop the date).
    stem = os.path.splitext(os.path.basename(file))[0]
    timestamp = '_'.join(stem.split('_')[-2:])

    with netcdf(file,'r') as nc:
        # Every variable is stored as (time, cluster); flatten it to one value
        # per cluster. tolist() yields Python floats (None for masked entries).
        present = [name for name in property_names if name in nc.variables]
        columns = {name: nc.variables[name][:].ravel().tolist() for name in present}
        cluster_lons = nc.variables['lon'][:].ravel().tolist()
        cluster_lats = nc.variables['lat'][:].ravel().tolist()

    # One point feature per cluster.
    for cluster, (cluster_lon, cluster_lat) in enumerate(zip(cluster_lons, cluster_lats)):
        if cluster_lon is None or cluster_lat is None:
            continue   # no usable position for this cluster

        # geojson.Point takes a single (x, y) == (lon, lat) coordinate tuple.
        point = geojson.Point((cluster_lon, cluster_lat))

        properties = {'timestamp': timestamp}
        for name in present:
            properties[name] = columns[name][cluster]

        features.append(geojson.Feature(geometry=point, properties=properties))

feature_collection = geojson.FeatureCollection(features)

with open(output_dir, 'w') as f:
    geojson.dump(feature_collection, f)

print(f"GeoJSON file created: {output_dir}")


Collecting geojson
  Using cached geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Using cached geojson-3.2.0-py3-none-any.whl (15 kB)
Installing collected packages: geojson
Successfully installed geojson-3.2.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
GeoJSON file created: Geos5datasets.geojson
