In [1]:
# GLM_05deg_grouping_by_lon_lat.ipynb 

# This programme opens the GLM files and bins the flash data onto a 0.5° x 0.5°
# latitude/longitude grid, the same grid as the FLEXPART output. It then saves
# the result in netCDF format.

# C. Mackay March 2023 (Catherine.Mackay@aero.obs-mip.fr)
# https://github.com/ckmackay/SOFT-IO-LI.git

#Suggestions/improvements to be made:

# could automatically loop over several days if required.


In [None]:
import numpy as np
import xarray as xr
import tqdm
import pathlib
import os.path

In [14]:
# Input directory: its contents are listed below to find the files to grid.
# NOTE(review): the "s2018154" suffix presumably encodes year 2018,
# day-of-year 154 — confirm.
idir = "/o3p/macc/glm/OR_GLM-L2-LCFA_G16_s2018154"

In [15]:
# To avoid problems with missing files, take the file names from the input
# directory itself and only use those. Keep netCDF files only, and sort them
# so the processing order is reproducible (os.listdir returns entries in
# arbitrary order).
filenames = sorted(name for name in os.listdir(idir) if name.endswith('.nc'))
print(len(filenames))


23


In [16]:
#latitude = np.linspace(-89.5, 89.5, 180) # 1.0° resolution
#longitude = np.linspace(-179.5, 179.5, 360) # 1.0° resolution

In [17]:
# Cell-centre coordinates of the global 0.5° x 0.5° grid.
grid_res = 0.5  # grid spacing in degrees
n_lat, n_lon = int(180 / grid_res), int(360 / grid_res)
latitude = np.linspace(-90 + grid_res / 2, 90 - grid_res / 2, n_lat)
longitude = np.linspace(-180 + grid_res / 2, 180 - grid_res / 2, n_lon)

In [18]:
#latitude = np.linspace(-89.75, 90, 720) # 0.25°resolution
#longitude = np.linspace(-179.75, 180, 1440) # 0.25°resolution

In [19]:
# Grid every GLM file found in `idir` onto the latitude/longitude grid defined
# above and write one netCDF file per input file.
odir = '/o3p/macc/test/GLM_array_154_05deg'  # output directory
grid_dims = ['latitude', 'longitude']
grid_shape = (len(latitude), len(longitude))

# Skip anything that is not a netCDF file, and sort so that the processing
# order does not depend on the arbitrary order returned by os.listdir.
for filename in sorted(name for name in filenames if name.endswith('.nc')):
    print(filename)

    # Fresh, zero-filled output grid for this file. Just keep flash and group
    # information; the counters are integer on purpose.
    # NOTE(review): group_energy and num_group are allocated and written to the
    # output file but never filled below, so they are always zero — confirm
    # whether the group variables should be gridded like the flash ones.
    dr = xr.Dataset(data_vars={'flash_energy': (grid_dims, np.zeros(shape=grid_shape)),
                               'num_flash': (grid_dims, np.zeros(shape=grid_shape, dtype=int)),
                               'group_energy': (grid_dims, np.zeros(shape=grid_shape)),
                               'num_group': (grid_dims, np.zeros(shape=grid_shape, dtype=int))},
                    coords={'latitude': latitude,
                            'longitude': longitude})

    # Open the file that was actually listed instead of rebuilding its name
    # from pieces, so an unexpected file name can never open a different file.
    with xr.open_dataset(str(pathlib.PurePath(idir, filename))) as ds:
        # Label every flash with the nearest grid latitude and longitude.
        ds_with_lat_lon_bins = ds.assign_coords({'latitude': dr.latitude.sel(latitude=ds.flash_lat, method='nearest'),
                                                 'longitude': dr.longitude.sel(longitude=ds.flash_lon, method='nearest')})

        flash_energy_by_lat = ds_with_lat_lon_bins.flash_energy.groupby('latitude')

        # For each grid latitude that received flashes, group by grid longitude
        # and store the summed energy and the flash count in the output grid.
        for lat, flash_energy_for_lat in tqdm.tqdm(flash_energy_by_lat):
            flash_energy_for_lat_by_lon = flash_energy_for_lat.groupby('longitude')
            flash_energy_sum = flash_energy_for_lat_by_lon.sum()
            dr.flash_energy.loc[dict(latitude=lat, longitude=flash_energy_sum.longitude)] = flash_energy_sum
            flash_energy_count = flash_energy_for_lat_by_lon.count()
            dr.num_flash.loc[dict(latitude=lat, longitude=flash_energy_count.longitude)] = flash_energy_count

    # Output name is the input name with '_batch_bis' inserted before '.nc'.
    out_name = pathlib.PurePath(filename).stem + '_batch_bis.nc'
    dr.to_netcdf(str(pathlib.PurePath(odir, out_name)))

  0%|          | 0/89 [00:00<?, ?it/s]

GLM_array_154_10-11.nc


100%|██████████| 89/89 [00:05<00:00, 15.79it/s]
  0%|          | 0/118 [00:00<?, ?it/s]

GLM_array_154_19-20.nc


100%|██████████| 118/118 [00:13<00:00,  8.64it/s]
  0%|          | 0/126 [00:00<?, ?it/s]

GLM_array_154_03-04.nc


100%|██████████| 126/126 [00:11<00:00, 10.73it/s]
  0%|          | 0/133 [00:00<?, ?it/s]

GLM_array_154_00-01.nc


100%|██████████| 133/133 [00:21<00:00,  6.09it/s]
  0%|          | 0/115 [00:00<?, ?it/s]

GLM_array_154_16-17.nc


100%|██████████| 115/115 [00:04<00:00, 25.17it/s]
  0%|          | 0/116 [00:00<?, ?it/s]

GLM_array_154_23-00.nc


100%|██████████| 116/116 [00:22<00:00,  5.15it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

GLM_array_154_20-21.nc


100%|██████████| 114/114 [00:19<00:00,  5.81it/s]
  9%|▉         | 8/91 [00:00<00:01, 77.86it/s]

GLM_array_154_12-13.nc


100%|██████████| 91/91 [00:03<00:00, 23.91it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

GLM_array_154_14-15.nc


100%|██████████| 88/88 [00:03<00:00, 24.90it/s]
  0%|          | 0/112 [00:00<?, ?it/s]

GLM_array_154_05-06.nc


100%|██████████| 112/112 [00:07<00:00, 14.93it/s]
  0%|          | 0/130 [00:00<?, ?it/s]

GLM_array_154_01-02.nc


100%|██████████| 130/130 [00:19<00:00,  6.59it/s]
  0%|          | 0/112 [00:00<?, ?it/s]

GLM_array_154_04-05.nc


100%|██████████| 112/112 [00:08<00:00, 13.92it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

GLM_array_154_06-07.nc


100%|██████████| 107/107 [00:07<00:00, 14.13it/s]
 12%|█▏        | 10/86 [00:00<00:00, 90.57it/s]

GLM_array_154_13-14.nc


100%|██████████| 86/86 [00:03<00:00, 24.50it/s]
  5%|▌         | 6/118 [00:00<00:02, 54.71it/s]

GLM_array_154_17-18.nc


100%|██████████| 118/118 [00:05<00:00, 20.41it/s]
 12%|█▏        | 11/95 [00:00<00:00, 99.33it/s]

GLM_array_154_11-12.nc


100%|██████████| 95/95 [00:04<00:00, 21.38it/s]
  0%|          | 0/115 [00:00<?, ?it/s]

GLM_array_154_21-22.nc


100%|██████████| 115/115 [00:22<00:00,  5.12it/s]


GLM_array_154_02-03.nc


100%|██████████| 129/129 [00:15<00:00,  8.26it/s]
  0%|          | 0/87 [00:00<?, ?it/s]

GLM_array_154_15-16.nc


100%|██████████| 87/87 [00:02<00:00, 31.60it/s]
  4%|▍         | 4/100 [00:00<00:02, 39.70it/s]

GLM_array_154_07-08.nc


100%|██████████| 100/100 [00:07<00:00, 13.44it/s]
  0%|          | 0/113 [00:00<?, ?it/s]

GLM_array_154_22-23.nc


100%|██████████| 113/113 [00:21<00:00,  5.16it/s]
  0%|          | 0/94 [00:00<?, ?it/s]

GLM_array_154_08-09.nc


100%|██████████| 94/94 [00:06<00:00, 14.10it/s]
  0%|          | 0/97 [00:00<?, ?it/s]

GLM_array_154_09-10.nc


100%|██████████| 97/97 [00:06<00:00, 15.63it/s]


In [25]:
# let's compare with the results obtained by other methods:
#dr2 = xr.load_dataset('/home/macc/test/GLM_array_test/test_GLM_array_151_20-21_batch_bis.nc')
#dr3 = xr.load_dataset('/home/macc/test/GLM_array_151/GLM_array_151_20-21_batch_bis.nc')
#xr.testing.assert_allclose(dr, dr2)
#xr.testing.assert_allclose(dr, dr3)
#xr.testing.assert_allclose(dr2, dr3)