In [1]:
import os
import fsspec
import xarray as xr
import numpy as np
import geojson
from google.cloud import storage
from datetime import datetime, timedelta
import pandas as pd
from google.cloud import bigquery
import pandas as pd
import concurrent.futures
from tqdm.notebook import tqdm



In [2]:
## Functions for selecting which GOES blobs to download and for building median / multiband images from them

def get_blob_names(attime=None, bucket_name='gcp-public-data-goes-16'):
    """
    Return the ABI-L2-CMIPC blob names for the hour of `attime` and the hour before it.

    The blob names can be passed to a function that selects the blobs that form complete images.

    Parameters
    ----------
    attime : datetime, str, or None
        Reference time used to build the hourly prefixes. Strings are parsed with
        `pd.to_datetime`. When None (the default), the current UTC time is read
        each time the function is called.
    bucket_name : str
        Name of the Google Cloud Storage bucket to list.

    Returns
    -------
    list of str
        Blob names for the hour of `attime`, followed by those for the previous hour.
    """
    # A default of `datetime.utcnow()` in the signature would be evaluated only once,
    # when the function is defined, so later calls would keep using that stale time.
    # Resolve "now" here instead.
    if attime is None:
        attime = datetime.utcnow()
    elif isinstance(attime, str):
        attime = pd.to_datetime(attime)

    # Set up Google Cloud Storage client
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)

    # The bucket is listed one hour at a time using prefixes of the form
    # 'ABI-L2-CMIPC/<year>/<day of year, 3 digits>/<hour, 2 digits>/'.
    blob_names = []
    for hours_back in range(2):  # current hour, then the previous hour
        current_time = attime - timedelta(hours=hours_back)
        prefix = f'ABI-L2-CMIPC/{current_time.year}/{current_time.timetuple().tm_yday:03d}/{current_time.hour:02d}/'
        blob_names.extend(blob.name for blob in bucket.list_blobs(prefix=prefix))

    return blob_names

def extract_band_number(blob_name):
    """
    Extract the band number from a blob name.

    The band is read from the last two characters of the second '_'-separated
    field of the name (e.g. '...OR_ABI-L2-CMIPC-M6C01_G16_...' -> 1).

    Parameters
    ----------
    blob_name : str
        Blob name (with or without its directory prefix).

    Returns
    -------
    int or None
        The band number, or None when the name has no second '_'-separated field
        or that field does not end in two digits.
    """
    try:
        return int(blob_name.split('_')[1][-2:])
    except (IndexError, ValueError):
        # IndexError: the name contains no '_' at all, so there is no second field.
        # ValueError: the field's last two characters are not an integer.
        return None

def select_blobs(blob_names, images_per_band=12):
    """
    Select, for every band, the most recent blobs that form a complete set of images.

    Parameters
    ----------
    blob_names : iterable of str
        Blob names to choose from. The input is not modified.
    images_per_band : int
        Number of blobs to keep for each band (default 12).

    Returns
    -------
    dict
        Maps each band number (as returned by `extract_band_number`) to a list of
        `images_per_band` blob names, newest first.

    Raises
    ------
    Exception
        If any band has fewer than `images_per_band` blobs.
    """
    # Order newest-first by the fourth '_'-separated field with its leading
    # character dropped (the 's...' timestamp field). sorted() returns a new list,
    # so the caller's list keeps its original order.
    newest_first = sorted(
        blob_names,
        key=lambda name: name.split('_')[3][1:],
        reverse=True,
    )

    # Collect up to `images_per_band` blob names per band
    selected_blobs = {}
    for name in newest_first:
        band_blobs = selected_blobs.setdefault(extract_band_number(name), [])
        if len(band_blobs) < images_per_band:
            band_blobs.append(name)

    # Every band that appeared must have a full set
    for band, blobs in selected_blobs.items():
        if len(blobs) < images_per_band:
            raise Exception(f"Only {len(blobs)} blobs found for band {band}")

    return selected_blobs

def create_median_image(blob_list, fs, bucket_name='gcp-public-data-goes-16'):
    """
    Build a per-pixel median composite from a list of blobs.

    Every blob in `blob_list` is opened through `fs` at '<bucket_name>/<blob>',
    read as an xarray Dataset and fully loaded into memory. The datasets are then
    stacked along a new 'image' dimension and reduced with a median over that
    dimension, keeping attributes.

    Returns the resulting median Dataset.
    """
    def _load(blob_name):
        # Load eagerly so the data remains usable after the file handle is closed
        with fs.open(f'{bucket_name}/{blob_name}') as handle:
            return xr.open_dataset(handle).load()

    # Stack all images along a new 'image' axis, then take the median across it
    stacked = xr.concat([_load(blob_name) for blob_name in blob_list], dim='image')
    return stacked.median(dim='image', keep_attrs=True)

def create_multiband_image(band_composites):
    """
    Create a multiband image from a dictionary of band composites.

    Each composite's 'CMI' variable becomes a separate variable in one Dataset,
    named 'CMI' followed by the band number zero-padded to two digits
    (e.g. band 2 -> 'CMI02').

    Parameters
    ----------
    band_composites : dict
        Maps an integer band number to a composite Dataset containing a 'CMI' variable.

    Returns
    -------
    xr.Dataset
        One variable per band. Dataset-level attributes are copied from the first
        composite in iteration order; an empty input yields a Dataset with no
        variables and no attributes.
    """
    # Rename each composite's 'CMI' variable after its band
    renamed_composites = {
        f'CMI{band:02d}': composite['CMI']
        for band, composite in band_composites.items()
    }

    # Only the first composite's attributes are carried over. With an empty input
    # there is nothing to copy, so fall back to an empty dict rather than handing
    # None to `.attrs`.
    first_composite = next(iter(band_composites.values()), None)
    attributes = first_composite.attrs if first_composite is not None else {}

    # Merge the composites into a single Dataset and attach the saved attributes
    multiband_image = xr.Dataset(renamed_composites)
    multiband_image.attrs = attributes

    return multiband_image



In [3]:
# Point the Google Cloud client libraries at a service-account key file via the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
# A value that is already exported takes precedence, so the notebook can run on
# another machine without editing this cell; the path below is only a fallback.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/adamhunter/Documents/school projs/firenet/data/credentials/firenet-99-writer.json',
)
# Use fsspec to create a GCS file system that authenticates with the same key file
fs = fsspec.filesystem('gcs', token=os.environ['GOOGLE_APPLICATION_CREDENTIALS'])

In [4]:
# Test the get_blob_names function
blob_names = get_blob_names()
print(blob_names)

# Test the select_blobs function
selected_blobs = select_blobs(blob_names)
print(selected_blobs)

['ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C01_G16_s20240162001174_e20240162003547_c20240162004018.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C01_G16_s20240162006174_e20240162008547_c20240162009014.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C01_G16_s20240162011174_e20240162013547_c20240162014017.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C01_G16_s20240162016174_e20240162018547_c20240162019028.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C02_G16_s20240162001174_e20240162003547_c20240162004046.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C02_G16_s20240162006174_e20240162008547_c20240162009035.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C02_G16_s20240162011174_e20240162013547_c20240162014036.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C02_G16_s20240162016174_e20240162018547_c20240162019046.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C03_G16_s20240162001174_e20240162003547_c20240162004015.nc', 'ABI-L2-CMIPC/2024/016/20/OR_ABI-L2-CMIPC-M6C03_G16_s2

In [5]:
# Build one median composite per band, keyed by band number.
# create_median_image opens and fully loads every blob of the band before
# reducing. If this loop is interrupted, median_images holds only the bands
# that finished.
median_images = {}
for band, blobs in selected_blobs.items():
    # Progress marker: the band about to be processed
    print(band)
    median_images[band] = create_median_image(blobs, fs)

1
2


KeyboardInterrupt: 

In [None]:
# Display the band-2 median composite. Raises KeyError if the loop that fills
# median_images did not complete band 2.
median_images[2]

KeyError: 2

In [None]:
# Combine the per-band median composites into one Dataset with a 'CMIxx'
# variable per band.
multiband_image = create_multiband_image(median_images)

In [None]:
# # Define the PROJ string
# crs_proj = "+proj=geos +lon_0=-75 +h=35786023 +x_0=0 +y_0=0 +ellps=GRS80 +units=m +no_defs +sweep=x"

# # Set the CRS for the multiband_image dataset
# multiband_image.rio.write_crs(crs_proj, inplace=True)

In [None]:
# Save the band-2 median composite on its own
median_images[2].to_netcdf('median_image_2.nc')
# Remove the 'grid_mapping' key from the attributes of each variable
# (pop with a default, so variables without the key are left untouched)
for var in multiband_image.variables:
    multiband_image[var].attrs.pop('grid_mapping', None)

# Replace missing values with 0. This rebinds multiband_image, so cells that run
# after this one see the filled dataset.
multiband_image = multiband_image.fillna(0)  # Fill with zeros

# Save the dataset to a netCDF file
multiband_image.to_netcdf('multiband_image.nc')

  median_images[2].to_netcdf('median_image_2.nc')
  median_images[2].to_netcdf('median_image_2.nc')
  multiband_image.to_netcdf('multiband_image.nc')
  multiband_image.to_netcdf('multiband_image.nc')


In [26]:
# Print the dataset-level attributes of the band-2 median composite
print(median_images[2].attrs)

# Print the coordinates of the band-2 median composite
print(median_images[2].coords)

{'naming_authority': 'gov.nesdis.noaa', 'Conventions': 'CF-1.7', 'Metadata_Conventions': 'Unidata Dataset Discovery v1.0', 'standard_name_vocabulary': 'CF Standard Name Table (v35, 20 July 2016)', 'institution': 'DOC/NOAA/NESDIS > U.S. Department of Commerce, National Oceanic and Atmospheric Administration, National Environmental Satellite, Data, and Information Services', 'project': 'GOES', 'production_site': 'NSOF', 'production_environment': 'OE', 'spatial_resolution': '0.5km at nadir', 'orbital_slot': 'GOES-East', 'platform_ID': 'G16', 'instrument_type': 'GOES-R Series Advanced Baseline Imager (ABI)', 'scene_id': 'CONUS', 'instrument_ID': 'FM1', 'dataset_name': 'OR_ABI-L2-CMIPC-M6C02_G16_s20240161851174_e20240161853547_c20240161854033.nc', 'iso_series_metadata_id': '8c9e8150-3692-11e3-aa6e-0800200c9a66', 'title': 'ABI L2 Cloud and Moisture Imagery', 'summary': 'Single reflective band Cloud and Moisture Imagery Products are digital maps of clouds, moisture, and atmospheric windows at

In [12]:
import rasterio

# Load the elevation GeoTIFF.
# NOTE(review): file_path is an absolute, machine-specific path — it must be
# adjusted to run this cell elsewhere.
file_path = '/Users/adamhunter/Documents/school projs/firenet/data/static_layers/LF2020_Elev_220_CONUS/Tif/LC20_Elev_220.tif'
with rasterio.open(file_path) as src:
    # Read band 1 of the raster into memory
    elevation_data = src.read(1)



In [13]:
# Re-open the elevation raster to read its coordinate reference system,
# which is the target CRS for the reprojection further down.
with rasterio.open(file_path) as src:
    elevation_crs = src.crs

In [14]:
# Importing rioxarray registers the `.rio` accessor used below
import rioxarray

# Reproject the multiband_image to the elevation_data's CRS
# NOTE(review): the cell that calls write_crs is commented out and another cell
# strips 'grid_mapping' from the variables — confirm multiband_image still
# carries a source CRS when this runs.
multiband_image_reprojected = multiband_image.rio.reproject(elevation_crs)

In [15]:
# Display the reprojected dataset
multiband_image_reprojected

In [16]:
# Save the reprojected dataset to a netCDF file in the working directory
multiband_image_reprojected.to_netcdf('multiband_image_reprojected.nc')


In [17]:
# Report, per variable, whether multiband_image contains any missing values
# (True means at least one missing value is present)
print(multiband_image.isnull().any())

<xarray.Dataset>
Dimensions:                 ()
Coordinates:
    y_image                 float32 0.08624
    x_image                 float32 -0.03136
    goes_imager_projection  int64 0
Data variables:
    CMI01                   bool True
    CMI02                   bool True
