In [None]:
import os
import fsspec
import xarray as xr
import numpy as np
import geojson
from google.cloud import storage
from datetime import datetime, timedelta
import pandas as pd
from google.cloud import bigquery
import pandas as pd
import concurrent.futures
from tqdm.notebook import tqdm



In [None]:
## Functions for selecting which GOES data to download and for compositing it

def get_blob_names(attime=None, bucket_name='gcp-public-data-goes-16'):
    """
    This function returns a list of blob names from the hour of `attime` and the previous hour.
    The blob names can be passed to a function that selects the blobs that form complete images.

    Parameters
    ----------
    attime : datetime, str, or None
        Reference time. Strings are parsed with `pd.to_datetime`. When None
        (the default) the current UTC time is taken at call time.
        NOTE(review): timezone-aware inputs are used as given, not converted
        to UTC -- presumably the bucket layout is UTC-based; confirm.
    bucket_name : str
        Name of the Google Cloud Storage bucket to list.

    Returns
    -------
    list of str
        Names of all blobs under the `ABI-L2-CMIPC/<year>/<day-of-year>/<hour>/`
        prefixes for the two hours, current hour first.
    """
    # Resolve "now" on every call. A `datetime.utcnow()` default in the signature
    # is evaluated only once, when the def statement runs, so it would go stale
    # in a long-lived notebook kernel.
    if attime is None:
        attime = datetime.utcnow()
    elif isinstance(attime, str):
        attime = pd.to_datetime(attime)

    # Set up Google Cloud Storage client
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)

    # Create a range of directories to check. The GOES bucket is
    # organized by hour of day.
    blob_names = []
    for i in range(2):  # Get blobs from current hour and previous hour
        current_time = attime - timedelta(hours=i)
        prefix = f'ABI-L2-CMIPC/{current_time.year}/{current_time.timetuple().tm_yday:03d}/{current_time.hour:02d}/'
        blob_names.extend(blob.name for blob in bucket.list_blobs(prefix=prefix))

    return blob_names

def extract_band_number(blob_name):
    """
    Extracts the band number from a blob name.

    The band is read as the last two characters of the second
    underscore-separated field of the file name (e.g. the `01` in
    `OR_ABI-L2-CMIPC-M6C01_G16_...`).

    Returns
    -------
    int or None
        The band number, or None when the name has no such field or the
        field does not end in an integer.
    """
    # Parse the file name only, so underscores in a directory prefix cannot
    # shift the field positions.
    file_name = blob_name.rsplit('/', 1)[-1]
    try:
        return int(file_name.split('_')[1][-2:])
    except (IndexError, ValueError):
        # IndexError: fewer than two fields; ValueError: non-numeric suffix.
        return None

def select_blobs(blob_names, blobs_per_band=12):
    """
    Selects the blobs that form complete images.

    For every band, the `blobs_per_band` blobs with the latest timestamp
    (fourth underscore-separated field of the file name, leading character
    stripped) are kept, newest first.

    Parameters
    ----------
    blob_names : iterable of str
        Candidate blob names. The input is not modified.
    blobs_per_band : int, optional
        Number of blobs required for each band (default 12).

    Returns
    -------
    dict
        Maps band number (int) to a list of `blobs_per_band` blob names.
        Empty when no usable name is supplied.

    Raises
    ------
    Exception
        If any band has fewer than `blobs_per_band` blobs.
    """
    def _name_fields(name):
        # Underscore-separated fields of the file name, directory prefix ignored.
        return name.rsplit('/', 1)[-1].split('_')

    # Names without a timestamp field cannot be ordered; leave them out
    # instead of failing inside the sort key.
    candidates = [name for name in blob_names if len(_name_fields(name)) > 3]

    # Sort blob names by timestamp, newest first. sorted() builds a new list,
    # so the caller's list keeps its original order.
    candidates = sorted(candidates, key=lambda name: _name_fields(name)[3][1:], reverse=True)

    # Collect the newest blobs for each band
    selected_blobs = {}
    for name in candidates:
        band = extract_band_number(name)

        # Names whose band cannot be parsed are not imagery for any band;
        # keeping them would create a `None` key that breaks later formatting.
        if band is None:
            continue

        band_blobs = selected_blobs.setdefault(band, [])
        if len(band_blobs) < blobs_per_band:
            band_blobs.append(name)

    # Check that every band has the required number of blobs
    for band, blobs in selected_blobs.items():
        if len(blobs) < blobs_per_band:
            raise Exception(f"Only {len(blobs)} blobs found for band {band}")

    return selected_blobs

def create_median_image(blob_list, fs, bucket_name='gcp-public-data-goes-16'):
    """
    This function creates a single-band image from a list of blob names.
    The pixel values in the image are the median values from the corresponding pixels in the input images.

    Parameters
    ----------
    blob_list : sequence of str
        Blob names within `bucket_name`. Must contain at least one entry.
    fs : file system object
        Object whose `open(path)` returns a readable file object usable as a
        context manager (the notebook passes an fsspec file system).
    bucket_name : str
        Bucket that the blob names belong to.

    Returns
    -------
    xarray.Dataset
        The dataset of the first blob, loaded into memory, with its 'CMI'
        variable replaced by the NaN-ignoring per-pixel median over all blobs.
        NOTE(review): the replacement is assigned as a (dims, data) tuple, so
        the original 'CMI' attributes are presumably not carried over --
        confirm if that metadata matters downstream.

    Raises
    ------
    ValueError
        If `blob_list` is empty.
    """
    if len(blob_list) == 0:
        raise ValueError("blob_list must contain at least one blob name")

    # One 'CMI' array per blob
    np_arrays = []

    # The first blob is kept as a full dataset and serves as the template for
    # the result. Load it into memory before the file handles are released.
    with fs.open(f'{bucket_name}/{blob_list[0]}') as f:
        with xr.open_dataset(f) as opened:
            ds = opened.load()
        np_arrays.append(ds['CMI'].values)

    # For the remaining blobs only the 'CMI' values are needed; the dataset is
    # closed as soon as they have been read.
    for blob in blob_list[1:]:
        with fs.open(f'{bucket_name}/{blob}') as f:
            with xr.open_dataset(f) as other:
                np_arrays.append(other['CMI'].values)

    # Median across the list of images (axis 0), ignoring NaNs
    median_np_array = np.nanmedian(np_arrays, axis=0)

    # Replace 'CMI' in the template dataset, reusing its own dimension names
    ds['CMI'] = (ds['CMI'].dims, median_np_array)

    # Return the modified dataset
    return ds

def create_multiband_image(band_composites):
    """
    This function creates a multiband image from a dictionary of band composites.
    It adds each composite as a separate variable in the same Dataset.

    Parameters
    ----------
    band_composites : dict
        Maps an integer band number to a dataset that has a 'CMI' variable.

    Returns
    -------
    xarray.Dataset
        One variable per band, named `CMI<band>` with the band number
        zero-padded to two digits (e.g. `CMI01`).
    """
    # Drop the 't' coordinate from each composite before taking its 'CMI'
    # variable (presumably because it differs between bands -- confirm).
    # `drop_vars` replaces the deprecated `Dataset.drop`; errors='ignore'
    # makes a composite without 't' a no-op rather than an error.
    renamed_composites = {
        f'CMI{band:02d}': composite.drop_vars('t', errors='ignore')['CMI']
        for band, composite in band_composites.items()
    }

    # Combine the per-band variables into a single Dataset
    multiband_image = xr.Dataset(renamed_composites)

    # Return the multiband image
    return multiband_image

In [None]:
# Set env variable for google cloud credentials, used behind the scenes by a couple functions.
# A GOOGLE_APPLICATION_CREDENTIALS value that is already exported takes precedence, so the
# notebook can run on another machine without editing this cell; the path below is only
# the fallback used when nothing is configured.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/adamhunter/Documents/school projs/firenet/data/credentials/firenet-99-writer.json',
)
# Use fsspec to create a file system, authenticated with the same key file
fs = fsspec.filesystem('gcs', token=os.environ['GOOGLE_APPLICATION_CREDENTIALS'])

In [None]:
# Test the get_blob_names function.
# Pass the timestamp explicitly so the query reflects the moment this cell runs,
# not the moment the function definition cell was executed.
blob_names = get_blob_names(attime=datetime.utcnow())
# Summarise instead of dumping every name into the cell output
print(f"{len(blob_names)} blobs found; first few: {blob_names[:3]}")

# Test the select_blobs function
selected_blobs = select_blobs(blob_names)
# Show how many blobs were kept per band rather than the full mapping
print({band: len(blobs) for band, blobs in selected_blobs.items()})

In [None]:
# Build one median composite per band, keyed by band number
median_images = {}
for band in selected_blobs:
    print(band)  # progress marker: band currently being processed
    blobs = selected_blobs[band]
    median_images[band] = create_median_image(blobs, fs)


In [None]:
# Create the multiband image: combine the per-band median composites
# into a single dataset via create_multiband_image
multiband_image = create_multiband_image(median_images)
