In [43]:
import os
import fsspec
import xarray as xr
import numpy as np
import geojson
from google.cloud import storage
from datetime import datetime, timedelta
import pandas as pd
from google.cloud import bigquery
import pandas as pd

In [34]:
# Open the NetCDF file
ds = xr.open_dataset('/Users/adamhunter/Downloads/goes16_abi_conus_lat_lon.nc')

# Print the contents of the dataset
print(ds)

<xarray.Dataset>
Dimensions:    (rows: 1500, columns: 2500)
Dimensions without coordinates: rows, columns
Data variables:
    latitude   (rows, columns) float32 ...
    longitude  (rows, columns) float32 ...
Attributes:
    title:    ABI fixed grid latitude and longitude for GOES-16 ABI CONUS sector
    comment:  Converted from GOES Imager Projection
    created:  File created 17 Apr 2023 by Amy Huff (amy.huff@noaa.gov)


In [35]:
# Extract the latitude and longitude mappings and convert them to float64
lat_mapping = ds['latitude'].values.astype(float)
lon_mapping = ds['longitude'].values.astype(float)

In [51]:
def get_lat_lon_mapping(bucket_name, blob_name):
    fs = fsspec.filesystem('gcs')
    with fs.open(f'{bucket_name}/{blob_name}') as f:
        ds = xr.open_dataset(f)
        lat_mapping = ds['latitude'].values.astype(float)
        lon_mapping = ds['longitude'].values.astype(float)
    return lat_mapping, lon_mapping

def calculate_degrees(file_id):
    
    # Read in GOES ABI fixed grid projection variables and constants
    x_coordinate_1d = file_id.variables['x'][:]  # E/W scanning angle in radians
    y_coordinate_1d = file_id.variables['y'][:]  # N/S elevation angle in radians
    projection_info = file_id.variables['goes_imager_projection']
    lon_origin = projection_info.attrs['longitude_of_projection_origin']
    H = projection_info.attrs['perspective_point_height'] + projection_info.attrs['semi_major_axis']
    r_eq = projection_info.attrs['semi_major_axis']
    r_pol = projection_info.attrs['semi_minor_axis']    
    
    # Create 2D coordinate matrices from 1D coordinate vectors
    x_coordinate_2d, y_coordinate_2d = np.meshgrid(x_coordinate_1d, y_coordinate_1d)
    
    # Equations to calculate latitude and longitude
    lambda_0 = (lon_origin*np.pi)/180.0  
    a_var = np.power(np.sin(x_coordinate_2d),2.0) + (np.power(np.cos(x_coordinate_2d),2.0)*(np.power(np.cos(y_coordinate_2d),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(y_coordinate_2d),2.0))))
    b_var = -2.0*H*np.cos(x_coordinate_2d)*np.cos(y_coordinate_2d)
    c_var = (H**2.0)-(r_eq**2.0)
    r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var)
    s_x = r_s*np.cos(x_coordinate_2d)*np.cos(y_coordinate_2d)
    s_y = - r_s*np.sin(x_coordinate_2d)
    s_z = r_s*np.cos(x_coordinate_2d)*np.sin(y_coordinate_2d)
    
    # Ignore numpy errors for sqrt of negative number; occurs for GOES-16 ABI CONUS sector data
    np.seterr(all='ignore')
    
    abi_lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y))))))
    abi_lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi)
    print(abi_lat)
    return abi_lat, abi_lon

def get_blob_names(attime=datetime.utcnow(), within=pd.to_timedelta("1H"), bucket_name='gcp-public-data-goes-16'):
    if isinstance(attime, str):
        attime = pd.to_datetime(attime)
    if isinstance(within, str):
        within = pd.to_timedelta(within)

    # Parameter Setup
    start = attime - within
    end = attime + within

    print(f"Start: {start}, End: {end}")

    # Set up Google Cloud Storage client
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)

    # Create a range of directories to check. The GOES bucket is
    # organized by hour of day.
    blob_names = []
    current_time = start
    while current_time <= end:
        prefix = f'ABI-L2-FDCC/{current_time.year}/{current_time.timetuple().tm_yday:03d}/{current_time.hour:02d}/'
        blobs = bucket.list_blobs(prefix=prefix)
        blob_names.extend([blob.name for blob in blobs])
        current_time += timedelta(hours=1)  # Increment current_time by 1 hour

    print(blob_names)
    return blob_names

def select_blobs(blob_names):
    # Sort blob names by timestamp
    blob_names.sort(key=lambda name: name.split('_')[3][1:], reverse=True)  # Extract timestamp after 's'

    # Extract band numbers from blob names
    try:
        band_numbers = [int(name.split('_')[1][-2:]) for name in blob_names]
    except ValueError:
        # If there is only one band and the band number is a string, return the most recent file
        return [blob_names[0]]

    # Get unique band numbers and sort them
    unique_band_numbers = sorted(set(band_numbers))

    # If there is only one unique band number, return the most recent file
    if len(unique_band_numbers) == 1:
        return [blob_names[0]]

    # Find the first continuous sequence that matches the expected band order
    for i in range(len(blob_names) - len(unique_band_numbers) + 1):
        selected = blob_names[i:i+len(unique_band_numbers)]
        band_order = [int(name.split('_')[1][-2:]) for name in selected]
        if band_order == unique_band_numbers:
            break
    else:
        raise Exception("No continuous sequence found that matches the expected band order")
    print(selected)
    return selected

def get_datasets(blob_names, bucket_name='gcp-public-data-goes-16'):
    # Open each blob as an xarray Dataset and store it in the dictionary under the corresponding channel identifier
    datasets = {}
    for name in blob_names:
        channel_id = name.split('_')[1]
        f = fs.open(f'{bucket_name}/{name}')
        ds = xr.open_dataset(f, engine='h5netcdf')
        datasets[channel_id] = ds
    print(datasets)
    return datasets

def generate_geojson_points(ds, lat_mapping, lon_mapping):
    # Process the data to generate GeoJSON
    band_data = ds['DQF'].values
    zero_indices = np.where(band_data == 0)
    zero_lat_lon = lat_mapping[zero_indices], lon_mapping[zero_indices]
    features = []
    for lat, lon in zip(*zero_lat_lon):
        point = geojson.Point((lon, lat))
        features.append(geojson.Feature(geometry=point))
    feature_collection = geojson.FeatureCollection(features)

    # Convert the GeoJSON to a string
    geojson_str = geojson.dumps(feature_collection)

    # Extract the timestamp from the dataset
    timestamp = ds.t.values

    return timestamp, geojson_str

def upload_to_bigquery(prediction_time, goesmask_geojson):
    # Initialize a BigQuery client
    client = bigquery.Client()

    # Specify your dataset and table
    dataset_id = 'geojson_predictions'
    table_id = 'goesmask'

    # Get the table
    table = client.dataset(dataset_id).table(table_id)
    table = client.get_table(table)

    # Convert numpy.datetime64 to datetime and then to string for bigquery
    prediction_time = pd.to_datetime(str(prediction_time)).strftime('%Y-%m-%dT%H:%M:%SZ')

    # Prepare the row to be inserted
    row = {
        'prediction_date': prediction_time,
        'goesmask_geojson': goesmask_geojson,
    }

    # Insert the row
    errors = client.insert_rows_json(table, [row])

    # Check if any errors occurred
    if errors:
        print('Errors:', errors)
    else:
        print('Row inserted successfully.')


def GOES_GEOJSON_UPDATE(request):
    # Use fsspec to create a file system
    fs = fsspec.filesystem('gcs')
    blob_names = get_blob_names()
    selected_blobs = select_blobs(blob_names)
    datasets = get_datasets(selected_blobs, fs)
    if datasets:
        first_ds_key = next(iter(datasets))
    else:
        # Handle the empty case, perhaps log an error or return
        print("No datasets available.")
        return

    first_ds = datasets[first_ds_key]

    # Generate GeoJSON points from the first dataset
    timestamp, geojson_str = generate_geojson_points(first_ds)
    # Upload the generated GeoJSON to BigQuery
    upload_to_bigquery(timestamp, geojson_str)

    return 'Function executed successfully', 200

In [45]:

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/adamhunter/Documents/school projs/firenet/data/credentials/firenet-99-writer.json'
# Use fsspec to create a file system
fs = fsspec.filesystem('gcs', token=os.environ['GOOGLE_APPLICATION_CREDENTIALS'])


In [46]:
attime = "2023-12-10 12:00:00"
blob_names = get_blob_names()

blob_names

Start: 2023-12-11 04:12:20.250872, End: 2023-12-11 06:12:20.250872
['ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450401176_e20233450403548_c20233450404127.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450406176_e20233450408548_c20233450409121.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450411176_e20233450413548_c20233450414129.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450416176_e20233450418548_c20233450419114.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450421176_e20233450423548_c20233450424135.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450426176_e20233450428548_c20233450429118.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450431176_e20233450433548_c20233450434109.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450436176_e20233450438548_c20233450439111.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450441176_e20233450443548_c20233450444113.nc', 'ABI-L2-FDCC/2023/345/04/OR_ABI-

['ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450401176_e20233450403548_c20233450404127.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450406176_e20233450408548_c20233450409121.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450411176_e20233450413548_c20233450414129.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450416176_e20233450418548_c20233450419114.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450421176_e20233450423548_c20233450424135.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450426176_e20233450428548_c20233450429118.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450431176_e20233450433548_c20233450434109.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450436176_e20233450438548_c20233450439111.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450441176_e20233450443548_c20233450444113.nc',
 'ABI-L2-FDCC/2023/345/04/OR_ABI-L2-FDCC-M6_G16_s20233450446176_e20233450448548_c2023345044

In [47]:
selected_blobs = select_blobs(blob_names)
selected_blobs

['ABI-L2-FDCC/2023/345/05/OR_ABI-L2-FDCC-M6_G16_s20233450506176_e20233450508548_c20233450509133.nc']

In [48]:
datasets = get_datasets(selected_blobs)
datasets

{'ABI-L2-FDCC-M6': <xarray.Dataset>
Dimensions:                                           (y: 1500, x: 2500,
                                                       number_of_time_bounds: 2,
                                                       number_of_image_bounds: 2,
                                                       number_of_sunglint_angle_bounds: 2,
                                                       number_of_LZA_bounds: 2,
                                                       number_of_SZA_bounds: 2)
Coordinates:
    t                                                 datetime64[ns] ...
  * y                                                 (y) float64 0.1282 ... ...
  * x                                                 (x) float64 -0.1013 ......
    y_image                                           float32 ...
    x_image                                           float32 ...
    sunglint_angle                                    float32 ...
    local_zenith_angle         

{'ABI-L2-FDCC-M6': <xarray.Dataset>
 Dimensions:                                           (y: 1500, x: 2500,
                                                        number_of_time_bounds: 2,
                                                        number_of_image_bounds: 2,
                                                        number_of_sunglint_angle_bounds: 2,
                                                        number_of_LZA_bounds: 2,
                                                        number_of_SZA_bounds: 2)
 Coordinates:
     t                                                 datetime64[ns] ...
   * y                                                 (y) float64 0.1282 ... ...
   * x                                                 (x) float64 -0.1013 ......
     y_image                                           float32 ...
     x_image                                           float32 ...
     sunglint_angle                                    float32 ...
     local_zenith_

In [52]:
# Extract the first dataset from the datasets dictionary
first_ds_key = next(iter(datasets))
first_ds = datasets[first_ds_key]
lat_mapping, lon_mapping = get_lat_lon_mapping('firenet_reference', 'goes16_abi_conus_lat_lon.nc')

# Generate GeoJSON points from the first dataset
timestamp, geojson_str = generate_geojson_points(first_ds, lat_mapping, lon_mapping)


In [42]:
# Upload the generated GeoJSON to BigQuery
upload_to_bigquery(timestamp, geojson_str)


Row inserted successfully.
