In [1]:
import ee
import xarray as xr
import pandas as pd
import numpy as np
import dask.array as da
import xee
from bbox import * 
from datetime import datetime

# Earth Engine session setup: run the authentication flow, then initialise
# the client library against the Cloud project used by this notebook.
ee.Authenticate()
ee.Initialize(project='satellite-modeling')

# Study-area rectangle (described by the original author as encompassing
# Louisiana and Texas). The corner coordinates are not defined in this cell;
# presumably they come from the `bbox` star import -- verify.
bbox = ee.Geometry.Rectangle([lon_min, lat_min, lon_max, lat_max])

# Query window: fixed start date, end date is "today" at execution time,
# both formatted as YYYY-MM-DD strings.
start_date = '2023-07-31'
end_date = f"{datetime.now():%Y-%m-%d}"

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_0JLhFqfSY1uiEaW?source=Init


In [2]:
# Read the `time_hr_ct` column of the NO2 file-hours CSV into a plain list.
no2_hours = list(
    pd.read_csv("../../data/tempo_data/no2_file_hours.csv")['time_hr_ct']
)
# Same values as a datetime64[ns] array, used later for matching on time.
time_list = np.asarray(no2_hours, dtype="datetime64[ns]")

In [6]:
##################################################
# Load GEOS-CF Data
##################################################

# Bands to request from the GEOS-CF collection.
variables = ['Q2M', 'T2M', 'U10M', 'V10M']

# GEOS-CF hourly collection, narrowed to the date window, the study
# rectangle, and the bands listed above.
dataset = (
    ee.ImageCollection("NASA/GEOS-CF/v1/rpl/tavg1hr")
    .filterDate(start_date, end_date)
    .filterBounds(bbox)
    .select(variables)
)

# Open the collection through the 'ee' xarray engine, then keep only the
# grid cells inside the latitude/longitude bounds.
# NOTE(review): `scale=0.01` is presumably in degrees given the EPSG:4326 CRS -- confirm.
study_window = {
    "lat": slice(lat_min, lat_max),
    "lon": slice(lon_min, lon_max),
}
ds = xr.open_dataset(dataset, engine='ee', crs='EPSG:4326', scale=0.01)
ds = ds.sel(**study_window)

In [7]:
##################################################
# Change Timestamps
##################################################
# Rebuild the dataset's time values as timezone-aware UTC timestamps.
ds_time_list = ds["time"].values.tolist()
timestamps_utc = pd.to_datetime(ds_time_list).tz_localize('UTC')

# Shift to US Central wall-clock time (tz_convert applies the daylight-saving
# rules), then drop the timezone so the values can be stored as plain
# datetime64[ns].
timestamps_central = np.array(
    timestamps_utc.tz_convert('America/Chicago').tz_localize(None),
    dtype="datetime64[ns]",
)

# Swap the converted values in as the `time` coordinate.
ds = ds.assign_coords(time=timestamps_central)

# After dropping the timezone some wall-clock hours can repeat (time change);
# keep only the first occurrence of each timestamp.
first_occurrence = ~ds.get_index("time").duplicated()
ds = ds.isel(time=first_occurrence)

In [8]:
# Keep only the timestamps present in BOTH the weather dataset
# (`timestamps_central`) and the satellite hour list (`time_list`).
seta = set(timestamps_central)
setb = set(time_list)
# NOTE: `&` is a set intersection; the name `union_set` is kept only so that
# any other cell referring to it keeps working.
union_set = seta & setb
# Sets have no defined order. Sort the common timestamps so the selection
# below yields a chronologically ordered time axis instead of an arbitrary one.
full_time_list = sorted(union_set)
filtered_ds = ds.sel(time=full_time_list)

In [11]:
# Chunk the matched dataset along time (100 steps per chunk) and add a
# `temp_celsius` variable computed as T2M minus 273.15.
ds_dask = filtered_ds.chunk({'time': 100}).assign(
    temp_celsius=lambda chunked: chunked['T2M'] - 273.15
)

In [None]:
from dask.diagnostics import ProgressBar

# Re-chunk the matched dataset along time (20 steps per chunk) before writing;
# the original author notes that chunking stops to_netcdf from killing the kernel.
# NOTE(review): this starts again from `filtered_ds`, so a `temp_celsius`
# variable added to an earlier `ds_dask` is not part of what gets written --
# confirm that is intended.
ds_dask = filtered_ds.chunk(chunks={'time': 20})

# Write the chunked dataset to the relative NetCDF path below with the
# h5netcdf engine, showing a progress bar while it runs.
progress = ProgressBar()
with progress:
    ds_dask.to_netcdf(path='../../data/other_hourly/geos_cf.nc', engine='h5netcdf')
## Other option
# ds_dask.to_zarr('../data/weather_data/weather_data.zarr', mode='w')

In [73]:
# import matplotlib.pyplot as plt
# # Define your latitude and longitude bounds
# lat_min, lat_max = 28.6, 33.4  # Example latitude range
# lon_min, lon_max = -98.9, -88.3  # Example longitude range

# # Select one hour of temperature data (e.g., the first timestamp)
# hour_index = 1  # Change this to select a different hour if desired
# temperature_data = ds['T'].isel(time=hour_index)

# # Plot the data with switched axes
# plt.figure(figsize=(10, 6))

# # Plot with latitude on x-axis and longitude on y-axis
# temperature_data.T.plot(
#     cmap="coolwarm",  # Colormap for temperature visualization
#     cbar_kwargs={'label': 'Temperature (K)'}  # Add color bar label
# )

# # Update axis labels
# plt.xlabel("Latitude")
# plt.ylabel("Longitude")

# plt.show()