# Setup

In [None]:
# Earth Engine client. Authentication and project initialisation must run
# before any other ee.* call in this notebook.
import ee
# NOTE(review): force=True presumably re-runs the auth flow even when cached
# credentials exist -- confirm this is wanted on every run.
ee.Authenticate(force=True)
ee.Initialize(project='ee-mrk2152')
import pandas as pd
import geopandas as gpd
# NOTE(review): altair and numpy are not referenced in the cells visible here;
# they may be used elsewhere in the notebook.
import altair as alt
import numpy as np
from shapely import wkt
import time
# TimeoutError here is the concurrent.futures one, raised by Future.result(timeout=...).
from concurrent.futures import ThreadPoolExecutor, TimeoutError

In [None]:
# Load the facility centroids table from disk
df = pd.read_csv('centroids.csv')

# The 'geometry' column is parsed with shapely's WKT loader, then the table is
# wrapped in a GeoDataFrame that uses that column as its geometry
df['geometry'] = df['geometry'].map(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Expose the coordinates as plain columns: y -> latitude, x -> longitude
gdf['latitude'] = gdf['geometry'].y
gdf['longitude'] = gdf['geometry'].x

# Code

In [None]:
#PROJECTION DATA

# Create reduce region function
def create_reduce_region_function(geometry, reducer=ee.Reducer.mean(), scale=1000, crs='EPSG:4326', bestEffort=True, maxPixels=1e13, tileScale=4):
    """Build a per-image region reducer bound to a fixed geometry.

    All arguments are forwarded unchanged to ``img.reduceRegion``. Note that
    the default ``reducer`` is created once, when this ``def`` is executed.

    Returns:
        A function taking an image and returning an ``ee.Feature`` whose
        geometry is ``geometry`` and whose properties are the reduction
        result plus ``'millis'`` (the image date in milliseconds). It is
        intended to be passed to ``ImageCollection.map``.
    """
    # Capture the reduceRegion arguments once; every mapped image reuses them.
    region_kwargs = dict(
        reducer=reducer,
        geometry=geometry,
        scale=scale,
        crs=crs,
        bestEffort=bestEffort,
        maxPixels=maxPixels,
        tileScale=tileScale,
    )

    def reduce_region_function(img):
        region_stats = img.reduceRegion(**region_kwargs)
        timestamp = {'millis': img.date().millis()}
        return ee.Feature(geometry, region_stats).set(timestamp)

    return reduce_region_function

# Convert feature collection to dictionary
def fc_to_dict(fc):
    """Turn a feature collection into an ``ee.Dictionary`` of property lists.

    The property names are taken from the first feature of ``fc``; each name
    maps to the list of that property's values gathered by ``reduceColumns``.
    The result is still a server-side object (callers use ``.getInfo()``).
    """
    keys = fc.first().propertyNames()
    # One toList reducer per property, so each column is collected separately.
    column_reducer = ee.Reducer.toList().repeat(keys.size())
    columns = fc.reduceColumns(reducer=column_reducer, selectors=keys).get('list')
    return ee.Dictionary.fromLists(keys, columns)

# Add date information to DataFrame
def add_date_info(df):
    """Add calendar columns derived from the ``'millis'`` column.

    ``df`` is modified in place and also returned. ``'millis'`` is read as
    epoch milliseconds and stored as ``'Timestamp'``; ``'Year'``, ``'Month'``,
    ``'Day'`` and ``'DOY'`` (day of year) are then derived from it.
    """
    df['Timestamp'] = pd.to_datetime(df['millis'], unit='ms')
    stamps = pd.DatetimeIndex(df['Timestamp'])
    # Column name -> DatetimeIndex attribute, in the order the columns are added.
    derived_fields = (
        ('Year', 'year'),
        ('Month', 'month'),
        ('Day', 'day'),
        ('DOY', 'dayofyear'),
    )
    for column, attribute in derived_fields:
        df[column] = getattr(stamps, attribute)
    return df

# Filter summer months (june/july/august)
def filter_summer_months(df):
    """Return the rows of ``df`` whose ``'Month'`` is June, July or August."""
    summer_months = [6, 7, 8]
    is_summer = df['Month'].isin(summer_months)
    return df.loc[is_summer]

# Define the ImageCollection with Earth Engine. Details on the RCPs are in the markdown above. For this sample, the scenario is set to the intermediate
# scenario rcp45, which most sources call the most likely of the set. It assumes emissions peak around 2040. TODO: double-check and cite an official source for the selected scenario.

# NEX-DCP30 ensemble statistics: keep only the median tasmax/tasmin bands,
# the rcp45 scenario, and images dated within the projection window.
dcp_col = (
    ee.ImageCollection('NASA/NEX-DCP30_ENSEMBLE_STATS')
    .select(['tasmax_median', 'tasmin_median'])
    # Two chained filters keep exactly the images that satisfy both conditions.
    .filter(ee.Filter.eq('scenario', 'rcp45'))
    .filter(ee.Filter.date('2024-07-01', '2074-12-31'))
)

# Calculate mean temperature
def calc_mean_temp(img):
    """Average the ``tasmax_median`` and ``tasmin_median`` bands of ``img``.

    Returns a single-band result named ``'Temp-mean'`` that carries over every
    property listed by ``img.propertyNames()``.
    """
    tasmax = img.select('tasmax_median')
    tasmin = img.select('tasmin_median')
    midpoint = tasmax.add(tasmin).divide(ee.Image.constant(2.0))
    renamed = midpoint.rename(['Temp-mean'])
    # The band arithmetic above builds a new image, so the source image's
    # properties are copied across explicitly.
    return renamed.copyProperties(img, img.propertyNames())

# Rebind dcp_col to the mapped collection: each image becomes the single
# 'Temp-mean' band produced by calc_mean_temp.
dcp_col = dcp_col.map(calc_mean_temp)

# Process a single facility
def process_facility(row):
    """Fetch projected summer mean temperatures for one facility.

    Samples every image of the module-level ``dcp_col`` collection at the
    facility's point, converts ``'Temp-mean'`` from Kelvin to Fahrenheit,
    keeps June-August, and averages per year.

    Args:
        row: Mapping-like record providing ``'latitude'``, ``'longitude'``,
            ``'FACILITYID'`` and ``'NAME'``.

    Returns:
        DataFrame with one row per year and the columns ``Year``,
        ``Temp-mean`` (degrees Fahrenheit), ``facility_id``, ``name``,
        ``latitude`` and ``longitude``.

    Raises:
        Whatever the Earth Engine request (``getInfo``) raises, e.g. when no
        image has a value at the point.
    """
    latitude = row['latitude']
    longitude = row['longitude']
    facility_id = row['FACILITYID']
    name = row['NAME']
    point = ee.Geometry.Point([longitude, latitude])

    # Reduce region function for this point
    reduce_dcp30 = create_reduce_region_function(
        geometry=point, reducer=ee.Reducer.first(), scale=1000, crs='EPSG:4326')

    # Query ee data; features whose band value is null at this point are dropped
    dcp_stat_fc = ee.FeatureCollection(dcp_col.map(reduce_dcp30)).filter(
        ee.Filter.notNull(dcp_col.first().bandNames()))

    # getInfo() is the blocking round trip to the Earth Engine servers
    dcp_dict = fc_to_dict(dcp_stat_fc).getInfo()
    dcp_df = pd.DataFrame(dcp_dict)

    # Process ee data: Kelvin -> Celsius -> Fahrenheit
    dcp_df = add_date_info(dcp_df)
    celsius = dcp_df['Temp-mean'] - 273.15
    dcp_df['Temp-mean'] = (celsius * (9/5)) + 32
    dcp_df = filter_summer_months(dcp_df)

    # Average the temperature column explicitly. The previous
    # ``.mean(['Temp-mean'])`` passed the list as the positional
    # ``numeric_only`` flag and only worked because a non-empty list is truthy.
    summer_mean_df = dcp_df.groupby('Year', as_index=False)['Temp-mean'].mean()

    # Add columns for id
    summer_mean_df['facility_id'] = facility_id
    summer_mean_df['name'] = name
    summer_mean_df['latitude'] = latitude
    summer_mean_df['longitude'] = longitude

    return summer_mean_df

# Load the centroid dataset and parse its 'geometry' column with the WKT loader
df = pd.read_csv('centroids.csv')
df['geometry'] = df['geometry'].map(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Pull latitude (y) and longitude (x) out of the geometry
gdf['latitude'] = gdf['geometry'].y
gdf['longitude'] = gdf['geometry'].x

# List collecting one result DataFrame per successfully processed facility
all_results = []

# Total number of facilities, used in the progress messages
total_facilities = gdf.shape[0]

# Process facilities (with timeout, retries, and logging)
def process_with_timeout(idx, row, retries=3, timeout=30, *, retry_delay=5,
                         process_fn=None, results=None, total=None):
    """Process one facility with a per-attempt timeout and retries on timeout.

    Each attempt runs in its own single-worker thread pool. The pool is shut
    down with ``wait=False``: leaving a ``with ThreadPoolExecutor(...)`` block
    waits for the worker to finish, which would make a timed-out call block
    until the hung request returned and defeat the timeout. A timed-out worker
    thread cannot be killed; it keeps running in the background and its
    eventual result is discarded.

    Args:
        idx: 0-based position of the facility, used in progress messages.
        row: Record passed to ``process_fn``; must provide ``'FACILITYID'``.
        retries: Maximum number of attempts. Only timeouts are retried.
        timeout: Seconds to wait for one attempt.
        retry_delay: Seconds to sleep between a timeout and the next attempt.
        process_fn: Callable applied to ``row``; defaults to the module-level
            ``process_facility``.
        results: List that receives the successful result; defaults to the
            module-level ``all_results``.
        total: Facility count shown in messages; defaults to the module-level
            ``total_facilities``.

    Returns:
        True if a result was appended to ``results``, False otherwise.
    """
    # Module-level defaults are resolved at call time, not at definition time.
    if process_fn is None:
        process_fn = process_facility
    if results is None:
        results = all_results
    if total is None:
        total = total_facilities

    label = f'Facility ID {row["FACILITYID"]} (Facility {idx + 1} out of {total})'

    for attempt in range(retries):
        executor = ThreadPoolExecutor(max_workers=1)
        future = executor.submit(process_fn, row)
        try:
            summer_mean_df = future.result(timeout=timeout)
        except TimeoutError:
            print(f'Timeout retrieving data for {label}, retrying... ({attempt + 1}/{retries})')
            if attempt < retries - 1:
                time.sleep(retry_delay)
        except Exception as e:
            # Non-timeout failures are not retried.
            print(f'Error retrieving data for {label}: {e}')
            return False
        else:
            results.append(summer_mean_df)
            print(f'Successfully retrieved data for {label}.')
            return True
        finally:
            # Never wait for a possibly hung worker.
            executor.shutdown(wait=False)
    return False

# Run every facility through the timeout/retry wrapper. enumerate() supplies
# the 0-based position for the "Facility i out of N" messages, so they stay
# correct even if gdf's index labels are not 0..N-1.
for idx, (_, row) in enumerate(gdf.iterrows()):
    process_with_timeout(idx, row)

# Concatenate results and export them
if all_results:
    final_df = pd.concat(all_results, ignore_index=True)
    final_df.to_csv('nexdcp30.csv')
else:
    # pd.concat raises ValueError on an empty list. Keep final_df defined for
    # later cells and leave any existing nexdcp30.csv untouched.
    final_df = pd.DataFrame()
    print('No facility data was retrieved; nexdcp30.csv was not written.')

In [None]:
# Preview the first rows of the combined results table.
final_df.head()