In [1]:
import ee
# ee.Authenticate()
ee.Initialize()

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import JSON
import altair as alt

In [503]:
# Define a function to transfer feature properties to a dictionary.
def fc_to_dict(fc):
    prop_names = fc.first().propertyNames()
    prop_lists = fc.reduceColumns(reducer=ee.Reducer.toList().repeat(prop_names.size()),
                                  selectors=prop_names).get('list') 
    return ee.Dictionary.fromLists(prop_names, prop_lists)

In [515]:
# a function for reducing gdd over a geometry 
def create_reduce_func_gdd(geometry, reducer = ee.Reducer.mean(), baseT = 5,temperature_band = 'LST_Day_1km'):
    def gdd_on_img(img):
        gdd = img.select(temperature_band).multiply(.02).subtract(273 + baseT)
        
        # put gdd 0 if it is negative
        z = ee.Image(0)
        gdd = gdd.max(z)
        
        # reducing over the goemetry
        mean_gdd = gdd.reduceRegion(reducer = reducer, geometry = geometry)
        return ee.Feature(geometry, mean_gdd).set({'millis': img.date().millis()})
    return gdd_on_img

In [516]:
# a function for reducing over many regions
def create_mul_reduce_func(geoms_col, reducer = ee.Reducer.mean(), baseT = 5,temperature_band = 'LST_Day_1km'):
    def gdd_on_img(img):
        gdd = img.select(temperature_band).multiply(.02).subtract(273 + baseT)
        
        # put gdd 0 if it is negative
        z = ee.Image(0)
        gdd = gdd.max(z) 
        
        #reduce over regions and filter out the non null ones 
        counties_mean_gdd = gdd.reduceRegions(collection = geoms_col, reducer = reducer)
        counties_mean_gdd = counties_mean_gdd.filter(ee.Filter.notNull(ee.List(['mean'])))
        t = img.date().millis()

        cmeans = counties_mean_gdd.aggregate_array('mean')
        cnames = counties_mean_gdd.aggregate_array('NAME')
        d = ee.Dictionary.fromLists(cnames,cmeans)
        d = d.combine(ee.Dictionary({'millis':t}))
        counties_mean_gdd = counties_mean_gdd.set({'gdd_dict': d})
        return counties_mean_gdd
        
        # prop_names=ee.List(['NAME','mean'])
        # prop_lists = counties_mean_gdd.reduceColumns(reducer=ee.Reducer.toList().repeat(prop_names.size()),selectors=prop_names).get('list') 
        # l = ee.List(prop_lists)
        # d = ee.Dictionary.fromLists(l.get(0), l.get(1))
        # d = d.combine(ee.Dictionary({'millis':t}))
        # return ee.Feature(None,d)
    return gdd_on_img

In [517]:
def time_series_gdd(geometry = None, 
        geoms_col = None, 
        reducer = ee.Reducer.mean(), 
        baseT = 5,
        temperature_band = 'LST_Day_1km',
        date_start ='2020-01-01', 
        duration_month = 2,
        img_col = "MODIS/061/MOD11A1"):
     
    # get the data and filter band and date
    c = ee.ImageCollection(img_col).filterDate(ee.Date(date_start), ee.Date(date_start).advance(duration_month,'month')).select(temperature_band)
    
    # if a geometry collection is provided we want to use reduceRegions and return time series over regions
    if geoms_col != None:
        # build the reducer function
        f = create_mul_reduce_func(geoms_col = geoms_col, reducer = reducer, baseT = baseT,temperature_band = temperature_band) # build the reducer function 
        
        c = c.map(f) # map it on images in imageCollection
        
        # make it a dict and pull it from the server 
        dict_gdd_regions = fc_to_dict(c).getInfo() 
        return dict_gdd_regions['gdd_dict']
    
    # now if geoms_col in None this peice of code will be run, first build the reducer function 
    f = create_reduce_func_gdd(geometry = geometry, reducer = reducer, baseT = baseT, temperature_band = temperature_band)
    
    # apply f and filter out empty features with specific band
    c = ee.FeatureCollection(c.map(f)).filter(ee.Filter.notNull(c.first().bandNames()))
    dict_gdd = fc_to_dict(c).getInfo()
    return dict_gdd
    
        




In [518]:
def dic_to_df(d, multi_areas = False, cumulative = False):
    # Function to add date variables to DataFrame.
    def add_date_info(df):
        df['Timestamp'] = pd.to_datetime(df['millis'], unit='ms')
        df['Year'] = pd.DatetimeIndex(df['Timestamp']).year
        df['Month'] = pd.DatetimeIndex(df['Timestamp']).month
        df['Day'] = pd.DatetimeIndex(df['Timestamp']).day
        df['DOY'] = pd.DatetimeIndex(df['Timestamp']).dayofyear
        return df
    if multi_areas:
        if cumulative:
            df = pd.DataFrame(d)
            df_cum_gdd = df.loc[: , df.columns != 'millis'].cumsum()
            df_cum_gdd = df_cum_gdd.add_prefix('cum_')
            df = pd.concat([df,df_cum_gdd],axis=1)
            df = add_date_info(df)
            df = df.drop(columns='millis')
            return df
        else:
            df = pd.DataFrame(d)
            df = add_date_info(df)
            df = df.drop(columns='millis')
            return df
    else:
        if cumulative:
            df = pd.DataFrame(d)
            df = df.rename(columns={"LST_Day_1km": "gdd"})
            df['cum_gdd'] = df['gdd'].cumsum()
            df = add_date_info(df)
            df = df.drop(columns=['millis','system:index'])
            return df
        else:
            df = pd.DataFrame(d)
            df = df.rename(columns={"LST_Day_1km": "gdd"})
            df = add_date_info(df)
            df = df.drop(columns=['millis','system:index'])
            return df

In [519]:
SD_counties = ee.FeatureCollection('TIGER/2018/Counties').filter(ee.Filter.eq('STATEFP', '46'))
SD_state = SD_counties.union().geometry()

In [521]:
dic = time_series_gdd(geometry =SD_state,duration_month=12)
df = dic_to_df(dic,multi_areas=False, cumulative=True)
df

Unnamed: 0,gdd,cum_gdd,Timestamp,Year,Month,Day,DOY
0,0.003651,0.003651,2020-01-01,2020,1,1,1
1,0.001695,0.005346,2020-01-02,2020,1,2,2
2,0.000000,0.005346,2020-01-03,2020,1,3,3
3,0.011536,0.016882,2020-01-04,2020,1,4,4
4,0.001628,0.018510,2020-01-05,2020,1,5,5
...,...,...,...,...,...,...,...
357,0.025684,4933.549643,2020-12-27,2020,12,27,362
358,0.000000,4933.549643,2020-12-28,2020,12,28,363
359,0.000000,4933.549643,2020-12-29,2020,12,29,364
360,0.000000,4933.549643,2020-12-30,2020,12,30,365


In [514]:
dic = time_series_gdd(geoms_col= SD_counties,duration_month=1)
df = dic_to_df(dic,multi_areas=True, cumulative=True)
df

Unnamed: 0,Aurora,Beadle,Bennett,Bon Homme,Brookings,Brown,Brule,Buffalo,Butte,Campbell,...,cum_Union,cum_Walworth,cum_Yankton,cum_Ziebach,cum_Clay,cum_Lawrence,Timestamp,Year,Month,DOY
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,2020-01-01,2020,1,1
1,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,,0.0,0.0,,0.0,0.0,2020-01-02,2020,1,2
2,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,,0.0,0.0,0.0,,,2020-01-03,2020,1,3
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.45855,0.0,...,0.0,0.0,0.0,0.0,0.0,0.284896,2020-01-04,2020,1,4
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.1e-05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.284896,2020-01-05,2020,1,5
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.284896,2020-01-06,2020,1,6
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.284896,2020-01-07,2020,1,7
7,,,0.0,0.0,,,0.0,,0.0,,...,0.0,,0.0,,0.0,0.284896,2020-01-08,2020,1,8
8,0.0,0.0,0.010232,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,,0.0,0.0,0.0,0.284896,2020-01-09,2020,1,9
9,,,0.0,,0.0,0.0,,,0.0,0.0,...,,0.0,,0.0,,0.284896,2020-01-10,2020,1,10


# Visualization with Altair

In [552]:
alt.Chart(df).mark_rect().encode(
    x='Day:O',
    y='Month:O',
    color=alt.Color(
        'gdd:Q', scale=alt.Scale(scheme='redblue', domain=(-10,30))),
    tooltip=[
        alt.Tooltip('Day:O', title='Day'),
        alt.Tooltip('Month:O', title='Month'),
        alt.Tooltip('gdd:Q', title='gdd')
    ]).properties(width=1100, height=400)

In [553]:
alt.Chart(df).mark_bar(size=1).encode(
    x='Timestamp:T',
    y='cum_gdd:Q',
    color=alt.Color(
        'gdd:Q', scale=alt.Scale(scheme='redblue', domain=(-10, 30))),
    tooltip=[
        alt.Tooltip('Timestamp:T', title='Date'),
        alt.Tooltip('gdd:Q', title='gdd')
    ]).properties(width=1100, height=400)