In [1]:
import ee
# ee.Authenticate()
ee.Initialize()

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import JSON
import altair as alt

In [3]:
def fc_to_dict(fc):
    """Collect the properties of a collection into a single ``ee.Dictionary``.

    The property names are taken from the first element of ``fc``; each name is
    mapped to the list of values that property takes across the collection.

    Args:
        fc: Earth Engine collection exposing ``first()`` and ``reduceColumns()``.

    Returns:
        ``ee.Dictionary`` keyed by property name, with one list of values per key.
    """
    names = fc.first().propertyNames()
    # Repeat the toList reducer once per property so every selector gets its own list.
    list_reducer = ee.Reducer.toList().repeat(names.size())
    columns = fc.reduceColumns(reducer=list_reducer, selectors=names)
    return ee.Dictionary.fromLists(names, columns.get('list'))

In [5]:
def get_relative_day(img):
    """Return ``img`` with a ``'doy'`` property holding its day offset within the year.

    The value is whatever ``img.date().getRelative('day', 'year')`` yields.
    """
    return img.set('doy', img.date().getRelative('day', 'year'))

In [20]:
def get_mean(img):
    """Collapse the images stored under ``'doy_matches'`` into one composite image.

    Note: despite the function name, the reduction is a per-pixel *median*
    (``ee.Reducer.median()``), not a mean.

    Args:
        img: Image carrying a ``'doy_matches'`` list of images and a ``'doy'`` property.

    Returns:
        The reduced image, with the ``'doy'`` property copied over from ``img``.
    """
    matches = ee.ImageCollection.fromImages(img.get('doy_matches'))
    return matches.reduce(ee.Reducer.median()).set({'doy': img.get('doy')})

In [221]:
# Analysis window: `duration_years` years starting at `date_start`.
date_start = '2000-01-01'
duration_years = 13

# Daytime land-surface-temperature band of the MODIS product, limited to the window.
col = (
    ee.ImageCollection("MODIS/061/MOD11A1")
    .select('LST_Day_1km')
    .filterDate(
        ee.Date(date_start),
        ee.Date(date_start).advance(duration_years, 'year'),
    )
)

In [228]:
def mean_transformer_fixed_doy(col):
    """Build one composite image per day of year from a multi-year collection.

    Every image in ``col`` is tagged with its day of year (``'doy'``). The images
    falling in 2004 serve as the template (one entry per day of year), and every
    image of ``col`` with the same ``'doy'`` is joined onto its template image.
    Each group is then reduced by ``get_mean`` (which, despite its name, takes
    the median).

    Args:
        col: ``ee.ImageCollection`` to composite.

    Returns:
        ``ee.ImageCollection`` with one reduced image per template day, each
        carrying its ``'doy'`` property.

    Raises:
        AssertionError: if the template year holds fewer than 365 images.
    """
    # First get the day-of-year info for every image.
    col = col.map(get_relative_day)

    # Pick one year as the template. 2004 is a leap year, so up to 366 images
    # may be present.
    distinct_doy = col.filterDate('2004-01-01', '2005-01-01')

    # Sanity check that the template covers the whole year. getInfo() fetches
    # the count from the server.
    number_of_days_in_this_year = distinct_doy.aggregate_count('doy').getInfo()
    assert number_of_days_in_this_year >= 365, (
        f"template year has only {number_of_days_in_this_year} images with a 'doy' value"
    )

    # Match images whose 'doy' property is equal on both sides of the join.
    filter_doy = ee.Filter.equals(leftField='doy', rightField='doy')

    # Attach to each template image the list of all images sharing its doy.
    # Fixed: this line previously referenced the undefined name `distinctDOY`,
    # which only worked when a stale variable was left over in the kernel.
    join = ee.Join.saveAll('doy_matches')
    joinCol = ee.ImageCollection(join.apply(distinct_doy, col, filter_doy))

    # Reduce each group of same-doy images to a single image.
    mean_col = joinCol.map(get_mean)

    return mean_col


In [229]:
# Build the per-day-of-year composite collection from the LST images in `col`.
mean_coll = mean_transformer_fixed_doy(col)

In [230]:
def create_reduce_func_gdd(geometry, reducer = ee.Reducer.mean(), baseT = 5, temperature_band = 'LST_Day_1km_median'):
    """Create a mapper that reduces an image's GDD over a single geometry.

    Args:
        geometry: Region passed to ``reduceRegion``.
        reducer: Earth Engine reducer applied over the region (default: mean).
        baseT: Base temperature subtracted from the temperature band.
        temperature_band: Name of the band holding the temperature values.

    Returns:
        A function ``img -> ee.Feature`` (no geometry) whose properties are the
        ``reduceRegion`` result plus the image's ``'doy'``.
    """
    def gdd_on_img(img):
        # Scale the raw band by 0.02 and subtract (273 + baseT); presumably this
        # converts scaled Kelvin to degrees above the base temperature - confirm
        # against the dataset documentation.
        # NOTE(review): the exact Kelvin offset is 273.15, not 273 - confirm
        # whether the 0.15 difference matters for this analysis.
        degrees_above_base = img.select(temperature_band).multiply(.02).subtract(273 + baseT)

        # Negative values contribute nothing: clamp them to zero.
        clamped = degrees_above_base.max(ee.Image(0))

        # Reduce over the geometry at a 1000 m scale.
        stats = clamped.reduceRegion(reducer = reducer, geometry = geometry, scale = 1000)
        return ee.Feature(None, stats).set({'doy' : img.get('doy')})
    return gdd_on_img

In [254]:
def create_mul_reduce_func(geoms_col, reducer = ee.Reducer.mean(), baseT = 5,temperature_band = 'LST_Day_1km_median'):
    """Create a mapper that reduces an image's GDD over many regions at once.

    Args:
        geoms_col: Feature collection of regions; each feature's ``'NAME'``
            property is used as its key in the result dictionary.
        reducer: Earth Engine reducer applied per region (default: mean).
        baseT: Base temperature subtracted from the temperature band.
        temperature_band: Name of the band holding the temperature values.

    Returns:
        A function ``img -> ee.FeatureCollection``. The returned collection has
        a ``'gdd_dict'`` property mapping region name to reduced value, plus a
        ``'doy'`` entry.

    Note:
        The result property name ``'mean'`` is hard-coded below, so a reducer
        whose output is named differently would presumably yield an empty
        result - verify before passing a non-default reducer.
    """
    def gdd_on_img(img):
        # Same conversion as the single-geometry variant: scale by 0.02 and
        # subtract (273 + baseT).
        raw = img.select(temperature_band).multiply(.02).subtract(273 + baseT)

        # Negative values are replaced by zero.
        gdd = raw.max(ee.Image(0))

        # Reduce over all regions, then keep only those with a non-null 'mean'.
        per_region = gdd.reduceRegions(collection = geoms_col, reducer = reducer,scale = 1000)
        per_region = per_region.filter(ee.Filter.notNull(ee.List(['mean'])))
        day_of_year = img.get('doy')

        values = per_region.aggregate_array('mean')
        names = per_region.aggregate_array('NAME')
        gdd_by_region = ee.Dictionary.fromLists(names, values).combine(
            ee.Dictionary({'doy': day_of_year})
        )
        return per_region.set({'gdd_dict': gdd_by_region})
    return gdd_on_img

In [249]:
# Counties whose STATEFP is '46', and their union as a single geometry.
SD_counties = (
    ee.FeatureCollection('TIGER/2018/Counties')
    .filter(ee.Filter.eq('STATEFP', '46'))
)
SD_state = SD_counties.union().geometry()

# Per-county reduction of every day-of-year composite.
f = create_mul_reduce_func(SD_counties)
res = mean_coll.map(f)

In [250]:
# Gather the properties of `res` into a dictionary and fetch it to the client.
result = fc_to_dict(res).getInfo()

In [296]:
# Keys of the first per-day dictionary (iterating a dict yields its keys).
county_names = list(result['gdd_dict'][0])

In [297]:
# 'doy' is a bookkeeping key, not a county. The guard keeps this cell re-runnable:
# an unconditional remove() raises ValueError the second time it is executed.
if 'doy' in county_names:
    county_names.remove('doy')

In [271]:
# One row per entry of result['gdd_dict']; columns are the dictionary keys.
df_counties = pd.DataFrame(result['gdd_dict'])

In [272]:
# Running total of every column except 'doy', with each column renamed to 'cum_<name>'.
df_cum_gdd_counties = (
    df_counties
    .loc[:, df_counties.columns != 'doy']
    .cumsum()
    .add_prefix('cum_')
)

# Daily and cumulative values side by side. 'Time' interprets 'doy' as a number
# of days since the Unix epoch, which is why the dates fall in 1970.
df = pd.concat([df_counties, df_cum_gdd_counties], axis=1).assign(
    Time=lambda frame: pd.to_datetime(frame['doy'], unit='D')
)
df

Unnamed: 0,Aurora,Beadle,Bennett,Bon Homme,Brookings,Brown,Brule,Buffalo,Butte,Campbell,...,cum_Stanley,cum_Sully,cum_Todd,cum_Tripp,cum_Turner,cum_Union,cum_Walworth,cum_Yankton,cum_Ziebach,Time
0,0.000000,0.0,0.000000,0.001164,0.0,0.0,0.000871,0.000000,0.000022,0.000000,...,0.000000,0.000000,0.000000,0.004960,0.000000,0.000000,0.000000,0.000000,0.000000,1970-01-01
1,0.016208,0.0,0.031881,0.000000,0.0,0.0,0.004887,0.002125,0.000179,0.000000,...,0.001107,0.000000,0.062320,0.004960,0.000000,0.000000,0.000000,0.000000,0.018765,1970-01-02
2,0.000000,0.0,2.806725,0.000000,0.0,0.0,0.000000,0.000000,1.566746,0.000000,...,2.002574,0.023062,1.933044,0.818796,0.000000,0.000000,0.000082,0.000000,0.631479,1970-01-03
3,0.000000,0.0,0.117051,0.000000,0.0,0.0,0.006245,0.000000,0.563208,0.000000,...,2.004404,0.023062,1.941207,0.855126,0.000000,0.079670,0.000082,0.000000,0.904675,1970-01-04
4,0.079110,0.0,0.818624,4.105667,0.0,0.0,0.135675,0.000341,0.166951,0.000000,...,2.037129,0.023062,2.593098,1.306616,0.202134,0.084266,0.000082,1.373147,0.904675,1970-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,0.000000,0.0,0.002356,0.017460,0.0,0.0,0.000000,0.000000,0.000944,0.000000,...,6021.662873,5304.224721,6144.063894,6000.019728,5080.851559,5109.640663,5047.300842,5169.759459,6041.925247,1970-12-28
362,0.000000,0.0,0.696429,1.160155,0.0,0.0,0.000000,0.000000,0.746286,0.000000,...,6021.665271,5304.224721,6144.301283,6000.109153,5081.149500,5109.654701,5047.300842,5173.102246,6041.953515,1970-12-29
363,0.000000,0.0,0.011346,0.000214,0.0,0.0,0.000000,0.000136,0.000000,0.000000,...,6021.678256,5304.224721,6144.306461,6000.140531,5081.171458,5109.656229,5047.300842,5173.140897,6041.953578,1970-12-30
364,0.001063,0.0,0.010925,0.000000,0.0,0.0,0.053564,0.337674,0.000000,0.000014,...,6021.683653,5304.225306,6144.878402,6000.420838,5081.171458,5109.656229,5047.302770,5173.140897,6041.953578,1970-12-31


In [318]:
def plot_gdd_counties(county_name):
    """Bar chart of cumulative GDD over time for one county.

    Bar height is the ``cum_<county_name>`` column and bar colour is the
    ``<county_name>`` column. Both are read from the module-level ``df`` at
    call time.

    Args:
        county_name: Must be an entry of the module-level ``county_names`` list.

    Returns:
        The Altair chart.

    Raises:
        AssertionError: if ``county_name`` is not in ``county_names``.
    """
    assert county_name in county_names, f"unknown county: {county_name!r}"
    daily_field = f'{county_name}:Q'
    return alt.Chart(df).mark_bar(size=3).encode(
        x=alt.X('Time:T'),
        y=f'cum_{county_name}:Q',
        color=alt.Color(
            daily_field, scale=alt.Scale(scheme='redblue', domain=(-10, 30))),
        tooltip=[
            alt.Tooltip('doy:Q', title='doy'),
            # Fixed: the tooltip was hard-coded to 'Brookings:Q', so every other
            # county displayed Brookings' daily value.
            alt.Tooltip(daily_field, title='gdd'),
        ]).properties(width=1100, height=400)


In [319]:
# Render the cumulative-GDD chart for the 'Brookings' column.
plot_gdd_counties('Brookings')

In [321]:
# Same reduction over the unioned state geometry; note this rebinds `f` and `res`.
f = create_reduce_func_gdd(SD_state)
res = mean_coll.map(f)

In [322]:
# Fetch the state-level properties to the client; this overwrites the earlier `result`.
result = fc_to_dict(res).getInfo()

In [323]:
# Note: this rebinds `df`, which plot_gdd_counties reads at call time.
df = pd.DataFrame(result)

In [324]:
# Tidy the state-level frame: name the reduced band 'gdd', add its running total,
# and drop the 'system:index' column.
df = (
    df.rename(columns={"LST_Day_1km_median": "gdd"})
    .assign(cum_gdd=lambda frame: frame['gdd'].cumsum())
    # errors='ignore' keeps this cell re-runnable: without it, a second run
    # raises KeyError because 'system:index' has already been dropped.
    .drop(columns=['system:index'], errors='ignore')
)
df

Unnamed: 0,gdd,doy,cum_gdd
0,0.009691,0,0.009691
1,0.014215,1,0.023906
2,0.927015,2,0.950921
3,0.180240,3,1.131161
4,0.403028,4,1.534189
...,...,...,...
361,0.033337,361,5513.326931
362,0.237955,362,5513.564886
363,0.005061,363,5513.569946
364,0.031307,364,5513.601254


In [325]:
# Interpret 'doy' as days since the Unix epoch to get a datetime axis for plotting
# (hence the 1970 dates in the output).
df['Time'] = pd.to_datetime(df['doy'],unit='D')

In [326]:
# Display the state-level frame, now including the 'Time' column.
df

Unnamed: 0,gdd,doy,cum_gdd,Time
0,0.009691,0,0.009691,1970-01-01
1,0.014215,1,0.023906,1970-01-02
2,0.927015,2,0.950921,1970-01-03
3,0.180240,3,1.131161,1970-01-04
4,0.403028,4,1.534189,1970-01-05
...,...,...,...,...
361,0.033337,361,5513.326931,1970-12-28
362,0.237955,362,5513.564886,1970-12-29
363,0.005061,363,5513.569946,1970-12-30
364,0.031307,364,5513.601254,1970-12-31


In [329]:
# Polynomial orders to fit to the daily GDD values.
degree_list = [40]

# Scatter of daily GDD against day of year.
base = (
    alt.Chart(df)
    .mark_circle(color="black", size=10)
    .encode(
        alt.X("doy"),
        alt.Y("gdd"),
        tooltip=[
            alt.Tooltip('doy:O', title='DOY'),
            alt.Tooltip('gdd:Q', title='gdd'),
        ],
    )
)

# One regression line per order. The fitted values are written to a column named
# after the order, then folded into ("degree", "gdd") so colour encodes the order.
polynomial_fit = [
    (
        base.transform_regression(
            "doy", "gdd", method="poly", order=order, as_=["doy", str(order)]
        )
        .mark_line(size=1.5)
        .transform_fold([str(order)], as_=["degree", "gdd"])
        .encode(alt.Color("degree:N"))
    )
    for order in degree_list
]

alt.layer(base, *polynomial_fit).properties(width=1000, height=400)

In [330]:
# State-level cumulative GDD over time; bar colour shows the daily GDD value.
(
    alt.Chart(df)
    .mark_bar(size=3)
    .encode(
        x=alt.X('Time:T'),
        y='cum_gdd:Q',
        color=alt.Color(
            'gdd:Q',
            scale=alt.Scale(scheme='redblue', domain=(-10, 30)),
        ),
        tooltip=[
            alt.Tooltip('doy:Q', title='doy'),
            alt.Tooltip('gdd:Q', title='gdd'),
        ],
    )
    .properties(width=1100, height=400)
)