# Version 4: Parallel Thershold Dormancy Model

Cherry phenology follows a two-stage domrancy process:
1. Winter chilling requirment
2. Spring heat accumulation

Unlike earlier threshold models we tried that required chill completion before heatr accumulation begins, this model allows both to happen in paralell. Bloom is tirggered only when both chill and growing degree days (GDD) requriements are satisfied.

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd


from sklearn.metrics import mean_absolute_error, r2_score

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

In [2]:
temps_path = Path("../Data/full_temps_dc.csv")
temps = pd.read_csv(temps_path)

temps['date'] = pd.to_datetime(temps['date'])
temps['year'] = temps['date'].dt.year
temps["month"] = temps["date"].dt.month

temps["season_year"] = temps['year']
temps.loc[temps['month'] >= 10, "season_year"] += 1


bloom_path = Path("../Data/blooms_dc.csv")
bloom= pd.read_csv(bloom_path)
bloom = bloom[bloom['year'] > 1942]

temps['date'] = pd.to_datetime(temps['date'])
temps['tavg'] = (temps['tmin'] + temps['tmax']) / 2
temps['doy'] = temps['date'].dt.dayofyear

### Chill Accumulation 
Calculates cumulative chill exposure during dormancy by counting days where mean temperature is below a specified threshold. Chill accumulation is necessary to release endodormancy. 

In [3]:
def accumulate_chill(df, chill_threshold):
    df = df.copy()
    df["chill_units"] = (df["tavg"] < chill_threshold).astype(int)
    df["chill_cum"] = df["chill_units"].cumsum()
    return df

#accumulate_chill()

### Heat Accumulation

Calculates cumulative heat accumulation using Growing Degree Days (GDD). Heat units are computed as the postive difference between daily mean temperature and a biological base temperature. 

In [4]:
def accumulate_heat(df, base_temp=4.0):
    df = df.copy()
    df["GDD_units"] = np.maximum(0, df["tavg"] - base_temp)
    df["GDD_cum"] = df["GDD_units"].cumsum()
    return df

### Parallel Bloom Trigger

Simulation allows chill and heat accumulaion to occur simulaneously. Bloom is triggered when both the chill requirement and gdd requirmenet are satisfied, reflecting the dormancy processs.

In [5]:
def simulate_bloom_parallel(
    df,
    chill_threshold,
    chill_requirement,
    GDD_requirement,
    base_temp=5
):
    chill = 0
    heat = 0

    for _, row in df.sort_values("date").iterrows():

        t = row["tavg"]

        # chill accumulation
        if t < chill_threshold:
            chill += 1

        # heat (accumulation
        if row['date'].month >= 1 and t > base_temp:
            heat += (t - base_temp)

        # bloom triggers once BOTH satisfied
        if chill >= chill_requirement and heat >= GDD_requirement:
            return row["doy"]

    return None

### Rolling Origin Cross Validation


In [6]:
def rolling_origin_cv_parallel(
    temps,
    bloom,
    chill_threshold,
    chill_requirement,
    GDD_requirement
):
    years = sorted(bloom["year"].unique())
    
    predictions = []
    actuals = []
    
    for i in range(10, len(years)):
        
        train_years = years[:i]
        test_year = years[i]
        
        test_df = temps[temps["season_year"] == test_year]
        
        pred = simulate_bloom_parallel(
            test_df,
            chill_threshold,
            chill_requirement,
            GDD_requirement
        )
        
        if pred is not None:
            predictions.append(pred)
            actuals.append(
                bloom.loc[bloom["year"] == test_year, "bloom_doy"].values[0]
            )
    
    mae = mean_absolute_error(actuals, predictions)
    r2 = r2_score(actuals, predictions)
    
    return mae, r2

In [7]:
predictions = []
actuals = []

for year in bloom["year"]:
    
    df_year = temps[temps["season_year"] == year]
    
    pred = simulate_bloom_parallel(
        df_year,
        chill_threshold=7.2,
        chill_requirement=90,
        GDD_requirement=300
    )
    
    if pred is not None:
        predictions.append(pred)
        actuals.append(
            bloom.loc[bloom["year"] == year, "bloom_doy"].values[0]
        )

mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print("MAE:", mae)
print("R²:", r2)

MAE: 28.90909090909091
R²: -21.314411450924606


Very poor results, likely because heat accumulation is occuring very early in the model based on the thresholds set. Which is artificually inflting the heat totals, causing large prediction biases. 