In [27]:
from pyPhenology import models, utils
import numpy as np

observations, predictors = utils.load_test_data(name='vaccinium', phenophase='flowers')


In [2]:
observations

Unnamed: 0,species,site_id,year,doy,phenophase
48,vaccinium corymbosum,1,1998,122,501
49,vaccinium corymbosum,1,1998,122,501
50,vaccinium corymbosum,1,1991,124,501
51,vaccinium corymbosum,1,1991,124,501
52,vaccinium corymbosum,1,1998,126,501
53,vaccinium corymbosum,1,2000,128,501
54,vaccinium corymbosum,1,2000,128,501
55,vaccinium corymbosum,1,1991,128,501
56,vaccinium corymbosum,1,2001,128,501
57,vaccinium corymbosum,1,2001,128,501


In [3]:
predictors

Unnamed: 0,site_id,temperature,year,doy,latitude,longitude,daylength
0,1,13.10,1990,-65,42.5429,-72.2011,10.24
1,1,13.26,1990,-64,42.5429,-72.2011,10.20
2,1,12.30,1990,-63,42.5429,-72.2011,10.16
3,1,12.15,1990,-62,42.5429,-72.2011,10.11
4,1,13.00,1990,-61,42.5429,-72.2011,10.07
...,...,...,...,...,...,...,...
4351,1,7.93,2001,293,42.5429,-72.2011,10.55
4352,1,10.76,2001,294,42.5429,-72.2011,10.51
4353,1,8.45,2001,295,42.5429,-72.2011,10.46
4354,1,9.43,2001,296,42.5429,-72.2011,10.42


In [30]:
model = models.ThermalTime()

In [31]:
model.fit(observations, predictors)

In [32]:
model.get_params()

{'t1': 86.44914381180284, 'T': 8.069648395744633, 'F': 104.41146537072774}

In [33]:
model.predict()

array([106, 106, 106, 106, 106, 106, 106, 106, 122, 118, 118, 118, 118,
       119, 119, 119, 119, 124, 122, 122, 122, 120, 120, 120, 120, 120,
       120, 123, 120, 120, 120, 120, 121, 121, 121, 121, 123, 123, 123,
       123, 120, 120, 123, 123, 123, 124, 124, 124])

In [34]:
model.score()

2.9154759474226504

In [35]:
model.save_params(filename='trained_models/blueberry_model.json')

RuntimeWarning: File trained_models/blueberry_model.json exists. User overwrite=True to overwite

In [11]:
model = utils.load_saved_model(filename='trained_models/blueberry_model.json')

In [12]:
model.predict(to_predict=observations, predictors=predictors)

array([126, 126, 127, 127, 126, 129, 129, 127, 132, 132, 133, 133, 132,
       132, 130, 130, 130, 129, 127, 126, 132, 130, 129, 132, 132, 133,
       133, 138, 138, 141, 141, 142, 132, 141, 141, 139, 139, 139, 139,
       138, 138, 141, 141, 141, 141, 142, 142, 142])

In [22]:
species_dict = {
    "apple": [0, 1, 2]
}

current_doy = 2
current_doy2 = 4

def ripeness(species, doy):
    if species not in species_dict:
        return False
    elif doy in species_dict[species]:
        return True
    else:
        return False

In [16]:
ripeness("apple", 4)

False

In [18]:
ripeness("apple", 2)

True

In [23]:
ripeness("peach", 4)

False

**Compare Multiple Models**

In [3]:
m1 = models.ThermalTime()
m2 = models.FallCooling()
m3 = models.M1()
m4 = models.MSB()

ensemble = models.Ensemble(core_models=[m1,m2,m3,m4])
ensemble.fit(observations, predictors)

In [4]:
model_preds = ensemble.predict(observations, predictors, aggregation="none")

In [5]:
ensemble.score()

12.849894616947902

In [11]:
len(model_preds[0])

48

In [7]:
len(observations)

48

Model predictions seem to be for each site. figure out a way to average per site / species? 

Model predicts day of flowering. 

In [36]:
## This is the species/site model prediction function. 

def aic(obs, pred, n_param):
        return len(obs) * np.log(np.mean((obs - pred)**2)) + 2*(n_param + 1)

observations, predictors = utils.load_test_data(name='vaccinium',
                                                phenophase='budburst')

default_models = [models.ThermalTime(), models.FallCooling(), models.M1(), models.MSB()]

default_model_names = ['ThermalTime', "FallCooling", "M1", "MSB"]

def get_site_ripeness(observations, predictors, test_percent, site_id, species, models=default_model_names):
    # filter out train / test
    observations_test = observations.sample(frac=test_percent)
    observations_train = observations.drop(observations_test.index)
    
    # set up model comparisons
    best_aic=np.inf
    best_base_model = None
    best_base_model_name = None

    # iterate through all models
    for model_name in models:
        print("running model {m}".format(m=model_name))
        
        Model = utils.load_model(model_name)
        model = Model()
        model.fit(observations_train, predictors, optimizer_params='practical')
        
        # predict from test observations
        print("making predictions for model {m}".format(m=model_name))        
        preds = model.predict(observations_test, predictors)
        
        # score model
        model_aic = aic(obs = observations_test.doy.values,
                        pred=preds,
                        n_param = len(model.get_params()))

        if model_aic < best_aic:
            best_model = model
            best_model_name = model_name
            best_aic = model_aic

        print('model {m} got an aic of {a}'.format(m=model_name,a=model_aic))

    print('Best model: {m}'.format(m=best_model_name))
    print('Best model paramters:')
    print(best_model.get_params())
    
    ripeness_data = observations_test
    ripeness_data['flowering_day'] = preds
    final_ripeness_data = ripeness_data[(ripeness_data.species == species) & (ripeness_data.site_id == site_id)]
    
    mean_maturation = np.mean(final_ripeness_data['flowering_day'])
    
    prediction_dict = {
        "trained_model": best_model,
        "model_aic": best_aic,
        "species_site_flowering days": list(final_ripeness_data['flowering_day']),
        "mean_flowering_day": np.mean(final_ripeness_data['flowering_day'])
    }
    
    print(prediction_dict)
    
    return prediction_dict
    
    

In [12]:
def predict_ripeness(prediction_dict, doy):
    if doy >= prediction_dict['mean_flowering_day']:
        return True
    else:
        return False

In [38]:
blueberry_models = get_site_ripeness(observations, predictors, 0.5, 1, "vaccinium corymbosum")

running model ThermalTime
making predictions for model ThermalTime
model ThermalTime got an aic of 64.8109667391588
running model FallCooling
making predictions for model FallCooling
model FallCooling got an aic of 208.43842199782785
running model M1
making predictions for model M1
model M1 got an aic of 66.8109667391588
running model MSB
making predictions for model MSB
model MSB got an aic of 80.72890070436407
Best model: ThermalTime
Best model paramters:
{'t1': 85.00087855554551, 'T': 7.873571778015514, 'F': 113.53965982088499}
{'trained_model': <pyPhenology.models.thermaltime.ThermalTime object at 0x169bd3050>, 'model_aic': 64.8109667391588, 'species_site_flowering days': [124, 117, 114, 114, 121, 124, 123, 124, 117, 106, 114, 122, 119, 116, 123, 122, 110, 123, 116, 106, 119, 106, 122, 116], 'mean_flowering_day': 117.41666666666667}


In [41]:
predict_ripeness(blueberry_models, 200)

True

TODO: implement basic gaussian curve for a few days after the ripening date. ask claudia about this. 