In [1]:
from model_helpers import *

import cfgrib
import xarray as xr

import pandas as pd
import numpy as np

from pyPhenology import models, utils

from tqdm import trange, tqdm

import matplotlib.pyplot as plt

from warnings import warn
import warnings

warnings.filterwarnings('ignore')

high_cutoff_year = 2022
low_cutoff_year = 2010

In [57]:
def make_test_df(train_df):
    #print(train_df)
    species_sites = train_df['site_id'].unique()
        
    #print(species_sites)
    
    site_ripenesses = []

    for site in species_sites:
        site_df = train_df[train_df['site_id'] == site]

        site_ripenesses.append({
            'site_id': site,
            'doy': np.mean(site_df['doy'])
        })

    species_test_df = pd.DataFrame(site_ripenesses)
    species_test_df['year'] = high_cutoff_year
    
    return species_test_df

# More specific to our uses.
def train_ripeness_small(observations, predictors, test_observations, test_predictors, model_name = 'ThermalTime'):

    print("running model {m}".format(m=model_name))
    Model = utils.load_model(model_name)
    model = Model()
    model.fit(observations, predictors, optimizer_params='practical')
    
    print(model)
    
    print("making predictions for model {m}".format(m=model_name))        
    preds = model.predict(test_observations, test_predictors)

    #print(preds)
    test_days = test_observations.doy.values
    #print(test_days)
   
    # Various error types
    model_mae = mae(test_days, preds)
    model_rmse = rmse(test_days, preds)
    median_error = np.median(np.abs(test_days - preds))

    print('model {m} got a MAE of {a}'.format(m=model_name,a=model_mae))
    print('model {m} got an RMSE of {a}'.format(m=model_name,a=model_rmse))
    print('model {m}\'s median error is: {a}'.format(m=model_name,a=median_error))

    print("Ripeness Day: {}".format(np.mean(preds)))
    
    ripeness_data = test_observations
    ripeness_data['flowering_day'] = preds
    
    ripeness_dict = {
        'model_object': model,
        'MAE': model_mae,
        'RMSE': model_rmse,
        'Median Error': median_error,
        'prediction_df': ripeness_dict,
    }
    
    return ripeness_dict

Idea for corrections: take the lower error between the base error and the year-transformed error.


Best Approach is:
- High time resolution, correcting for missing data by using averaged data from previous years.
- Add European Weather data.
- No southern hemisphere. 
- Corrected error (i.e. date wrapping). 

Best reporting statistic: what portion of results lie under X.
For example, 80% of results lie under 1 STD, 95% lie under 2 STD. 
Can make a "confidence score" from this – percentile error? Ex. This is less error than 90% of things.  

In [28]:
# Load in high-res weather data
print("loading weather data")
grib_data = cfgrib.open_datasets('../data/monthly_weather_data.grib')

core_data = grib_data[0]

print("Loading Plant Data")
formatted_plants = pd.read_csv("../data/model_training_data/all_plants_formatted.csv", index_col=0)

formatted_plants['rounded_lat'] = np.round(formatted_plants['latitude'], 1)
formatted_plants['rounded_lon'] = np.round(formatted_plants['lon_360'], 1)

rounded_sites = formatted_plants[['site_id', 'rounded_lat', 'rounded_lon']].drop_duplicates()

site_x_vals = xr.DataArray(rounded_sites['rounded_lat'], dims=['site'])
site_y_vals = xr.DataArray(rounded_sites['rounded_lon'], dims=['site'])

print("filtering weather data")
full_weather_data = core_data.sel(latitude=site_x_vals, longitude=site_y_vals, method='nearest').to_dataframe().dropna()

print("formatting weather data")
formatted_weather = format_weather_data(full_weather_data)

formatted_weather['latitude'] = np.round(formatted_weather['latitude'], 1)
formatted_weather['longitude'] = np.round(formatted_weather['longitude'], 1)

print("adding site IDs to weather data")
rounded_sites['coordstring'] = rounded_sites['rounded_lat'].astype(str) + rounded_sites['rounded_lon'].astype(str)
formatted_weather['coordstring'] = formatted_weather['latitude'].astype(str) + formatted_weather['longitude'].astype(str)

## Add Site ID to the weather data
weather_with_sites = pd.merge(formatted_weather, rounded_sites[['coordstring', 'site_id']], on='coordstring')#.drop('coordstring', axis=1)
## Separate into training data and testing data

# filter out current year
print("separating weather data")
weather_with_sites = weather_with_sites[weather_with_sites['year'] != 2023]

weather_training = weather_with_sites[weather_with_sites['year'] < high_cutoff_year]
weather_test = weather_with_sites[weather_with_sites['year'] >= high_cutoff_year]

# final formatting steps for plants
print("formatting plant data")
species_list = formatted_plants['formatted_sci_name'].unique()
formatted_plants.drop('species', axis=1, inplace=True)

# correct for missing sites
weather_sites = weather_with_sites['site_id'].unique()

print("filtering plant data")
filtered_plants = formatted_plants[(formatted_plants['site_id'].isin(weather_sites)) & 
                                   (formatted_plants['year'] != 2023) &
                                   (formatted_plants['latitude'] > 0) &
                                    (formatted_plants['doy'] >= 60)]


loading weather data
Loading Plant Data
filtering weather data
formatting weather data
formatting date columns
correcting leap years
adding site IDs to weather data
separating weather data
formatting plant data
filtering plant data


TODO: make the europe data rounded to .1 degrees. 

In [4]:
euro_data = load_euro_weather_data(euro_path, '../data/high_res_euro_stations.csv')

In [5]:
## Merge both weather data. 

## If site is in europe data, replace it with the european version. 

# Create a list of "mutual sites".
euro_coords = euro_data['coordstring'].unique()

mutual_sites = weather_with_sites[weather_with_sites['coordstring'].isin(euro_coords)][['site_id', 'coordstring']].drop_duplicates()
# Get those sites from europe
mutual_sites_euro = euro_data[euro_data['coordstring'].isin(mutual_sites['coordstring'])]
mutual_sites_euro = mutual_sites_euro.merge(mutual_sites, on='coordstring')

mutual_sites_euro['temperature'] += 272.5

# Remove those sites from monthly 
unmutual_monthly = weather_with_sites[~weather_with_sites['site_id'].isin(mutual_sites)]

# rbind the two (a union essentially?)
merged_euro = pd.concat([mutual_sites_euro, unmutual_monthly]).drop('station', axis=1).drop_duplicates()

merged_euro['temperature'] = np.round(merged_euro['temperature'], 1)


In [6]:

weather_training = merged_euro[merged_euro['year'] < high_cutoff_year]
weather_test = merged_euro[merged_euro['year'] >= high_cutoff_year]

In [55]:
## Train models

species_prediction_dict = {}

for s in tqdm(species_list):
    print(s)
    species_train_df = filtered_plants.query('formatted_sci_name == "{}" and year < {}'.format(s, high_cutoff_year))
    
    if len(species_train_df) == 0:
        continue
    
    species_test_df = filtered_plants.query('formatted_sci_name == "{}" and year >= {}'.format(s, high_cutoff_year))
    
   # print(species_train_df, species_test_df)
    
    if len(species_test_df) == 0:
        # make predictions and compare to the mean ripeness day at each site
        species_test_df = make_test_df(species_train_df)
    
    if len(species_test_df) == 0:
        print("No test data for {}".format(s))
        #print(species_test_df)
        
    predictions = train_ripeness_small(species_train_df, weather_training,
                        species_test_df, weather_test)
    
    species_prediction_dict[s] = predictions

  0%|                                                                                                                                                                              | 0/97 [00:00<?, ?it/s]

Rubus
running model ThermalTime


  1%|█▋                                                                                                                                                                    | 1/97 [00:03<05:30,  3.44s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 214 183 183
 183 214 183 214 214 183 183 183 183 183 183 183]
[183. 181. 190. 193. 165. 165. 184. 181. 178. 184. 246. 202. 288. 192.
 173. 214. 159. 191. 191. 196. 187. 214. 216. 136. 305. 305. 305. 274.
 191. 195.]
model ThermalTime got a MAE of 28.6
model ThermalTime got an RMSE of 49.24970389081881
model ThermalTime's median error is: 9.5
Ripeness Day: 187.13333333333333
Rubus occidentalis
running model ThermalTime


  2%|███▍                                                                                                                                                                  | 2/97 [00:07<06:21,  4.01s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 214 214 214 183]
[164. 189. 189. 189. 184. 189. 191. 191. 191. 187.]
model ThermalTime got a MAE of 11.7
model ThermalTime got an RMSE of 14.522396496446445
model ThermalTime's median error is: 6.0
Ripeness Day: 192.3
Ficus
running model ThermalTime


  3%|█████▏                                                                                                                                                                | 3/97 [00:11<05:44,  3.67s/it]

making predictions for model ThermalTime
[245 245 245 245 245 245 245 245 245 245 245]
[240. 210. 227. 229. 229. 248. 270. 270. 281. 239. 300.]
model ThermalTime got a MAE of 21.818181818181817
model ThermalTime got an RMSE of 26.460948928219075
model ThermalTime's median error is: 18.0
Ripeness Day: 245.0
Ficus auriculata
Ficus carica
running model ThermalTime


  5%|████████▌                                                                                                                                                             | 5/97 [00:13<03:29,  2.27s/it]

making predictions for model ThermalTime
[245 245 245 275 275 245 245 245 245 245]
[180. 232. 236. 278. 248. 258. 258. 248. 205. 335.]
model ThermalTime got a MAE of 27.6
model ThermalTime got an RMSE of 39.06404996924922
model ThermalTime's median error is: 13.0
Ripeness Day: 251.0
Ficus citrifolia
running model ThermalTime


  6%|██████████▎                                                                                                                                                           | 6/97 [00:18<04:42,  3.10s/it]

making predictions for model ThermalTime
[245 245]
[201. 280.]
model ThermalTime got a MAE of 39.5
model ThermalTime got an RMSE of 39.75550276376844
model ThermalTime's median error is: 39.5
Ripeness Day: 245.0
Ficus macrophylla
running model ThermalTime


  7%|███████████▉                                                                                                                                                          | 7/97 [00:21<04:25,  2.94s/it]

making predictions for model ThermalTime
[336]
[329.]
model ThermalTime got a MAE of 7.0
model ThermalTime got an RMSE of 7.0
model ThermalTime's median error is: 7.0
Ripeness Day: 336.0
Ficus sycomorus
Olea europaea
running model ThermalTime


  9%|███████████████▍                                                                                                                                                      | 9/97 [00:23<03:07,  2.13s/it]

making predictions for model ThermalTime
[245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245]
[356. 313. 327. 236. 342. 285. 257. 276. 307. 338. 325. 300. 283. 355.
 293. 277. 298. 297. 263. 337. 336. 346. 286. 308. 284. 262. 290. 265.
 299. 291. 272. 255. 256.  73. 287. 256. 277. 265. 263. 257. 299. 290.
 303. 318. 276. 180.]
model ThermalTime got a MAE of 51.76086956521739
model ThermalTime got an RMSE of 61.76480635724381
model ThermalTime's median error is: 45.5
Ripeness Day: 245.0
Olea europea
running model ThermalTime


 10%|█████████████████                                                                                                                                                    | 10/97 [00:28<04:00,  2.76s/it]

making predictions for model ThermalTime
[275 275 306 275 275 275 275 275 275 306 275 275 275 306 275 306 306 275
 275]
[294.         282.         329.         292.         197.66666667
 284.5        211.         273.5        292.5        304.
 285.5        312.         315.5        295.         294.
 296.         334.         309.5        328.        ]
model ThermalTime got a MAE of 25.333333333333336
model ThermalTime got an RMSE of 32.59959996884402
model ThermalTime's median error is: 19.0
Ripeness Day: 283.1578947368421
Olea
running model ThermalTime


 11%|██████████████████▋                                                                                                                                                  | 11/97 [00:31<04:16,  2.98s/it]

making predictions for model ThermalTime
[306 306 306 306 306 306 306 306 306 306 306 306 306]
[230. 232. 274. 274. 323. 323. 333. 338. 338. 340. 340. 343. 357.]
model ThermalTime got a MAE of 38.07692307692308
model ThermalTime got an RMSE of 42.00457850502271
model ThermalTime's median error is: 32.0
Ripeness Day: 306.0
Morus rubra
running model ThermalTime


 12%|████████████████████▍                                                                                                                                                | 12/97 [00:35<04:18,  3.04s/it]

making predictions for model ThermalTime
[183 183 153 153 153 153 153 153 183 153 153 153]
[198. 190. 173. 171. 170. 170. 162. 159. 156. 156. 147. 137.]
model ThermalTime got a MAE of 13.416666666666666
model ThermalTime got an RMSE of 15.063753405664428
model ThermalTime's median error is: 15.5
Ripeness Day: 160.5
Morus
running model ThermalTime


 13%|██████████████████████                                                                                                                                               | 13/97 [00:38<04:35,  3.28s/it]

making predictions for model ThermalTime
[153 153 183 183 183 183 183 183 183 153 153 183 153 183 183 183 153 153
 183 183 183 183 183 183 153 183 153 153 183 183 183 183 183 183 183 183
 183 183 183 183 183 183 183 153 183 153 183 183 183 183 153 183 153 153
 153 153 153 183 183 183 153 183 153 183 153 183 183 153 153 153]
[290. 290. 232. 220. 215. 205. 202. 201. 200. 196. 190. 188. 187. 186.
 186. 185. 184. 184. 182. 182. 182. 181. 181. 180. 180. 177. 176. 175.
 174. 174. 172. 172. 172. 171. 170. 170. 170. 170. 169. 169. 168. 167.
 167. 167. 167. 167. 166. 165. 163. 163. 161. 160. 159. 159. 159. 158.
 158. 158. 158. 157. 156. 153. 152. 151. 142. 134. 134. 133. 130. 116.]
model ThermalTime got a MAE of 20.62857142857143
model ThermalTime got an RMSE of 31.23596827834403
model ThermalTime's median error is: 15.5
Ripeness Day: 172.71428571428572
Morus macroura
running model ThermalTime


 14%|███████████████████████▊                                                                                                                                             | 14/97 [00:42<04:38,  3.35s/it]

making predictions for model ThermalTime
[153]
[148.]
model ThermalTime got a MAE of 5.0
model ThermalTime got an RMSE of 5.0
model ThermalTime's median error is: 5.0
Ripeness Day: 153.0
Morus alba
running model ThermalTime


 15%|█████████████████████████▌                                                                                                                                           | 15/97 [00:46<04:50,  3.54s/it]

making predictions for model ThermalTime
[183 153 153 153 183 183 183 183 183 183 183 183 183 183 183 183 183 183
 183 183 183 153 153 183 153 153 153]
[204. 200. 200. 195. 189. 189. 189. 185. 185. 180. 180. 180. 178. 178.
 178. 178. 176. 176. 173. 168. 166. 164. 162. 160. 154. 138. 120.]
model ThermalTime got a MAE of 13.185185185185185
model ThermalTime got an RMSE of 18.879736344675063
model ThermalTime's median error is: 7.0
Ripeness Day: 174.11111111111111
Morus nigra
running model ThermalTime


 16%|███████████████████████████▏                                                                                                                                         | 16/97 [00:49<04:45,  3.52s/it]

making predictions for model ThermalTime
[153 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183
 183 183 183 183 183 183 183 183 183 153 183 183 183 183 183 183 183 183
 183 183 183 183 183 153 183 183 183 153 183 183 153 183 183 153 183 153
 153 153 153 153 153 153 153 153 153 153 153 153 153 153 153 153]
[291. 220. 204. 198. 197. 191. 191. 190. 188. 187. 186. 186. 184. 184.
 183. 182. 182. 182. 182. 179. 179. 179. 178. 177. 176. 176. 174. 173.
 172. 172. 171. 171. 170. 170. 170. 170. 170. 170. 170. 169. 168. 167.
 166. 166. 160. 158. 158. 158. 157. 156. 156. 156. 154. 152. 151. 150.
 149. 148. 148. 143. 142. 140. 140. 140. 136. 136. 134. 133. 133. 132.]
model ThermalTime got a MAE of 13.014285714285714
model ThermalTime got an RMSE of 21.45726650145087
model ThermalTime's median error is: 11.5
Ripeness Day: 173.14285714285714
Morus australis
running model ThermalTime


 18%|████████████████████████████▉                                                                                                                                        | 17/97 [00:52<04:16,  3.21s/it]

making predictions for model ThermalTime
[153]
[134.]
model ThermalTime got a MAE of 19.0
model ThermalTime got an RMSE of 19.0
model ThermalTime's median error is: 19.0
Ripeness Day: 153.0
Amelanchier alnifolia
running model ThermalTime


 19%|██████████████████████████████▌                                                                                                                                      | 18/97 [00:54<03:57,  3.01s/it]

making predictions for model ThermalTime
[214 245 245 245 245 214 214 214 214 214 214]
[206. 250. 222. 217. 236. 216. 188. 206. 203. 198. 212.]
model ThermalTime got a MAE of 12.545454545454545
model ThermalTime got an RMSE of 15.397756625265549
model ThermalTime's median error is: 9.0
Ripeness Day: 225.27272727272728
Amelanchier arborea
running model ThermalTime


 20%|████████████████████████████████▎                                                                                                                                    | 19/97 [00:58<04:09,  3.19s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 183 153 153 153 153 153 153 153 153 153 153 183
 153 183 183 183 183 183]
[158. 181. 176. 195. 215. 167. 155. 200. 155. 151. 238. 264. 165. 290.
 284. 181. 247. 163. 172. 171. 150. 138. 134. 142.]
model ThermalTime got a MAE of 41.25
model ThermalTime got an RMSE of 57.17079674099356
model ThermalTime's median error is: 28.0
Ripeness Day: 169.25
Amelanchier canadensis
running model ThermalTime


 21%|██████████████████████████████████                                                                                                                                   | 20/97 [01:02<04:12,  3.28s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 183 183 183 183 183 183 214 183 183]
[158. 173. 179. 169. 176. 186. 221. 201. 209. 193. 167. 161. 174. 180.
 173.]
model ThermalTime got a MAE of 16.4
model ThermalTime got an RMSE of 19.979989989987484
model ThermalTime's median error is: 14.0
Ripeness Day: 185.06666666666666
Amelanchier grandiflora
running model ThermalTime


 22%|███████████████████████████████████▋                                                                                                                                 | 21/97 [01:04<03:52,  3.06s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183 183]
[168. 189. 194. 180. 173. 165. 159. 189. 173. 194. 165. 180. 161. 155.
 165. 169. 178. 167.]
model ThermalTime got a MAE of 13.222222222222221
model ThermalTime got an RMSE of 15.0
model ThermalTime's median error is: 12.5
Ripeness Day: 183.0
Amelanchier laevis
running model ThermalTime


 23%|█████████████████████████████████████▍                                                                                                                               | 22/97 [01:06<03:34,  2.86s/it]

making predictions for model ThermalTime
[183 183 183 183 183 183 183 214 214 214 183 183 183 183 183 183 183 183
 183 183 183 183 183 183 183]
[186. 159. 168. 174. 196. 190. 196. 167. 144. 150. 152. 158. 168. 168.
 168. 163. 189. 167. 173. 184. 173. 175. 177. 177. 181.]
model ThermalTime got a MAE of 18.04
model ThermalTime got an RMSE of 25.121305698549985
model ThermalTime's median error is: 13.0
Ripeness Day: 186.72
Amelanchier utahensis
running model ThermalTime


 24%|███████████████████████████████████████                                                                                                                              | 23/97 [01:09<03:21,  2.72s/it]

making predictions for model ThermalTime
[245 245]
[276.   214.25]
model ThermalTime got a MAE of 30.875
model ThermalTime got an RMSE of 30.875253035400373
model ThermalTime's median error is: 30.875
Ripeness Day: 245.0
Amelanchier
running model ThermalTime


 25%|████████████████████████████████████████▊                                                                                                                            | 24/97 [01:12<03:33,  2.92s/it]

making predictions for model ThermalTime
[153 153 153 183 153 153 183 153 153 153 153 153 153 153 183 183 153 183
 153 153 153 153 153 153 183 183 183 153 183 183 183 183 183 183 183 183
 153 183 183 183 183 183 153 183 183 183 183 183 153 153 153 183 153 153
 183 183 183 183 183 153 183 153 153 183 183 183 183 153 153 183 183 153
 183 153 183 183]
[157. 157. 157. 174. 157. 157. 165. 156. 154. 154. 154. 154. 154. 159.
 162. 152. 158. 157. 159. 159. 159. 159. 159. 160. 160. 178. 179. 179.
 179. 165. 194. 196. 171. 205. 176. 184. 157. 164. 166. 166. 167. 167.
 167. 167. 168. 169. 169. 169. 171. 170. 170. 171. 167. 167. 172. 175.
 180. 178. 230. 183. 174. 178. 159. 180. 173. 171. 158. 159. 159. 162.
 167. 168. 169. 147. 152. 156.]
model ThermalTime got a MAE of 12.25
model ThermalTime got an RMSE of 15.15490193407991
model ThermalTime's median error is: 11.5
Ripeness Day: 169.57894736842104
Citrus
running model ThermalTime


 26%|██████████████████████████████████████████▌                                                                                                                          | 25/97 [01:14<03:15,  2.71s/it]

making predictions for model ThermalTime
[214 214 214 214 214 214 245 214 214 214 214 214 214 214 214 214 214 214
 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214
 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214 214
 214 245 214 214 214 214 214 214 214 214 214 214 214 214 214]
[181. 138. 148. 169.  72.  75.  91. 169. 169. 169. 169. 169. 169. 181.
 173. 234. 274. 345. 283. 283. 283. 306. 323. 323. 323. 323. 323. 323.
 333. 333. 333. 338. 340. 343. 357. 356.  72.  72.  72.  88. 148. 164.
 324. 103. 349. 349. 303. 116. 137. 137.  71.  71.  87.  99. 168. 182.
 317. 340. 323. 330. 333. 333. 338.  72.  72.  72. 181. 315. 180.]
model ThermalTime got a MAE of 97.55072463768116
model ThermalTime got an RMSE of 104.66115096700939
model ThermalTime's median error is: 109.0
Ripeness Day: 214.8985507246377
Citrus jambhiri
running model ThermalTime


 27%|████████████████████████████████████████████▏                                                                                                                        | 26/97 [01:17<03:11,  2.69s/it]

making predictions for model ThermalTime
[336]
[333.]
model ThermalTime got a MAE of 3.0
model ThermalTime got an RMSE of 3.0
model ThermalTime's median error is: 3.0
Ripeness Day: 336.0
Citrus limon
running model ThermalTime


 28%|█████████████████████████████████████████████▉                                                                                                                       | 27/97 [01:20<03:01,  2.60s/it]

making predictions for model ThermalTime
[214 245]
[177. 303.]
model ThermalTime got a MAE of 47.5
model ThermalTime got an RMSE of 48.646685395821166
model ThermalTime's median error is: 47.5
Ripeness Day: 229.5
Rubus idaeus
running model ThermalTime


 29%|███████████████████████████████████████████████▋                                                                                                                     | 28/97 [01:23<03:10,  2.77s/it]

making predictions for model ThermalTime
[214 214 214 214 214 214 214 214 214 214 214 214 245 245 214 214]
[224. 212. 216. 206. 229. 207. 226. 213. 200. 188. 187. 282. 227. 227.
 291. 311.]
model ThermalTime got a MAE of 25.125
model ThermalTime got an RMSE of 37.750827805493216
model ThermalTime's median error is: 14.5
Ripeness Day: 217.875
Rubus ellipticus
Rubus hayata
running model ThermalTime


 31%|███████████████████████████████████████████████████                                                                                                                  | 30/97 [01:25<02:19,  2.08s/it]

making predictions for model ThermalTime
[214 214 214 245 214 214]
[208. 225. 235. 205. 228. 208.]
model ThermalTime got a MAE of 16.333333333333332
model ThermalTime got an RMSE of 20.12461179749811
model ThermalTime's median error is: 12.5
Ripeness Day: 219.16666666666666
Rubus odoratus
Rubus phoenicolasius
running model ThermalTime


 33%|██████████████████████████████████████████████████████▍                                                                                                              | 32/97 [01:28<01:56,  1.79s/it]

making predictions for model ThermalTime
[183 183 183 183 183]
[193. 188. 192. 164. 188.]
model ThermalTime got a MAE of 9.6
model ThermalTime got an RMSE of 10.881176406988354
model ThermalTime's median error is: 9.0
Ripeness Day: 183.0
Rubus rolfei
running model ThermalTime


 34%|████████████████████████████████████████████████████████▏                                                                                                            | 33/97 [01:30<02:05,  1.96s/it]

making predictions for model ThermalTime
[214]
[203.]
model ThermalTime got a MAE of 11.0
model ThermalTime got an RMSE of 11.0
model ThermalTime's median error is: 11.0
Ripeness Day: 214.0
Citrus aurantium
running model ThermalTime


 34%|████████████████████████████████████████████████████████▏                                                                                                            | 33/97 [01:33<03:01,  2.83s/it]


KeyboardInterrupt: 

In [51]:
apple_train_df = filtered_plants[(filtered_plants['genus'] == 'Malus') & (filtered_plants['year'] < 2022)]
apple_test_df = filtered_plants[(filtered_plants['genus'] == 'Malus') & (filtered_plants['year'] == 2022)]

Model = utils.load_model('ThermalTime')
model = Model()
model.fit(apple_train_df, weather_training, optimizer_params='practical')

print(model)

print("making predictions for model {m}".format(m='ThermalTime'))        
preds = model.predict(apple_test_df, weather_test)


<pyPhenology.models.thermaltime.ThermalTime object at 0x16af50ed0>
making predictions for model ThermalTime


In [52]:
print(preds)

[245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 275 245 245 245 275 275 245 275 275 245 245 275 275
 275 245 245 275 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245
 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 245 275 245
 245 245 245 275 245 245 245 245 245 245 245 245 245 245 275 275 275 245
 245 245 245 245 245 275 275 275 245 245 245 245 245 245 275 245 245 245
 245 245 245 245 245 275 245 245 245 245 245 245 245 245 245 245 245 245
 275 245 245 245 245 245 245 275 245 245 245 245 245 245 245 245 275 275
 275 275 275 245 245 245 245 245 245 245 245 245 245 245 245 245 245 275
 245 245 245 245 245 245 245 245 245 245 245 245 24

In [31]:
specific_test_df = merged_euro[(merged_euro['site_id'].isin(apple_test_df['site_id'])) & (merged_euro['year'] == 2022)]

In [53]:
model.predict(apple_test_df, weather_test)

array([245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 275, 245, 245, 245, 275, 275, 245, 275, 275, 245, 245,
       275, 275, 275, 245, 245, 275, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 275,
       245, 245, 245, 245, 275, 245, 245, 245, 245, 245, 245, 245, 245,
       245, 245, 275, 275, 275, 245, 245, 245, 245, 245, 245, 275, 275,
       275, 245, 245, 245, 245, 245, 245, 275, 245, 245, 245, 24

In [54]:
model.get_params()

{'t1': 185.64310747459587, 'T': 14.178133503486173, 'F': 574.1517057222219}

In [40]:
#print(preds)
test_days = apple_test_df.doy.values
print(test_days)

[313. 355. 334.  63. 191. 273. 270. 203. 231. 217. 252. 238. 224. 280.
 259. 196. 210. 189. 295. 288. 302. 280. 342. 294. 348. 287. 308. 301.
 324. 317. 333. 124. 288. 231. 266. 240. 282. 238. 246. 256. 253. 309.
 309. 289. 321. 323. 304. 303. 288. 324. 297. 328. 316. 310. 302. 294.
 316. 295. 301. 271. 277. 291. 310. 269. 264. 272. 284. 270. 253. 264.
 282. 289. 275. 333. 293. 296. 325. 300. 309. 319. 315. 268.  63. 303.
 173. 172. 243. 250. 257. 278. 285. 292. 264. 271. 311. 273. 342. 318.
 301. 313. 280. 336. 308. 292. 306. 294. 283. 329. 285. 297. 324. 304.
 274. 292. 290. 322. 298. 313. 319. 316. 326. 310. 306. 295. 266. 231.
 222. 138. 250. 236. 323. 252. 236. 182. 186. 186. 186. 203. 295.  85.
 178. 240. 234. 235. 257. 232. 235. 236. 235. 240. 233. 233. 240. 240.
 241. 241. 241. 241. 242. 242. 242. 244. 243. 245. 246. 247. 247. 248.
 248. 249. 248. 249. 250. 250. 251. 251. 251. 251. 215. 214. 216. 220.
 225. 225. 225. 225. 225. 229. 229. 233. 215. 231. 231. 232. 232. 233.
 233. 

In [None]:
genus_prediction_dict = {}

for s in tqdm(filtered_plants['genus'].unique()):
    print(s)
    species_train_df = filtered_plants.query('genus == "{}" and year < {}'.format(s, high_cutoff_year))
    
    if len(species_train_df) == 0:
        continue
    
    species_test_df = filtered_plants.query('genus == "{}" and year >= {}'.format(s, high_cutoff_year))
    
    if len(species_test_df) == 0:
        # make predictions and compare to the mean ripeness day at each site
        species_test_df = make_test_df(species_train_df)
    
    if len(species_test_df) == 0:
        print("No test data for {}".format(s))
        #print(species_test_df)
        
    predictions = train_ripeness_small(species_train_df, weather_training,
                        species_test_df, weather_test)
    
    genus_prediction_dict[s] = predictions