In [2]:
import pandas as pd
import numpy as np
from skimpy import clean_columns
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


- [Guide for Generating PDFs](https://towardsdatascience.com/how-to-create-pdf-reports-with-python-the-essential-guide-c08dd3ebf2ee)  
- [Dynamic PDF Generation](https://advicement.io/dynamic-documents-api/documentation/getting-started)

### Climate Data

In [98]:
climate = pd.read_csv('Data/2023_scm_climate.csv')
climate["date"] = pd.to_datetime(climate["date"], format='%m/%d/%y')
climate["week"] = climate["date"].dt.strftime("%U").astype(int) 
climate

Unnamed: 0,recordId,longitude,latitude,elevation,date,rainInches,minTempF,aveTempF,maxTempF,week
0,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-01,0.12,36.6,43.8,51.1,1
1,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-02,0.00,36.0,38.8,41.5,1
2,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-03,0.00,31.5,39.4,47.2,1
3,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-04,0.53,36.7,44.9,53.1,1
4,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-05,0.35,37.5,46.9,56.2,1
...,...,...,...,...,...,...,...,...,...,...
10131,ZUE_REE,-76.9833,42.7834,669,2023-06-26,0.06,66.3,74.2,82.2,26
10132,ZUE_REE,-76.9833,42.7834,669,2023-06-27,1.24,63.4,72.7,82.1,26
10133,ZUE_REE,-76.9833,42.7834,669,2023-06-28,0.10,60.4,70.2,80.0,26
10134,ZUE_REE,-76.9833,42.7834,669,2023-06-29,0.01,58.7,62.8,66.9,26


### Landscape Data

In [132]:
landscape_500 = pd.read_csv('../2022/Landscape/2022_scm_cdl_500m.csv')
landscape_500["Prop_Corn"] = landscape_500['Corn'] / landscape_500['Total']
landscape_500

Unnamed: 0,Site_ID,Corn,Open Water,Sweet Corn,Developed/Open Space,Developed/Low Intensity,Developed/Med Intensity,Developed/High Intensity,Barren,Deciduous Forest,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Prop_Corn
0,STA_LOT,97,0,0,36,28,23,3,6,14,...,0.493040,31,0.022711,52,0.038095,96,0.070330,0,0.000000,0.071062
1,STA_PAD,297,1,0,63,37,5,0,1,16,...,0.431702,21,0.015340,138,0.100804,106,0.077429,1,0.000730,0.216947
2,STA_CRA,24,0,0,74,18,4,0,3,65,...,0.321402,137,0.100073,155,0.113221,99,0.072316,0,0.000000,0.017531
3,POV_DUN,6,0,0,64,12,0,0,0,220,...,0.011730,391,0.286657,301,0.220674,76,0.055718,0,0.000000,0.004399
4,DEG_FES_OVO4,341,0,0,46,46,3,0,0,81,...,0.421053,98,0.071637,72,0.052632,95,0.069444,0,0.000000,0.249269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,YOU,3,0,0,58,36,7,0,0,306,...,0.005844,495,0.361578,235,0.171658,101,0.073776,0,0.000000,0.002191
76,HAT,79,0,0,43,28,11,14,10,287,...,0.079063,456,0.333821,183,0.133968,106,0.077599,0,0.000000,0.057833
77,MOO,248,0,0,31,16,4,0,0,85,...,0.352381,131,0.095971,176,0.128938,51,0.037363,0,0.000000,0.181685
78,URI,207,12,0,11,44,14,1,0,120,...,0.434942,126,0.092105,47,0.034357,70,0.051170,12,0.008772,0.151316


In [133]:
landscape_1000 = pd.read_csv('../2022/Landscape/2022_scm_cdl_1000m.csv')
landscape_1000["Prop_Corn"] = landscape_1000['Corn'] / landscape_1000['Total']
landscape_1000

Unnamed: 0,Site_ID,Corn,Open Water,Sweet Corn,Developed/Open Space,Developed/Low Intensity,Developed/Med Intensity,Developed/High Intensity,Barren,Deciduous Forest,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Prop_Corn
0,STA_LOT,599,1,3,114,65,30,3,7,179,...,0.534363,256,0.057309,280,0.062682,219,0.049026,1,0.000224,0.134094
1,STA_PAD,907,1,0,398,318,31,6,1,353,...,0.344913,521,0.116764,617,0.138279,754,0.168983,1,0.000224,0.203272
2,STA_CRA,180,0,0,163,42,11,2,5,650,...,0.319516,1152,0.258122,498,0.111584,223,0.049966,0,0.000000,0.040332
3,POV_DUN,10,4,0,315,105,26,5,6,960,...,0.020838,1678,0.375980,913,0.204571,457,0.102397,4,0.000896,0.002241
4,DEG_FES_OVO4,962,0,0,151,89,12,1,0,899,...,0.404970,989,0.221401,273,0.061115,253,0.056638,0,0.000000,0.215357
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,YOU,10,9,1,287,173,41,2,2,931,...,0.005822,1771,0.396552,1020,0.228392,505,0.113077,9,0.002015,0.002239
76,HAT,166,1,8,117,117,59,62,28,1204,...,0.145954,1756,0.392490,612,0.136790,383,0.085606,1,0.000224,0.037103
77,MOO,706,6,2,192,102,22,1,1,545,...,0.317013,792,0.177062,734,0.164096,318,0.071093,6,0.001341,0.157836
78,URI,710,15,0,58,91,21,1,0,344,...,0.581302,397,0.088794,208,0.046522,171,0.038246,15,0.003355,0.158801


In [134]:
landscape_2000 = pd.read_csv('../2022/Landscape/2022_scm_cdl_2000m.csv')
landscape_2000["Prop_Corn"] = landscape_2000['Corn'] / landscape_2000['Total']
landscape_2000

Unnamed: 0,Site_ID,Corn,Open Water,Sweet Corn,Developed/Open Space,Developed/Low Intensity,Developed/Med Intensity,Developed/High Intensity,Barren,Deciduous Forest,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Prop_Corn
0,STA_LOT,2857,3,10,810,506,134,30,17,1228,...,0.542664,1958,0.123393,1109,0.069889,1497,0.094341,3,0.000189,0.180048
1,STA_PAD,2626,5,1,1124,715,63,6,9,2721,...,0.288983,3923,0.247258,3059,0.192802,1917,0.120824,5,0.000315,0.165511
2,STA_CRA,1550,11,0,689,217,34,7,7,3241,...,0.273644,6034,0.380190,2069,0.130364,954,0.060110,11,0.000693,0.097662
3,POV_DUN,88,78,2,1221,408,94,21,123,3835,...,0.032850,7209,0.454540,3400,0.214376,1867,0.117718,78,0.004918,0.005549
4,DEG_FES_OVO4,2245,0,2,756,267,31,9,5,4703,...,0.298525,5608,0.353416,1838,0.115831,1068,0.067305,0,0.000000,0.141480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,YOU,75,25,3,886,664,112,5,5,4110,...,0.009959,7733,0.487425,3870,0.243933,1672,0.105389,25,0.001576,0.004727
76,HAT,496,1,8,680,640,220,113,31,4763,...,0.212669,5805,0.365900,2360,0.148755,1684,0.106146,1,0.000063,0.031264
77,MOO,2316,11,6,490,507,133,23,3,2524,...,0.276132,3751,0.236477,3202,0.201866,1156,0.072879,11,0.000693,0.146009
78,URI,2497,19,263,257,372,75,21,10,2178,...,0.530947,2785,0.175533,1365,0.086033,735,0.046325,19,0.001198,0.157381


### Adult Abundance Count Data

In [117]:
scm_counts = pd.read_csv("Data/2023_scm_counts.csv")
scm_counts = clean_columns(scm_counts)
scm_counts["date"] = pd.to_datetime(scm_counts["date"], format='%m/%d/%y')
scm_counts["week"] = scm_counts["date"].dt.strftime("%U").astype(int) 
scm_counts


Unnamed: 0,card_id,data_collector,site,date,n_scm_i_m,n_scm_i_f,n_scm_o_m,n_scm_o_f,n_d_florilega_i,n_d_florilega_o,initials,notes,week
0,2001,Anna DiPaola,DIP_CUR,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,YVH,,11
1,2002,Anna DiPaola,DIP_CUR,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,YVH,,11
2,2003,Anna DiPaola,POV_DUN,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,YVH,,11
3,2004,Anna DiPaola,POV_DUN,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,YVH,,11
4,2005,Anna DiPaola,DIP_FLE,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,YVH,,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,2422,Roberto Regaldo,WIS_KEN_1,2023-05-05,,,,,,,,,18
422,2423,Ken Wise,WIS_COO_1,2023-05-03,,,,,,,,,18
423,2424,Ken Wise,WIS_COO_1,2023-05-03,,,,,,,,,18
424,2425,Ken Wise,WIS_COO_2,2023-05-03,,,,,,,,,18


### GDD Data

In [119]:
gdd = pd.read_csv("Data/2023_scm_gdd.csv")
gdd = clean_columns(gdd)
gdd["date"] = pd.to_datetime(gdd["date"], format='%Y-%m-%d')
gdd["week"] = gdd["date"].dt.strftime("%U").astype(int) 
gdd


Unnamed: 0,record_id,date,temp_max_f,temp_min_f,temp_mean_f,gdd_fahrenheit_simple,gdd_fahrenheit_sine,gdd_fahrenheit_simple_cumsum,gdd_fahrenheit_sine_cum_sum,temp_max_c,temp_min_c,temp_mean_c,gdd_celsius_simple,gdd_celsius_sine,gdd_celsius_simple_cum_sum,gdd_celsius_sine_cum_sum,week
0,POV_DUN,2023-01-01,51,36,43.5,4.5,5.083043,4.5,5.083043,10.555556,2.222222,6.388889,2.500000,2.823913,2.500000,2.823913,1
1,POV_DUN,2023-01-02,43,37,40.0,1.0,1.509001,5.5,6.592044,6.111111,2.777778,4.444444,0.555556,0.838334,3.055556,3.662247,1
2,POV_DUN,2023-01-03,48,29,38.5,0.0,2.779669,5.5,9.371713,8.888889,-1.666667,3.611111,0.000000,1.544261,3.055556,5.206507,1
3,POV_DUN,2023-01-04,52,38,45.0,6.0,6.115838,11.5,15.487552,11.111111,3.333333,7.222222,3.333333,3.397688,6.388889,8.604195,1
4,POV_DUN,2023-01-05,57,37,47.0,8.0,8.273364,19.5,23.760915,13.888889,2.777778,8.333333,4.444444,4.596313,10.833333,13.200508,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10131,ONE_STA_1,2023-06-26,80,65,72.5,33.5,33.500000,1438.0,1496.852715,26.666667,18.333333,22.500000,18.611111,18.611111,798.888889,831.584842,26
10132,ONE_STA_1,2023-06-27,85,65,75.0,36.0,36.000000,1474.0,1532.852715,29.444444,18.333333,23.888889,20.000000,20.000000,818.888889,851.584842,26
10133,ONE_STA_1,2023-06-28,80,62,71.0,32.0,32.000000,1506.0,1564.852715,26.666667,16.666667,21.666667,17.777778,17.777778,836.666667,869.362619,26
10134,ONE_STA_1,2023-06-29,67,59,63.0,24.0,24.000000,1530.0,1588.852715,19.444444,15.000000,17.222222,13.333333,13.333333,850.000000,882.695953,26


### Wire Mesh Data (Risk)

In [55]:
wire_mesh = pd.read_csv("Data/2023_wire_mesh.csv")
wire_mesh = clean_columns(wire_mesh)
wire_mesh["number_wireworm"] = (
    wire_mesh["number_wireworm"].replace(">10", 10).astype(float)
)
wire_mesh["total_phorid_maggots"] = (
    wire_mesh["total_phorid_maggots"].replace(">10", 10).astype(float)
)
wire_mesh["total_phorid_pupae"] = (
    wire_mesh["total_phorid_pupae"].replace(">10", 10).astype(float)
)

wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes
0,1.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,
1,2.0,3011.0,R,Loose,,,,0.0,0.0,0.0,,1.0,2.0,
2,3.0,3011.0,R,Total,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,
3,4.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,5.0,3009.0,R,Loose,,,,0.0,0.0,0.0,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5446,5447.0,,,Loose,,,,,,,,,,
5447,5448.0,,,Total,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
5448,5449.0,,,Bean,,,,,,,,,,
5449,5450.0,,,Loose,,,,,,,,,,


### Wire Mesh Intake

Contains mapping for container ID and field ID. 

In [56]:
wire_mesh_intake = pd.read_csv("Data/2023_wire_mesh_intake.csv")
wire_mesh_intake = clean_columns(wire_mesh_intake)
wire_mesh_intake


Unnamed: 0,container_number,container_type,project,record_id,collection_date,notes
0,3001,deli cup,R,POV_DUN,4/18/23,
1,3002,deli cup,R,POV_DUN,4/18/23,
2,3003,deli cup,R,DIP_CUR,4/18/23,
3,3004,deli cup,R,DIP_CUR,4/18/23,
4,3005,deli cup,R,DIP_PAT,4/18/23,
...,...,...,...,...,...,...
2135,5134,sushi container,R,SMI_CAN,6/12/23,
2136,5135,sushi container,R,SMI_KEL,6/27/23,
2137,5136,sushi container,R,SMI_KEL,6/27/23,
2138,5137,sushi container,R,SMI_KEL,6/27/23,


In [57]:
wire_mesh = wire_mesh.merge(
    wire_mesh_intake[["container_number", "container_type", 'collection_date', "record_id"]],
    on="container_number",
    how="left",
)
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
0,1.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,,deli cup,4/17/23,GAB_ALL
1,2.0,3011.0,R,Loose,,,,0.0,0.0,0.0,,1.0,2.0,,deli cup,4/17/23,GAB_ALL
2,3.0,3011.0,R,Total,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
3,4.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
4,5.0,3009.0,R,Loose,,,,0.0,0.0,0.0,,0.0,0.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5455,5447.0,,,Loose,,,,,,,,,,,,,
5456,5448.0,,,Total,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5457,5449.0,,,Bean,,,,,,,,,,,,,
5458,5450.0,,,Loose,,,,,,,,,,,,,


In [58]:
bean_containers = wire_mesh[wire_mesh['seed_type']=='Bean']
bean_containers = bean_containers['container_number']
bean_containers

0       3011.0
6       3068.0
12      3010.0
18      3003.0
27      3042.0
         ...  
5445       NaN
5448       NaN
5451       NaN
5454       NaN
5457       NaN
Name: container_number, Length: 1048, dtype: float64

In [59]:
corn_containers = wire_mesh[wire_mesh['seed_type']=='Corn']
corn_containers = corn_containers['container_number']
corn_containers

3       3009.0
9       3067.0
15      3012.0
21      3005.0
24      3004.0
         ...  
5427    5066.0
5430    4757.0
5433    4844.0
5436    5041.0
5439    5110.0
Name: container_number, Length: 772, dtype: float64

In [60]:
bean_data = wire_mesh[wire_mesh['container_number'].isin(bean_containers)]
bean_data = wire_mesh.loc[wire_mesh['seed_type'] == 'Total']
bean_data.loc[:, 'seed_type'] = 'Bean'
bean_data

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
2,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
5,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
8,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,4/18/23,SMI_JOH_2
11,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/18/23,SMI_JOH_2
14,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5447,5439.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5450,5442.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5453,5445.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5456,5448.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [61]:
corn_data = wire_mesh[wire_mesh['container_number'].isin(corn_containers)]
corn_data = wire_mesh.loc[wire_mesh['seed_type'] == 'Total']
corn_data.loc[:, 'seed_type'] = 'Corn'
corn_data

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
5,6.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
8,9.0,3068.0,R,Corn,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,4/18/23,SMI_JOH_2
11,12.0,3067.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/18/23,SMI_JOH_2
14,15.0,3010.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5447,5439.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5450,5442.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5453,5445.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5456,5448.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [122]:
wire_mesh = pd.concat([bean_data, corn_data], axis=0) 
wire_mesh = wire_mesh.loc[wire_mesh['project'] == 'R']
wire_mesh["collection_date"] = pd.to_datetime(wire_mesh["collection_date"], format='%m/%d/%y')
wire_mesh["week"] = wire_mesh["collection_date"].dt.strftime("%U").astype(int) 
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id,week
2,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,2023-04-17,GAB_ALL,16
5,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-17,GAB_STE,16
8,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,2023-04-18,SMI_JOH_2,16
11,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-18,SMI_JOH_2,16
14,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,2023-04-17,GAB_STE,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,,deli cup,2023-06-06,DIP_FLE,23
5237,5229.0,5034.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-06-06,DIP_FLE,23
5411,5403.0,4214.0,R,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-05-19,WIS_KEN,20
5441,5433.0,5110.0,R,Corn,10.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,deli cup,2023-06-12,SMI_CAN,24


# Adult Abundance

Farms with first emergence data - 

- DIP_CUR
- DIP_FLE
- POV_DUN
- GAB_STE

In [6]:
first_emergence_farms = ["DIP_CUR", "DIP_FLE", "POV_DUN", "GAB_STE"]
first_emergence_farms


['DIP_CUR', 'DIP_FLE', 'POV_DUN', 'GAB_STE']

In [7]:
for farm in first_emergence_farms:
    count_tbl_name = farm + "_counts"
    gdd_tbl_name = farm + "_gdd"

    count_tbl_name = scm_counts[scm_counts["site"] == farm]
    gdd_tbl_name = gdd.loc[gdd["record_id"] == farm]

    pred_avg = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_simple_cumsum"] >= 301][
        "date"
    ].values[0]
    pred_sin = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_sine_cum_sum"] >= 222][
        "date"
    ].values[0]
    actual = count_tbl_name.loc[
        (count_tbl_name["n_scm_i_m"] >= 1)
        | (count_tbl_name["n_scm_o_m"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
    ]["date"].values[0]

    print("The actual first emergence date at " + farm + " is " + actual)

    print(
        "The predicted first emergence date using the simple average model at "
        + farm
        + " is "
        + pred_avg
    )
    print(
        "The predicted date using the simple average model was "
        + str(
            (
                datetime.strptime(pred_avg, "%Y-%m-%d").date()
                - datetime.strptime(actual, "%Y-%m-%d").date()
            ).days
        )
        + " days late"
    )

    print(
        "The predicted first emergence date using the sine wave model at "
        + farm
        + " is "
        + pred_sin
    )
    print(
        "The predicted date using the sine wave model was "
        + str(
            (
                datetime.strptime(pred_sin, "%Y-%m-%d").date()
                - datetime.strptime(actual, "%Y-%m-%d").date()
            ).days
        )
        + " days late"
    )

    print(" ")


The actual first emergence date at DIP_CUR is 2023-04-06
The predicted first emergence date using the simple average model at DIP_CUR is 2023-04-23
The predicted date using the simple average model was 17 days late
The predicted first emergence date using the sine wave model at DIP_CUR is 2023-04-14
The predicted date using the sine wave model was 8 days late
 
The actual first emergence date at DIP_FLE is 2023-03-28
The predicted first emergence date using the simple average model at DIP_FLE is 2023-04-21
The predicted date using the simple average model was 24 days late
The predicted first emergence date using the sine wave model at DIP_FLE is 2023-04-12
The predicted date using the sine wave model was 15 days late
 
The actual first emergence date at POV_DUN is 2023-03-28
The predicted first emergence date using the simple average model at POV_DUN is 2023-04-22
The predicted date using the simple average model was 25 days late
The predicted first emergence date using the sine wave m

## New GDD Thresholds

Based on average cumulative GDD for four sites with first emergence dates. 

Simple Average Model - GDD Average for Actual Emergence

In [8]:
np.mean([54, 62.5, 94, 58.5])


67.25

Sine Wave Model - GDD Average for Actual Emergence

In [9]:
np.mean([109.4494181, 118.5207495, 147.4329546, 130.9217006])


126.5812057