In [490]:
import pandas as pd
import numpy as np
from skimpy import clean_columns
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


- [Guide for Generating PDFs](https://towardsdatascience.com/how-to-create-pdf-reports-with-python-the-essential-guide-c08dd3ebf2ee)  
- [Dynamic PDF Generation](https://advicement.io/dynamic-documents-api/documentation/getting-started)

### Climate Data

In [491]:
climate = pd.read_csv("Data/2023_scm_climate.csv")
climate = clean_columns(climate)
climate["date"] = pd.to_datetime(climate["date"], format="%m/%d/%y")
climate["week"] = climate["date"].dt.strftime("%U").astype(int)
climate


Unnamed: 0,record_id,longitude,latitude,elevation,date,rain_inches,min_temp_f,ave_temp_f,max_temp_f,week
0,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-01,0.12,36.6,43.8,51.1,1
1,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-02,0.00,36.0,38.8,41.5,1
2,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-03,0.00,31.5,39.4,47.2,1
3,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-04,0.53,36.7,44.9,53.1,1
4,DEG_DIE_HILL,-76.1495,42.6025,1339,2023-01-05,0.35,37.5,46.9,56.2,1
...,...,...,...,...,...,...,...,...,...,...
10131,ZUE_REE,-76.9833,42.7834,669,2023-06-26,0.06,66.3,74.2,82.2,26
10132,ZUE_REE,-76.9833,42.7834,669,2023-06-27,1.24,63.4,72.7,82.1,26
10133,ZUE_REE,-76.9833,42.7834,669,2023-06-28,0.10,60.4,70.2,80.0,26
10134,ZUE_REE,-76.9833,42.7834,669,2023-06-29,0.01,58.7,62.8,66.9,26


### Weekly Climate

In [492]:
weekly_climate = (
    climate[
        ["record_id", "week", "rain_inches", "min_temp_f", "ave_temp_f", "max_temp_f"]
    ]
    .groupby(["record_id", "week"])
    .mean()
)
weekly_climate = weekly_climate.reset_index()
weekly_climate = clean_columns(weekly_climate)
weekly_climate["next_week"] = weekly_climate["week"] + 1
weekly_climate


Unnamed: 0,record_id,week,rain_inches,min_temp_f,ave_temp_f,max_temp_f,next_week
0,DEG_DIE_HILL,1,0.154286,34.642857,41.114286,47.557143,2
1,DEG_DIE_HILL,2,0.112857,23.842857,29.471429,35.042857,3
2,DEG_DIE_HILL,3,0.177143,22.714286,27.600000,32.457143,4
3,DEG_DIE_HILL,4,0.111429,24.714286,28.757143,32.828571,5
4,DEG_DIE_HILL,5,0.020000,9.642857,18.814286,27.971429,6
...,...,...,...,...,...,...,...
1451,ZUE_REE,22,0.000000,54.914286,68.785714,82.628571,23
1452,ZUE_REE,23,0.024286,50.285714,58.514286,66.771429,24
1453,ZUE_REE,24,0.410000,56.385714,63.528571,70.671429,25
1454,ZUE_REE,25,0.024286,59.000000,67.857143,76.728571,26


### Landscape Data

In [493]:
landscape_500 = pd.read_csv("Landscape/2023_scm_cdl_500m.csv")
landscape_500["Prop_Corn"] = landscape_500["Corn"] / landscape_500["Total"]
landscape_500 = clean_columns(landscape_500).drop("unnamed_0", axis=1)
landscape_500


Unnamed: 0,site_id,corn,open_water,sweet_corn,developed_open_space,developed_low_intensity,developed_med_intensity,developed_high_intensity,barren,deciduous_forest,...,ag_prop,nat,nat_prop,semi_nat,semi_nat_prop,dev,dev_prop,other,other_prop,prop_corn
0,POV_DUN,6,0,0,62,11,0,0,0,230,...,0.00951,3,0.002195,289,0.211412,0,0.0,0.0,0.0,0.004389
1,DIP_FLE,386,0,0,21,22,5,0,0,135,...,0.365764,1,0.000732,146,0.106803,0,0.0,0.0,0.0,0.28237
2,DIP_CUR,292,0,0,55,21,3,0,0,87,...,0.338203,0,0.0,140,0.102264,0,0.0,0.0,0.0,0.213294
3,GAB_STE,136,0,0,14,19,12,5,3,195,...,0.144217,3,0.002196,113,0.082723,3,0.002196,0.0,0.0,0.099561
4,MIL_YOD_1,158,0,0,0,0,0,0,0,331,...,0.326754,1,0.000731,53,0.038743,0,0.0,0.0,0.0,0.115497
5,MIL_CHA_1,430,0,0,67,10,9,0,0,50,...,0.475842,1,0.000732,67,0.049048,0,0.0,0.0,0.0,0.314788
6,MIL_MCC_1,293,0,0,3,1,0,0,0,193,...,0.36896,0,0.0,37,0.027086,0,0.0,0.0,0.0,0.214495
7,KAT_SAU_1,152,0,0,28,7,4,1,0,205,...,0.20365,0,0.0,93,0.067883,0,0.0,0.0,0.0,0.110949
8,KAT_SAU_2,172,0,0,54,7,3,0,0,67,...,0.141082,1,0.000731,132,0.096491,0,0.0,0.0,0.0,0.125731
9,KAT_TEE_1,201,0,0,54,11,5,0,0,282,...,0.158279,0,0.0,114,0.083151,0,0.0,0.0,0.0,0.146608


In [494]:
landscape_1000 = pd.read_csv("Landscape/2023_scm_cdl_1000m.csv")
landscape_1000["Prop_Corn"] = landscape_1000["Corn"] / landscape_1000["Total"]
landscape_1000 = clean_columns(landscape_1000).drop("unnamed_0", axis=1)
landscape_1000


Unnamed: 0,site_id,corn,open_water,sweet_corn,developed_open_space,developed_low_intensity,developed_med_intensity,developed_high_intensity,barren,deciduous_forest,...,ag_prop,nat,nat_prop,semi_nat,semi_nat_prop,dev,dev_prop,other,other_prop,prop_corn
0,POV_DUN,10,4,0,317,105,26,5,6,972,...,0.018784,14,0.003131,915,0.204606,6,0.001342,0.0,0.0,0.002236
1,DIP_FLE,1080,2,14,45,82,12,3,0,442,...,0.468414,3,0.000672,402,0.090054,0,0.0,0.0,0.0,0.241935
2,DIP_CUR,1205,0,0,149,66,35,18,2,488,...,0.403988,6,0.001344,484,0.108447,2,0.000448,0.0,0.0,0.269998
3,GAB_STE,359,0,0,42,39,16,5,18,1321,...,0.151814,22,0.004926,413,0.092476,18,0.00403,0.0,0.0,0.080385
4,MIL_YOD_1,745,0,0,131,17,6,1,1,953,...,0.349473,4,0.000897,627,0.140551,1,0.000224,0.0,0.0,0.167003
5,MIL_CHA_1,1480,0,1,151,21,22,1,0,534,...,0.521505,1,0.000224,321,0.071909,0,0.0,0.0,0.0,0.331541
6,MIL_MCC_1,891,0,0,127,17,3,0,0,700,...,0.379789,2,0.000448,420,0.094107,0,0.0,0.0,0.0,0.199641
7,KAT_SAU_1,340,0,0,213,33,7,3,1,1155,...,0.113524,2,0.000448,481,0.107703,1,0.000224,0.0,0.0,0.076131
8,KAT_SAU_2,632,1,0,253,33,12,2,1,699,...,0.165062,2,0.000448,572,0.128108,1,0.000224,0.0,0.0,0.141545
9,KAT_TEE_1,565,11,0,183,44,17,1,0,1084,...,0.148869,0,0.0,605,0.135438,0,0.0,0.0,0.0,0.126483


In [495]:
landscape_2000 = pd.read_csv("Landscape/2023_scm_cdl_2000m.csv")
landscape_2000["Prop_Corn"] = landscape_2000["Corn"] / landscape_2000["Total"]
landscape_2000 = clean_columns(landscape_2000).drop("unnamed_0", axis=1)
landscape_2000


Unnamed: 0,site_id,corn,open_water,sweet_corn,developed_open_space,developed_low_intensity,developed_med_intensity,developed_high_intensity,barren,deciduous_forest,...,ag_prop,nat,nat_prop,semi_nat,semi_nat_prop,dev,dev_prop,other,other_prop,prop_corn
0,POV_DUN,88,78,2,1218,407,94,21,123,3831,...,0.032158,153,0.009648,3407,0.214831,123,0.007756,0.0,0.0,0.005549
1,DIP_FLE,3139,3,32,379,543,90,9,5,3040,...,0.35879,24,0.001512,1581,0.099622,5,0.000315,0.0,0.0,0.197795
2,DIP_CUR,3123,3,2,640,282,91,63,6,3298,...,0.261459,30,0.001891,2735,0.172436,6,0.000378,0.0,0.0,0.196898
3,GAB_STE,1697,147,4,209,243,127,18,39,4855,...,0.18258,52,0.003277,2556,0.161089,39,0.002458,0.0,0.0,0.106952
4,MIL_YOD_1,2028,2,0,526,97,45,5,3,3858,...,0.314264,17,0.001071,2828,0.178175,3,0.000189,0.0,0.0,0.127772
5,MIL_CHA_1,4277,0,2,572,118,61,8,4,3123,...,0.475455,10,0.00063,1405,0.088537,4,0.000252,0.0,0.0,0.269519
6,MIL_MCC_1,2122,32,0,691,546,294,58,5,3019,...,0.29671,18,0.001135,2356,0.148512,5,0.000315,0.0,0.0,0.133762
7,KAT_SAU_1,1178,1,0,719,67,22,5,1,5814,...,0.091861,7,0.000441,1869,0.117836,1,6.3e-05,0.0,0.0,0.07427
8,KAT_SAU_2,1894,1,2,722,73,31,5,2,4581,...,0.144568,12,0.000757,2242,0.141353,2,0.000126,0.0,0.0,0.119412
9,KAT_TEE_1,1027,12,0,539,85,48,4,0,6324,...,0.091734,1,6.3e-05,1520,0.095833,0,0.0,0.0,0.0,0.06475


### Soil

In [496]:
soil = pd.read_csv("Data/2023_scm_soil.csv")
soil = clean_columns(soil)
soil


Unnamed: 0,record_id,ny_soils_0,ny_soils_01,muid,hsg,hsgint,muserial,hsga,hsgb,hsgc,hsgd,hsgw
0,POV_DUN,2404,4346,NY134,A,1,6801,58,27,0,15,0
1,DIP_FLE,2190,4201,NY131,A,1,6798,0,14,63,23,0
2,DIP_CUR,2085,5669,NY134,A,1,6801,58,27,0,15,0
3,GAB_STE,1376,5311,NY084,A,1,6752,62,23,13,2,0
4,SMI_CRO,1779,5454,NY131,A,1,6798,0,14,63,23,0
5,SMI_CAN,1511,5436,NY128,B,2,6795,0,90,6,4,0
6,SMI_KEL,1659,5444,NY007,C,3,6681,0,40,48,12,0
7,SMI_COB,2029,3921,NY099,A,1,6767,54,28,6,12,0
8,SMI_DOU,1349,3760,NY007,C,3,6681,0,40,48,12,0
9,SMI_MCC,1401,3769,NY142,D,4,6809,0,34,31,35,0


### Management

In [497]:
management = pd.read_csv("Data/2023_management.csv")
management = clean_columns(management)
management


Unnamed: 0,field_id,cover_crop,manure
0,POV_DUN,No,No
1,DIP_FLE,Yes,No
2,DIP_CUR,Yes,No
3,GAB_STE,No,Yes
4,SMI_CRO,No,No
5,SMI_CAN,No,Yes
6,SMI_KEL,Yes,Yes
7,SMI_COB,Yes,No
8,SMI_DOU,No,Yes
9,SMI_MCC,No,No


### Planting Date

In [498]:
planting_date = pd.read_csv("Data/2023_scm_planting_dates.csv")
planting_date = clean_columns(planting_date)
planting_date["planting_date"] = pd.to_datetime(
    planting_date["planting_date"], format="%m/%d/%y"
)
planting_date = planting_date.drop_duplicates("field_id")
planting_date["start_week"] = planting_date["planting_date"] - pd.Timedelta(7, unit="d")
planting_date


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  planting_date["start_week"] = planting_date["planting_date"] - pd.Timedelta(7, unit="d")


Unnamed: 0,field_id,planting_date,start_week
0,LET_TAB,2023-05-07,2023-04-30
2,GAB_ALL,2023-05-09,2023-05-02
4,SMI_CRO,2023-05-09,2023-05-02
6,SMI_DOU,2023-05-10,2023-05-03
8,DEG_PER_STRIP,2023-05-10,2023-05-03
10,DIP_PAT,2023-05-11,2023-05-04
12,KAT_TEE_2,2023-05-11,2023-05-04
16,KAT_TEE_1,2023-05-13,2023-05-06
18,SMI_COB,2023-05-13,2023-05-06
20,SMI_JOH_1,2023-05-13,2023-05-06


Average climate data for the week leading up to planting date, per site. 

In [499]:
planting_temps = climate.merge(
    planting_date, left_on=["record_id"], right_on=["field_id"], how="left"
)
planting_temps = planting_temps.dropna()
planting_temps = planting_temps.loc[
    (planting_temps["date"] >= planting_temps["start_week"])
    & (planting_temps["date"] <= planting_temps["planting_date"])
]
planting_temps = planting_temps.drop(
    ["date", "week", "start_week", "longitude", "latitude", "elevation", "field_id"],
    axis=1,
)
planting_temps = planting_temps.groupby("record_id").mean()
# planting_temps.to_csv('Data/planting_date_climate.csv')
planting_temps


Unnamed: 0_level_0,rain_inches,min_temp_f,ave_temp_f,max_temp_f,planting_date
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DEG_FOU_WOOD,0.0,36.75,51.4625,66.1625,2023-05-18
DEG_PER_STRIP,0.0875,36.325,46.5125,56.725,2023-05-10
DEG_VIL,0.03375,39.4375,52.85,66.275,2023-05-15
DIP_CUR,0.0,38.1875,51.75,65.3,2023-05-19
DIP_FLE,0.02875,41.725,54.275,66.7875,2023-05-23
DIP_PAT,0.04625,39.9625,50.825,61.7,2023-05-11
GAB_ALL,0.05,43.275,53.5625,63.85,2023-05-09
GAB_STE,0.03125,43.7,56.375,69.075,2023-05-23
KAT_BAR,0.02125,37.9125,53.3875,68.8625,2023-05-22
KAT_HAB_2,0.0,43.2125,57.7,72.125,2023-05-29


Average climate data for the week leading up to planting date, across all sites. 

In [500]:
planting_temps.mean(numeric_only=True)


rain_inches     0.028468
min_temp_f     40.874597
ave_temp_f     53.856855
max_temp_f     66.839113
dtype: float64

Average climate data for the week leading up to planting date, per site for early planting dates (before 5/15).

In [501]:
early_planting_temps = planting_temps.loc[
    planting_temps["planting_date"] <= "2023-05-15"
]
early_planting_temps


Unnamed: 0_level_0,rain_inches,min_temp_f,ave_temp_f,max_temp_f,planting_date
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DEG_PER_STRIP,0.0875,36.325,46.5125,56.725,2023-05-10
DEG_VIL,0.03375,39.4375,52.85,66.275,2023-05-15
DIP_PAT,0.04625,39.9625,50.825,61.7,2023-05-11
GAB_ALL,0.05,43.275,53.5625,63.85,2023-05-09
KAT_TEE_1,0.01,39.6375,54.0,68.325,2023-05-13
KAT_TEE_2,0.0375,37.175,48.9375,60.675,2023-05-11
LET_TAB,0.145,37.175,45.7375,54.3375,2023-05-07
ONE_STA_1,0.0075,42.2,56.25,70.3125,2023-05-15
SEA_HOO,0.02125,45.8875,57.6125,69.3,2023-05-15
SMI_COB,0.00875,40.1875,54.625,69.0875,2023-05-13


Average climate data for the week leading up to planting date, across all sites for early planting dates (before 5/15).

In [502]:
early_planting_temps.mean(numeric_only=True)


rain_inches     0.044609
min_temp_f     40.720312
ave_temp_f     52.632031
max_temp_f     64.550000
dtype: float64

In [503]:
early_planting_temps["range"] = (
    early_planting_temps["max_temp_f"] - early_planting_temps["min_temp_f"]
)
early_planting_temps


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  early_planting_temps["range"] = (


Unnamed: 0_level_0,rain_inches,min_temp_f,ave_temp_f,max_temp_f,planting_date,range
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DEG_PER_STRIP,0.0875,36.325,46.5125,56.725,2023-05-10,20.4
DEG_VIL,0.03375,39.4375,52.85,66.275,2023-05-15,26.8375
DIP_PAT,0.04625,39.9625,50.825,61.7,2023-05-11,21.7375
GAB_ALL,0.05,43.275,53.5625,63.85,2023-05-09,20.575
KAT_TEE_1,0.01,39.6375,54.0,68.325,2023-05-13,28.6875
KAT_TEE_2,0.0375,37.175,48.9375,60.675,2023-05-11,23.5
LET_TAB,0.145,37.175,45.7375,54.3375,2023-05-07,17.1625
ONE_STA_1,0.0075,42.2,56.25,70.3125,2023-05-15,28.1125
SEA_HOO,0.02125,45.8875,57.6125,69.3,2023-05-15,23.4125
SMI_COB,0.00875,40.1875,54.625,69.0875,2023-05-13,28.9


In [504]:
np.mean(early_planting_temps["range"])


23.829687500000006

### Adult Abundance Count Data

In [505]:
scm_counts = pd.read_csv("Data/2023_scm_counts.csv")
scm_counts = clean_columns(scm_counts)
scm_counts["date"] = pd.to_datetime(scm_counts["date"], format="%m/%d/%y")
scm_counts["week"] = scm_counts["date"].dt.strftime("%U").astype(int)
scm_counts = scm_counts.drop(['initials', 'notes'], axis=1)
scm_counts

Unnamed: 0,card_id,data_collector,site,date,n_scm_i_m,n_scm_i_f,n_scm_o_m,n_scm_o_f,n_d_florilega_i,n_d_florilega_o,week
0,2001,Anna DiPaola,DIP_CUR,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,11
1,2002,Anna DiPaola,DIP_CUR,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,11
2,2003,Anna DiPaola,POV_DUN,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,11
3,2004,Anna DiPaola,POV_DUN,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,11
4,2005,Anna DiPaola,DIP_FLE,2023-03-13,0.0,0.0,0.0,0.0,0.0,0.0,11
...,...,...,...,...,...,...,...,...,...,...,...
421,2422,Roberto Regaldo,WIS_KEN_1,2023-05-05,,,,,,,18
422,2423,Ken Wise,WIS_COO_1,2023-05-03,,,,,,,18
423,2424,Ken Wise,WIS_COO_1,2023-05-03,,,,,,,18
424,2425,Ken Wise,WIS_COO_2,2023-05-03,,,,,,,18


### GDD Data

In [506]:
gdd = pd.read_csv("Data/2023_scm_gdd.csv")
gdd = clean_columns(gdd)
gdd["date"] = pd.to_datetime(gdd["date"], format="%Y-%m-%d")
gdd["week"] = gdd["date"].dt.strftime("%U").astype(int)
gdd


Unnamed: 0,record_id,date,temp_max_f,temp_min_f,temp_mean_f,gdd_fahrenheit_simple,gdd_fahrenheit_sine,gdd_fahrenheit_simple_cumsum,gdd_fahrenheit_sine_cum_sum,temp_max_c,temp_min_c,temp_mean_c,gdd_celsius_simple,gdd_celsius_sine,gdd_celsius_simple_cum_sum,gdd_celsius_sine_cum_sum,week
0,POV_DUN,2023-01-01,51,36,43.5,4.5,5.083043,4.5,5.083043,10.555556,2.222222,6.388889,2.500000,2.823913,2.500000,2.823913,1
1,POV_DUN,2023-01-02,43,37,40.0,1.0,1.509001,5.5,6.592044,6.111111,2.777778,4.444444,0.555556,0.838334,3.055556,3.662247,1
2,POV_DUN,2023-01-03,48,29,38.5,0.0,2.779669,5.5,9.371713,8.888889,-1.666667,3.611111,0.000000,1.544261,3.055556,5.206507,1
3,POV_DUN,2023-01-04,52,38,45.0,6.0,6.115838,11.5,15.487552,11.111111,3.333333,7.222222,3.333333,3.397688,6.388889,8.604195,1
4,POV_DUN,2023-01-05,57,37,47.0,8.0,8.273364,19.5,23.760915,13.888889,2.777778,8.333333,4.444444,4.596313,10.833333,13.200508,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10131,ONE_STA_1,2023-06-26,80,65,72.5,33.5,33.500000,1438.0,1496.852715,26.666667,18.333333,22.500000,18.611111,18.611111,798.888889,831.584842,26
10132,ONE_STA_1,2023-06-27,85,65,75.0,36.0,36.000000,1474.0,1532.852715,29.444444,18.333333,23.888889,20.000000,20.000000,818.888889,851.584842,26
10133,ONE_STA_1,2023-06-28,80,62,71.0,32.0,32.000000,1506.0,1564.852715,26.666667,16.666667,21.666667,17.777778,17.777778,836.666667,869.362619,26
10134,ONE_STA_1,2023-06-29,67,59,63.0,24.0,24.000000,1530.0,1588.852715,19.444444,15.000000,17.222222,13.333333,13.333333,850.000000,882.695953,26


### Wire Mesh Data (Risk)

In [507]:
wire_mesh = pd.read_csv("Data/2023_wire_mesh.csv")
wire_mesh = clean_columns(wire_mesh)
wire_mesh["number_wireworm"] = (
    wire_mesh["number_wireworm"].replace(">10", 10).astype(float)
)
wire_mesh["total_phorid_maggots"] = (
    wire_mesh["total_phorid_maggots"].replace(">10", 10).astype(float)
)
wire_mesh["total_phorid_pupae"] = (
    wire_mesh["total_phorid_pupae"].replace(">10", 10).astype(float)
)

wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes
0,1.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,
1,2.0,3011.0,R,Loose,,,,0.0,0.0,0.0,,1.0,2.0,
2,3.0,3011.0,R,Total,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,
3,4.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,5.0,3009.0,R,Loose,,,,0.0,0.0,0.0,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5446,5447.0,,,Loose,,,,,,,,,,
5447,5448.0,,,Total,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
5448,5449.0,,,Bean,,,,,,,,,,
5449,5450.0,,,Loose,,,,,,,,,,


### Wire Mesh Intake

Contains mapping for container ID and field ID. 

In [508]:
wire_mesh_intake = pd.read_csv("Data/2023_wire_mesh_intake.csv")
wire_mesh_intake = clean_columns(wire_mesh_intake)
wire_mesh_intake = wire_mesh_intake.loc[wire_mesh_intake["project"] == "R"]
wire_mesh_intake


Unnamed: 0,container_number,container_type,project,record_id,collection_date,notes
0,3001,deli cup,R,POV_DUN,4/18/23,
1,3002,deli cup,R,POV_DUN,4/18/23,
2,3003,deli cup,R,DIP_CUR,4/18/23,
3,3004,deli cup,R,DIP_CUR,4/18/23,
4,3005,deli cup,R,DIP_PAT,4/18/23,
...,...,...,...,...,...,...
2135,5134,sushi container,R,SMI_CAN,6/12/23,
2136,5135,sushi container,R,SMI_KEL,6/27/23,
2137,5136,sushi container,R,SMI_KEL,6/27/23,
2138,5137,sushi container,R,SMI_KEL,6/27/23,


In [509]:
wire_mesh = wire_mesh.merge(
    wire_mesh_intake[
        ["container_number", "container_type", "collection_date", "record_id"]
    ],
    on="container_number",
    how="left",
)
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
0,1.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,,deli cup,4/17/23,GAB_ALL
1,2.0,3011.0,R,Loose,,,,0.0,0.0,0.0,,1.0,2.0,,deli cup,4/17/23,GAB_ALL
2,3.0,3011.0,R,Total,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
3,4.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
4,5.0,3009.0,R,Loose,,,,0.0,0.0,0.0,,0.0,0.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5455,5447.0,,,Loose,,,,,,,,,,,,,
5456,5448.0,,,Total,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5457,5449.0,,,Bean,,,,,,,,,,,,,
5458,5450.0,,,Loose,,,,,,,,,,,,,


In [510]:
bean_containers = wire_mesh[wire_mesh["seed_type"] == "Bean"]
bean_containers = bean_containers["container_number"]
bean_containers


0       3011.0
6       3068.0
12      3010.0
18      3003.0
27      3042.0
         ...  
5445       NaN
5448       NaN
5451       NaN
5454       NaN
5457       NaN
Name: container_number, Length: 1048, dtype: float64

In [511]:
corn_containers = wire_mesh[wire_mesh["seed_type"] == "Corn"]
corn_containers = corn_containers["container_number"]
corn_containers


3       3009.0
9       3067.0
15      3012.0
21      3005.0
24      3004.0
         ...  
5427    5066.0
5430    4757.0
5433    4844.0
5436    5041.0
5439    5110.0
Name: container_number, Length: 772, dtype: float64

In [512]:
bean_data = wire_mesh[wire_mesh["container_number"].isin(bean_containers)]
bean_data = wire_mesh.loc[wire_mesh["seed_type"] == "Total"]
bean_data.loc[:, "seed_type"] = "Bean"
bean_data


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
2,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
5,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
8,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,4/18/23,SMI_JOH_2
11,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/18/23,SMI_JOH_2
14,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5447,5439.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5450,5442.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5453,5445.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5456,5448.0,,,Bean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [513]:
corn_data = wire_mesh[wire_mesh["container_number"].isin(corn_containers)]
corn_data = wire_mesh.loc[wire_mesh["seed_type"] == "Total"]
corn_data.loc[:, "seed_type"] = "Corn"
corn_data


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,4/17/23,GAB_ALL
5,6.0,3009.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/17/23,GAB_STE
8,9.0,3068.0,R,Corn,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,4/18/23,SMI_JOH_2
11,12.0,3067.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,4/18/23,SMI_JOH_2
14,15.0,3010.0,R,Corn,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,4/17/23,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5447,5439.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5450,5442.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5453,5445.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
5456,5448.0,,,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,


In [514]:
wire_mesh = pd.concat([bean_data, corn_data], axis=0)
wire_mesh = wire_mesh.loc[wire_mesh["project"] == "R"]
wire_mesh["collection_date"] = pd.to_datetime(
    wire_mesh["collection_date"], format="%m/%d/%y"
)
# wire_mesh["week"] = wire_mesh["collection_date"].dt.strftime("%U").astype(int)
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id
2,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,2023-04-17,GAB_ALL
5,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-17,GAB_STE
8,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,2023-04-18,SMI_JOH_2
11,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-18,SMI_JOH_2
14,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,2023-04-17,GAB_STE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,,deli cup,2023-06-06,DIP_FLE
5237,5229.0,5034.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-06-06,DIP_FLE
5411,5403.0,4214.0,R,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-05-19,WIS_KEN
5441,5433.0,5110.0,R,Corn,10.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,deli cup,2023-06-12,SMI_CAN


In [515]:
wire_mesh = wire_mesh.merge(
    management, left_on="record_id", right_on="field_id", how="left"
)
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,phorid_seeds,total_phorid_maggots,total_phorid_pupae,notes,container_type,collection_date,record_id,field_id,cover_crop,manure
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,1.0,2.0,3.0,,deli cup,2023-04-17,GAB_ALL,GAB_ALL,No,Yes
1,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-17,GAB_STE,GAB_STE,No,Yes
2,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,2.0,7.0,9.0,,deli cup,2023-04-18,SMI_JOH_2,SMI_JOH_2,Yes,No
3,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-04-18,SMI_JOH_2,SMI_JOH_2,Yes,No
4,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,19.0,,deli cup,2023-04-17,GAB_STE,GAB_STE,No,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,,deli cup,2023-06-06,DIP_FLE,DIP_FLE,Yes,No
566,5229.0,5034.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-06-06,DIP_FLE,DIP_FLE,Yes,No
567,5403.0,4214.0,R,Corn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,deli cup,2023-05-19,WIS_KEN,WIS_KEN,Yes,No
568,5433.0,5110.0,R,Corn,10.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,deli cup,2023-06-12,SMI_CAN,SMI_CAN,No,Yes


In [516]:
wire_mesh = wire_mesh.merge(soil, on="record_id", how="left")
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,ny_soils_01,muid,hsg,hsgint,muserial,hsga,hsgb,hsgc,hsgd,hsgw
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,5295.0,NY138,C,3.0,6805.0,0.0,11.0,63.0,26.0,0.0
1,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,5311.0,NY084,A,1.0,6752.0,62.0,23.0,13.0,2.0,0.0
2,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,...,5478.0,NY142,D,4.0,6809.0,0.0,34.0,31.0,35.0,0.0
3,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,5478.0,NY142,D,4.0,6809.0,0.0,34.0,31.0,35.0,0.0
4,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,5311.0,NY084,A,1.0,6752.0,62.0,23.0,13.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,4201.0,NY131,A,1.0,6798.0,0.0,14.0,63.0,23.0,0.0
566,5229.0,5034.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,4201.0,NY131,A,1.0,6798.0,0.0,14.0,63.0,23.0,0.0
567,5403.0,4214.0,R,Corn,0.0,0.0,0.0,0.0,0.0,0.0,...,5799.0,NY062,C,3.0,6734.0,6.0,0.0,90.0,4.0,0.0
568,5433.0,5110.0,R,Corn,10.0,0.0,0.0,0.0,0.0,1.0,...,5436.0,NY128,B,2.0,6795.0,0.0,90.0,6.0,4.0,0.0


In [517]:
wire_mesh["one_week"] = wire_mesh["collection_date"] - pd.Timedelta(7, unit="d")
wire_mesh["two_weeks"] = wire_mesh["collection_date"] - pd.Timedelta(14, unit="d")
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,hsg,hsgint,muserial,hsga,hsgb,hsgc,hsgd,hsgw,one_week,two_weeks
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,C,3.0,6805.0,0.0,11.0,63.0,26.0,0.0,2023-04-10,2023-04-03
1,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,A,1.0,6752.0,62.0,23.0,13.0,2.0,0.0,2023-04-10,2023-04-03
2,9.0,3068.0,R,Bean,8.0,0.0,0.0,0.0,5.0,1.0,...,D,4.0,6809.0,0.0,34.0,31.0,35.0,0.0,2023-04-11,2023-04-04
3,12.0,3067.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,D,4.0,6809.0,0.0,34.0,31.0,35.0,0.0,2023-04-11,2023-04-04
4,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,A,1.0,6752.0,62.0,23.0,13.0,2.0,0.0,2023-04-10,2023-04-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,A,1.0,6798.0,0.0,14.0,63.0,23.0,0.0,2023-05-30,2023-05-23
566,5229.0,5034.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,A,1.0,6798.0,0.0,14.0,63.0,23.0,0.0,2023-05-30,2023-05-23
567,5403.0,4214.0,R,Corn,0.0,0.0,0.0,0.0,0.0,0.0,...,C,3.0,6734.0,6.0,0.0,90.0,4.0,0.0,2023-05-12,2023-05-05
568,5433.0,5110.0,R,Corn,10.0,0.0,0.0,0.0,0.0,1.0,...,B,2.0,6795.0,0.0,90.0,6.0,4.0,0.0,2023-06-05,2023-05-29


In [518]:
wire_mesh_dates = wire_mesh[["record_id", "one_week", "two_weeks"]]
wire_mesh_dates


Unnamed: 0,record_id,one_week,two_weeks
0,GAB_ALL,2023-04-10,2023-04-03
1,GAB_STE,2023-04-10,2023-04-03
2,SMI_JOH_2,2023-04-11,2023-04-04
3,SMI_JOH_2,2023-04-11,2023-04-04
4,GAB_STE,2023-04-10,2023-04-03
...,...,...,...
565,DIP_FLE,2023-05-30,2023-05-23
566,DIP_FLE,2023-05-30,2023-05-23
567,WIS_KEN,2023-05-12,2023-05-05
568,SMI_CAN,2023-06-05,2023-05-29


In [519]:
df = climate.merge(
    wire_mesh, left_on=["record_id"], right_on=["record_id"], how="left"
)[
    [
        "record_id",
        "date",
        "rain_inches",
        "min_temp_f",
        "ave_temp_f",
        "max_temp_f",
        "one_week",
        "two_weeks",
        "collection_date",
        'week'
    ]
]
df = df.dropna()
df = df.loc[(df['date'] >= df['one_week']) & (df['date'] <= df['collection_date'])].groupby(['record_id', 'collection_date'], as_index = False).mean(['rain_inches', 'min_temp_f', 'ave_temp_f', 'max_temp_f'])
df = df.rename(columns={"rain_inches": "rain_inches_one_week", "min_temp_f": "min_temp_f_one_week", "ave_temp_f": "ave_temp_f_one_week", "max_temp_f": "max_temp_f_one_week"})
wire_mesh = wire_mesh.merge(df, left_on=['record_id', 'collection_date'], right_on=['record_id', 'collection_date'])
wire_mesh


Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,hsgc,hsgd,hsgw,one_week,two_weeks,rain_inches_one_week,min_temp_f_one_week,ave_temp_f_one_week,max_temp_f_one_week,week
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,63.0,26.0,0.0,2023-04-10,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250
1,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,63.0,26.0,0.0,2023-04-10,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,63.0,26.0,0.0,2023-04-10,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250
3,18.0,3012.0,R,Corn,10.0,0.0,2.0,0.0,0.0,1.0,...,63.0,26.0,0.0,2023-04-10,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250
4,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,13.0,2.0,0.0,2023-04-10,2023-04-03,0.00250,47.0250,61.6500,76.2875,15.250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,5043.0,5117.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,63.0,23.0,0.0,2023-06-05,2023-05-29,0.04125,49.6500,59.8250,70.0000,23.250
460,5184.0,5033.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,63.0,23.0,0.0,2023-05-30,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375
461,5229.0,5034.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,63.0,23.0,0.0,2023-05-30,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375
462,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,63.0,23.0,0.0,2023-05-30,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375


In [520]:
df = climate.merge(
    wire_mesh, left_on=["record_id"], right_on=["record_id"], how="left"
)[
    [
        "record_id",
        "date",
        "rain_inches",
        "min_temp_f",
        "ave_temp_f",
        "max_temp_f",
        "one_week",
        "two_weeks",
        "collection_date",
    ]
]
df = df.dropna()
df = df.loc[(df['date'] >= df['two_weeks']) & (df['date'] <= df['collection_date'])].groupby(['record_id', 'collection_date'], as_index = False).mean(['rain_inches', 'min_temp_f', 'ave_temp_f', 'max_temp_f'])
df = df.rename(columns={"rain_inches": "rain_inches_two_weeks", "min_temp_f": "min_temp_f_two_weeks", "ave_temp_f": "ave_temp_f_two_weeks", "max_temp_f": "max_temp_f_two_weeks"})
wire_mesh = wire_mesh.merge(df, left_on=['record_id', 'collection_date'], right_on=['record_id', 'collection_date'])
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,two_weeks,rain_inches_one_week,min_temp_f_one_week,ave_temp_f_one_week,max_temp_f_one_week,week,rain_inches_two_weeks,min_temp_f_two_weeks,ave_temp_f_two_weeks,max_temp_f_two_weeks
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000
1,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000
3,18.0,3012.0,R,Corn,10.0,0.0,2.0,0.0,0.0,1.0,...,2023-04-03,0.01000,48.5000,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000
4,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,2023-04-03,0.00250,47.0250,61.6500,76.2875,15.250,0.039333,40.573333,53.080000,65.600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,5043.0,5117.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,2023-05-29,0.04125,49.6500,59.8250,70.0000,23.250,0.030000,51.073333,63.846667,76.633333
460,5184.0,5033.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667
461,5229.0,5034.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667
462,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,2023-05-23,0.00625,52.2875,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667


In [521]:
wire_mesh = wire_mesh.merge(gdd[['record_id', 'gdd_fahrenheit_simple_cumsum', 'gdd_fahrenheit_sine_cum_sum', 'date']], left_on=['record_id', 'collection_date'], right_on=['record_id', 'date'], how='left')
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,ave_temp_f_one_week,max_temp_f_one_week,week,rain_inches_two_weeks,min_temp_f_two_weeks,ave_temp_f_two_weeks,max_temp_f_two_weeks,gdd_fahrenheit_simple_cumsum,gdd_fahrenheit_sine_cum_sum,date
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
1,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
3,18.0,3012.0,R,Corn,10.0,0.0,2.0,0.0,0.0,1.0,...,62.6875,76.9000,15.250,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
4,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,61.6500,76.2875,15.250,0.039333,40.573333,53.080000,65.600000,293.0,388.979519,2023-04-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,5043.0,5117.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,59.8250,70.0000,23.250,0.030000,51.073333,63.846667,76.633333,1277.0,1364.873638,2023-06-12
460,5184.0,5033.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06
461,5229.0,5034.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06
462,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,65.9875,79.6875,22.375,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06


In [522]:
wire_mesh['week'] = wire_mesh['week'].astype('int')
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,ave_temp_f_one_week,max_temp_f_one_week,week,rain_inches_two_weeks,min_temp_f_two_weeks,ave_temp_f_two_weeks,max_temp_f_two_weeks,gdd_fahrenheit_simple_cumsum,gdd_fahrenheit_sine_cum_sum,date
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,62.6875,76.9000,15,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
1,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,62.6875,76.9000,15,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
2,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,62.6875,76.9000,15,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
3,18.0,3012.0,R,Corn,10.0,0.0,2.0,0.0,0.0,1.0,...,62.6875,76.9000,15,0.036000,42.300000,54.346667,66.400000,335.0,428.290955,2023-04-17
4,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,61.6500,76.2875,15,0.039333,40.573333,53.080000,65.600000,293.0,388.979519,2023-04-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,5043.0,5117.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,59.8250,70.0000,23,0.030000,51.073333,63.846667,76.633333,1277.0,1364.873638,2023-06-12
460,5184.0,5033.0,R,Bean,10.0,0.0,3.0,0.0,0.0,0.0,...,65.9875,79.6875,22,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06
461,5229.0,5034.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,65.9875,79.6875,22,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06
462,5184.0,5033.0,R,Corn,10.0,0.0,3.0,0.0,0.0,0.0,...,65.9875,79.6875,22,0.003333,47.646667,61.553333,75.466667,1029.5,1116.484498,2023-06-06


In [523]:
wire_mesh = wire_mesh.merge(scm_counts, left_on=['field_id', 'week'], right_on=['site', 'week'], how='outer', indicator=True)
wire_mesh = wire_mesh[wire_mesh['_merge'] == 'both']
wire_mesh = wire_mesh.drop(columns=['date_y', '_merge'])
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,date_x,card_id,data_collector,site,n_scm_i_m,n_scm_i_f,n_scm_o_m,n_scm_o_f,n_d_florilega_i,n_d_florilega_o
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,2023-04-17,2069.0,Aaron Gabriel,GAB_ALL,,,,,,
1,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,2023-04-17,2070.0,Aaron Gabriel,GAB_ALL,19.0,63.0,17.0,42.0,0.0,0.0
2,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,2023-04-17,2069.0,Aaron Gabriel,GAB_ALL,,,,,,
3,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,2023-04-17,2070.0,Aaron Gabriel,GAB_ALL,19.0,63.0,17.0,42.0,0.0,0.0
4,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,2023-04-17,2069.0,Aaron Gabriel,GAB_ALL,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,2952.0,4100.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,2023-05-16,2386.0,Jodi Letham,LET_NOB,,,,,,
538,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,2023-05-16,2385.0,Jodi Letham,LET_NOB,,,,,,
539,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,2023-05-16,2386.0,Jodi Letham,LET_NOB,,,,,,
540,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,2023-05-16,2385.0,Jodi Letham,LET_NOB,,,,,,


In [524]:
wire_mesh['last_week'] = wire_mesh['week'] - 1
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,card_id,data_collector,site,n_scm_i_m,n_scm_i_f,n_scm_o_m,n_scm_o_f,n_d_florilega_i,n_d_florilega_o,last_week
0,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,2069.0,Aaron Gabriel,GAB_ALL,,,,,,,14
1,3.0,3011.0,R,Bean,8.0,1.0,2.0,1.0,2.0,0.0,...,2070.0,Aaron Gabriel,GAB_ALL,19.0,63.0,17.0,42.0,0.0,0.0,14
2,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,2069.0,Aaron Gabriel,GAB_ALL,,,,,,,14
3,18.0,3012.0,R,Bean,10.0,0.0,2.0,0.0,0.0,1.0,...,2070.0,Aaron Gabriel,GAB_ALL,19.0,63.0,17.0,42.0,0.0,0.0,14
4,3.0,3011.0,R,Corn,8.0,1.0,2.0,1.0,2.0,0.0,...,2069.0,Aaron Gabriel,GAB_ALL,,,,,,,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,2952.0,4100.0,R,Bean,10.0,0.0,0.0,0.0,0.0,0.0,...,2386.0,Jodi Letham,LET_NOB,,,,,,,18
538,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,2385.0,Jodi Letham,LET_NOB,,,,,,,18
539,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,2386.0,Jodi Letham,LET_NOB,,,,,,,18
540,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,2385.0,Jodi Letham,LET_NOB,,,,,,,18


In [525]:
wire_mesh = wire_mesh.merge(scm_counts, left_on=['field_id', 'last_week'], right_on=['site', 'week'], how='outer', indicator=True)
wire_mesh = wire_mesh[wire_mesh['_merge'] == 'both']
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,site_y,date,n_scm_i_m_y,n_scm_i_f_y,n_scm_o_m_y,n_scm_o_f_y,n_d_florilega_i_y,n_d_florilega_o_y,week_y,_merge
8,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
9,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
10,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
11,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
12,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
648,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both
649,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
650,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both


In [526]:
wire_mesh = wire_mesh.rename(columns={'n_scm_i_m_x' : 'n_scm_i_m_current', 'n_scm_i_f_x' : 'n_scm_i_f_current', 'n_scm_o_m_x' : 'n_scm_o_m_current', 'n_scm_o_f_x' : 'n_scm_o_f_current', 'n_d_florilega_i_x' : 'n_d_florilega_i_current', 'n_d_florilega_o_x' : 'n_d_florilega_o_current'})
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,site_y,date,n_scm_i_m_y,n_scm_i_f_y,n_scm_o_m_y,n_scm_o_f_y,n_d_florilega_i_y,n_d_florilega_o_y,week_y,_merge
8,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
9,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
10,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
11,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
12,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
648,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both
649,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
650,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both


In [527]:
wire_mesh = wire_mesh.rename(columns={'n_scm_i_m_y' : 'n_scm_i_m_2', 'n_scm_i_f_y' : 'n_scm_i_f_2', 'n_scm_o_m_y' : 'n_scm_o_m_2', 'n_scm_o_f_y' : 'n_scm_o_f_2', 'n_d_florilega_i_y' : 'n_d_florilega_i_2', 'n_d_florilega_o_y' : 'n_d_florilega_o_2'})
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,site_y,date,n_scm_i_m_2,n_scm_i_f_2,n_scm_o_m_2,n_scm_o_f_2,n_d_florilega_i_2,n_d_florilega_o_2,week_y,_merge
8,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
9,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
10,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
11,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,17.0,5.0,20.0,8.0,0.0,0.0,14.0,both
12,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,GAB_STE,2023-04-03,,,,,,,14.0,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
648,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both
649,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,11.0,5.0,4.0,2.0,0.0,0.0,18.0,both
650,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,LET_NOB,2023-05-01,,,,,,,18.0,both


In [528]:
wire_mesh = wire_mesh.drop(columns=['_merge', 'week_y', 'date', 'site_y', 'card_id_y', 'data_collector_y', 'site_x', 'data_collector_x', 'week_x', 'field_id', 'card_id_x', 'date_x'])
wire_mesh

Unnamed: 0,id,container_number,project,seed_type,number_seeds,scm_seeds,other_seeds,total_scm_maggots,total_scm_pupae,number_wireworm,...,n_scm_o_f_current,n_d_florilega_i_current,n_d_florilega_o_current,last_week,n_scm_i_m_2,n_scm_i_f_2,n_scm_o_m_2,n_scm_o_f_2,n_d_florilega_i_2,n_d_florilega_o_2
8,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,,,,14.0,,,,,,
9,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,,,,14.0,17.0,5.0,20.0,8.0,0.0,0.0
10,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,10.0,0.0,0.0,14.0,,,,,,
11,6.0,3009.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,10.0,0.0,0.0,14.0,17.0,5.0,20.0,8.0,0.0,0.0
12,15.0,3010.0,R,Bean,10.0,0.0,1.0,0.0,0.0,0.0,...,,,,14.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,2949.0,4102.0,R,Corn,10.0,0.0,2.0,0.0,0.0,0.0,...,,,,18.0,11.0,5.0,4.0,2.0,0.0,0.0
648,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,,,,18.0,,,,,,
649,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,,,,18.0,11.0,5.0,4.0,2.0,0.0,0.0
650,2952.0,4100.0,R,Corn,10.0,0.0,0.0,0.0,0.0,0.0,...,,,,18.0,,,,,,


In [529]:
wire_mesh.columns

Index(['id', 'container_number', 'project', 'seed_type', 'number_seeds',
       'scm_seeds', 'other_seeds', 'total_scm_maggots', 'total_scm_pupae',
       'number_wireworm', 'phorid_seeds', 'total_phorid_maggots',
       'total_phorid_pupae', 'notes', 'container_type', 'collection_date',
       'record_id', 'cover_crop', 'manure', 'ny_soils_0', 'ny_soils_01',
       'muid', 'hsg', 'hsgint', 'muserial', 'hsga', 'hsgb', 'hsgc', 'hsgd',
       'hsgw', 'one_week', 'two_weeks', 'rain_inches_one_week',
       'min_temp_f_one_week', 'ave_temp_f_one_week', 'max_temp_f_one_week',
       'rain_inches_two_weeks', 'min_temp_f_two_weeks', 'ave_temp_f_two_weeks',
       'max_temp_f_two_weeks', 'gdd_fahrenheit_simple_cumsum',
       'gdd_fahrenheit_sine_cum_sum', 'n_scm_i_m_current', 'n_scm_i_f_current',
       'n_scm_o_m_current', 'n_scm_o_f_current', 'n_d_florilega_i_current',
       'n_d_florilega_o_current', 'last_week', 'n_scm_i_m_2', 'n_scm_i_f_2',
       'n_scm_o_m_2', 'n_scm_o_f_2', 'n_d_flor

# Risk Assessment Random Forest

# Adult Abundance

Farms with first emergence data - 

- DIP_CUR
- DIP_FLE
- POV_DUN
- GAB_STE

In [109]:
first_emergence_farms = ["DIP_CUR", "DIP_FLE", "POV_DUN", "GAB_STE"]
first_emergence_farms


['DIP_CUR', 'DIP_FLE', 'POV_DUN', 'GAB_STE']

2023-04-23T00:00:00.000000000

%Y-%m-%dT%H:%M:%S.%f

In [110]:
for farm in first_emergence_farms:
    count_tbl_name = farm + "_counts"
    gdd_tbl_name = farm + "_gdd"

    count_tbl_name = scm_counts[scm_counts["site"] == farm]
    gdd_tbl_name = gdd.loc[gdd["record_id"] == farm]

    pred_avg = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_simple_cumsum"] >= 296][
        "date"
    ].values[0]

    pred_sin = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_sine_cum_sum"] >= 361][
        "date"
    ].values[0]

    actual = count_tbl_name.loc[
        (count_tbl_name["n_scm_i_m"] >= 1)
        | (count_tbl_name["n_scm_o_m"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
    ]["date"].values[0]

    print(
        "The actual first emergence date at " + farm + " is",
        datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )

    print(
        "The predicted first emergence date using the simple average model at "
        + farm
        + " is",
        datetime.strptime(str(pred_avg), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )
    print(
        "The predicted date using the simple average model was "
        + str(
            (
                datetime.strptime(str(pred_avg), "%Y-%m-%dT%H:%M:%S.%f000").date()
                - datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date()
            ).days
        )
        + " days late"
    )

    print(
        "The predicted first emergence date using the sine wave model at "
        + farm
        + " is",
        datetime.strptime(str(pred_sin), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )
    print(
        "The predicted date using the sine wave model was "
        + str(
            (
                datetime.strptime(str(pred_sin), "%Y-%m-%dT%H:%M:%S.%f000").date()
                - datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date()
            ).days
        )
        + " days late"
    )

    print(" ")


The actual first emergence date at DIP_CUR is 2023-04-06
The predicted first emergence date using the simple average model at DIP_CUR is 2023-04-23
The predicted date using the simple average model was 17 days late
The predicted first emergence date using the sine wave model at DIP_CUR is 2023-04-23
The predicted date using the sine wave model was 17 days late
 
The actual first emergence date at DIP_FLE is 2023-03-28
The predicted first emergence date using the simple average model at DIP_FLE is 2023-04-18
The predicted date using the simple average model was 21 days late
The predicted first emergence date using the sine wave model at DIP_FLE is 2023-04-17
The predicted date using the sine wave model was 20 days late
 
The actual first emergence date at POV_DUN is 2023-03-28
The predicted first emergence date using the simple average model at POV_DUN is 2023-04-22
The predicted date using the simple average model was 25 days late
The predicted first emergence date using the sine wave 

## New GDD Thresholds

Based on average cumulative GDD for four sites with first emergence dates. 

Simple Average Model - GDD Average for Actual Emergence

In [111]:
np.mean([54, 62.5, 94, 58.5])


67.25

In [112]:
np.mean([55, 61.5, 53.5, 54.5])


56.125

Sine Wave Model - GDD Average for Actual Emergence

In [113]:
np.mean([54, 94, 62.5, 58.5])


67.25

In [114]:
np.mean([109.4494181, 147.4329546, 118.5207495, 130.9217006])


126.5812057

In [115]:
for farm in first_emergence_farms:
    count_tbl_name = farm + "_counts"
    gdd_tbl_name = farm + "_gdd"

    count_tbl_name = scm_counts[scm_counts["site"] == farm]
    gdd_tbl_name = gdd.loc[gdd["record_id"] == farm]

    pred_avg = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_simple_cumsum"] >= 67][
        "date"
    ].values[0]

    pred_sin = gdd_tbl_name.loc[gdd_tbl_name["gdd_fahrenheit_sine_cum_sum"] >= 127][
        "date"
    ].values[0]

    actual = count_tbl_name.loc[
        (count_tbl_name["n_scm_i_m"] >= 1)
        | (count_tbl_name["n_scm_o_m"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
        | (count_tbl_name["n_scm_o_f"] >= 1)
    ]["date"].values[0]

    print(
        "The actual first emergence date at " + farm + " is",
        datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )

    print(
        "The predicted first emergence date using the simple average model at "
        + farm
        + " is",
        datetime.strptime(str(pred_avg), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )
    print(
        "The predicted date using the simple average model was "
        + str(
            (
                datetime.strptime(str(pred_avg), "%Y-%m-%dT%H:%M:%S.%f000").date()
                - datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date()
            ).days
        )
        + " days late"
    )

    print(
        "The predicted first emergence date using the sine wave model at "
        + farm
        + " is",
        datetime.strptime(str(pred_sin), "%Y-%m-%dT%H:%M:%S.%f000").date(),
    )
    print(
        "The predicted date using the sine wave model was "
        + str(
            (
                datetime.strptime(str(pred_sin), "%Y-%m-%dT%H:%M:%S.%f000").date()
                - datetime.strptime(str(actual), "%Y-%m-%dT%H:%M:%S.%f000").date()
            ).days
        )
        + " days late"
    )

    print(" ")


The actual first emergence date at DIP_CUR is 2023-04-06
The predicted first emergence date using the simple average model at DIP_CUR is 2023-04-05
The predicted date using the simple average model was -1 days late
The predicted first emergence date using the sine wave model at DIP_CUR is 2023-04-05
The predicted date using the sine wave model was -1 days late
 
The actual first emergence date at DIP_FLE is 2023-03-28
The predicted first emergence date using the simple average model at DIP_FLE is 2023-04-01
The predicted date using the simple average model was 4 days late
The predicted first emergence date using the sine wave model at DIP_FLE is 2023-04-01
The predicted date using the sine wave model was 4 days late
 
The actual first emergence date at POV_DUN is 2023-03-28
The predicted first emergence date using the simple average model at POV_DUN is 2023-04-04
The predicted date using the simple average model was 7 days late
The predicted first emergence date using the sine wave mod