In [10]:
import pandas as pd
from skimpy import clean_columns

# Pest Data

In [3]:
# Load the pest dataset from its CSV file into a DataFrame and display it.

raw_data = pd.read_csv(
    filepath_or_buffer='NYSIPM_Sweet_Corn_1993_2022.csv',
)
raw_data

Unnamed: 0,Site,USDA hardiness Zone,Lat,Lat-Decimal,Long,Long-Decimal,Year,Week,Date,ECB-E,ECB-Z,CEW,FAW,WBC
0,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,21,5/25/21,,,,,
1,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,22,6/1/21,,,,,
2,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,23,6/8/21,,,,,
3,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,24,6/15/21,,,,,
4,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,25,6/22/21,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12507,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,35,8/30/22,0,0,39,0,0
12508,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,36,9/6/22,0,0,36,0,0
12509,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,37,9/13/22,0,0,22,0,0
12510,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,38,9/20/22,,,,,


In [4]:
# Keep the site name, decimal coordinates, time fields and the five pest
# columns. The hardiness zone and the degree/minute/second coordinate strings
# shown in the loaded table are left out.

raw_data = raw_data.loc[
    :,
    [
        'Site', 'Lat-Decimal', 'Long-Decimal',
        'Year', 'Week', 'Date',
        'ECB-E', 'ECB-Z', 'CEW', 'FAW', 'WBC',
    ],
]

raw_data

Unnamed: 0,Site,Lat-Decimal,Long-Decimal,Year,Week,Date,ECB-E,ECB-Z,CEW,FAW,WBC
0,Accord,41.782336,-74.245633,2021,21,5/25/21,,,,,
1,Accord,41.782336,-74.245633,2021,22,6/1/21,,,,,
2,Accord,41.782336,-74.245633,2021,23,6/8/21,,,,,
3,Accord,41.782336,-74.245633,2021,24,6/15/21,,,,,
4,Accord,41.782336,-74.245633,2021,25,6/22/21,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...
12507,Williamson,43.239197,-77.225583,2022,35,8/30/22,0,0,39,0,0
12508,Williamson,43.239197,-77.225583,2022,36,9/6/22,0,0,36,0,0
12509,Williamson,43.239197,-77.225583,2022,37,9/13/22,0,0,22,0,0
12510,Williamson,43.239197,-77.225583,2022,38,9/20/22,,,,,


In [5]:
# Store the site names as a categorical column.
raw_data['Site'] = raw_data['Site'].astype('category')

In [6]:
# Parse the date strings with an explicit month/day/two-digit-year format
# (e.g. '5/25/21', '6/1/21') so the result never depends on pandas' format
# inference. A value in any other layout raises instead of being misread.
raw_data['Date'] = pd.to_datetime(raw_data['Date'], format='%m/%d/%y')
# Day of the year taken from the parsed date (2021-05-25 -> 145).
raw_data['Day'] = raw_data['Date'].dt.dayofyear

In [7]:
# Check the column dtypes of the pest table.
# NOTE(review): in the saved output the five pest columns (ECB-E, ECB-Z, CEW,
# FAW, WBC) are `object` rather than a numeric dtype — confirm whether they
# need converting before analysis.
raw_data.dtypes

Site                  category
Lat-Decimal            float64
Long-Decimal           float64
Year                     int64
Week                     int64
Date            datetime64[ns]
ECB-E                   object
ECB-Z                   object
CEW                     object
FAW                     object
WBC                     object
Day                      int64
dtype: object

In [8]:
# Reshape from wide to long: the site/time columns are repeated for each of
# the five pest columns. No var_name/value_name is given, so the new columns
# take pandas' default names 'variable' and 'value'.
pest_df = raw_data.melt(
    id_vars=['Site', 'Lat-Decimal', 'Long-Decimal', 'Year', 'Week', 'Date', 'Day'],
    value_vars=['ECB-E', 'ECB-Z', 'CEW', 'FAW', 'WBC'],
)

In [11]:
# Normalise the column names with skimpy's clean_columns. In the displayed
# result they appear in lower snake_case (e.g. 'Lat-Decimal' -> 'lat_decimal').
# Only the headers change: the entries of 'variable' keep their original
# labels such as 'ECB-E'.
pest_df = clean_columns(pest_df)
pest_df

Unnamed: 0,site,lat_decimal,long_decimal,year,week,date,day,variable,value
0,Accord,41.782336,-74.245633,2021,21,2021-05-25,145,ECB-E,
1,Accord,41.782336,-74.245633,2021,22,2021-06-01,152,ECB-E,
2,Accord,41.782336,-74.245633,2021,23,2021-06-08,159,ECB-E,
3,Accord,41.782336,-74.245633,2021,24,2021-06-15,166,ECB-E,
4,Accord,41.782336,-74.245633,2021,25,2021-06-22,173,ECB-E,0
...,...,...,...,...,...,...,...,...,...
62555,Williamson,43.239197,-77.225583,2022,35,2022-08-30,242,WBC,0
62556,Williamson,43.239197,-77.225583,2022,36,2022-09-06,249,WBC,0
62557,Williamson,43.239197,-77.225583,2022,37,2022-09-13,256,WBC,0
62558,Williamson,43.239197,-77.225583,2022,38,2022-09-20,263,WBC,


# Landscape Data

In [12]:
# Per-site corn land-cover table for 2002; the file's first column is used as
# the row index.
# NOTE(review): unlike the 2012 and 2022 tables, the displayed 2002 table has
# no sweet_corn / prop_field_corn / prop_sweet_corn columns — account for that
# before combining the years.
corn_5km_2002 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2002.csv',
    index_col=0,
)
corn_5km_2002

Unnamed: 0,site,total_corn,corn,prop_corn,total_land
1,Accord,1497,1497,0.017930,83491
2,Adams_Center,484,484,0.005605,86351
3,Afton,3029,3029,0.036023,84085
4,Athens,1103,1103,0.013105,84169
5,Amsterdam,319,319,0.003746,85156
...,...,...,...,...,...
71,Ticonderoga,1941,1941,0.022472,86376
72,Tivoli,4814,4814,0.057421,83837
73,Unadilla,1176,1176,0.013963,84222
74,Waterport,10975,10975,0.128273,85560


In [13]:
# Per-site corn land-cover table for 2012; the file's first column is used as
# the row index.
corn_5km_2012 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2012.csv',
    index_col=0,
)
corn_5km_2012

Unnamed: 0,site,total_corn,corn,sweet_corn,prop_corn,prop_field_corn,prop_sweet_corn,total_land
1,Accord,1784,1748,36,0.022438,0.021985,0.000453,79509
2,Adams_Center,11726,11725,1,0.142584,0.142572,0.000012,82239
3,Afton,4120,4119,1,0.051451,0.051439,0.000012,80076
4,Athens,1784,1754,30,0.022257,0.021883,0.000374,80155
5,Amsterdam,1643,1630,13,0.020260,0.020100,0.000160,81096
...,...,...,...,...,...,...,...,...
71,Ticonderoga,2505,2504,1,0.030452,0.030440,0.000012,82261
72,Tivoli,2772,2499,273,0.034716,0.031297,0.003419,79847
73,Unadilla,2177,2177,0,0.027140,0.027140,0.000000,80215
74,Waterport,17798,17410,388,0.218413,0.213651,0.004761,81488


In [14]:
# Per-site corn land-cover table for 2022; the file's first column is used as
# the row index.
corn_5km_2022 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2022.csv',
    index_col=0,
)
corn_5km_2022

Unnamed: 0,site,total_corn,corn,sweet_corn,prop_corn,prop_field_corn,prop_sweet_corn,total_land
1,Accord,1613,1313,300,0.020287,0.016514,0.003773,79509
2,Adams_Center,11862,11862,0,0.144238,0.144238,0.000000,82239
3,Afton,3462,3462,0,0.043234,0.043234,0.000000,80076
4,Athens,1659,1639,20,0.020697,0.020448,0.000250,80155
5,Amsterdam,1514,1510,4,0.018669,0.018620,0.000049,81096
...,...,...,...,...,...,...,...,...
71,Ticonderoga,842,835,7,0.010236,0.010151,0.000085,82261
72,Tivoli,1973,1912,61,0.024710,0.023946,0.000764,79847
73,Unadilla,2034,2030,4,0.025357,0.025307,0.000050,80215
74,Waterport,15377,14803,574,0.188703,0.181659,0.007044,81488


# Climate Data

In [17]:
# Load the per-site climate records (one row per site and date in the output).
# NOTE(review): this path climbs three directories up into a Desktop folder,
# so it only resolves from the author's working directory, whereas the pest
# and landscape files are read relative to the notebook — consider keeping
# this file alongside them.
climate_data = pd.read_csv(
    '../../../Desktop/Cornell/Long_Term_Pest_Data/Climate_1993_2022.csv',
)
climate_data

Unnamed: 0,Site,USDA.hardiness.Zone,Lat.Decimal,Long.Decimal,date,pr_prism,tmin_prism,tmax_prism,etr,pet,pr,rmax,rmin,sph,srad,th,tmax,tmin,vpd,vs
0,Accord,6a,41.782336,-74.245633,1993-05-25,0.0000,13.692000,23.0340,8.7,6.4,0.000000,86.0,37.7,0.00854,318.2,305.0,27.657434,12.492121,1.21,5.6
1,Accord,6a,41.782336,-74.245633,1993-05-26,0.0000,11.234000,28.6420,6.6,5.0,0.000000,81.9,37.0,0.00570,312.1,302.0,20.887568,8.292505,0.87,4.2
2,Accord,6a,41.782336,-74.245633,1993-05-27,0.0000,7.580000,21.1530,7.1,5.3,0.000000,69.8,35.6,0.00506,327.9,242.0,20.166956,6.782190,0.86,5.3
3,Accord,6a,41.782336,-74.245633,1993-05-28,0.0000,7.463000,21.0410,6.5,4.8,0.446154,70.6,43.0,0.00732,226.7,308.0,26.987452,6.222833,1.08,3.6
4,Accord,6a,41.782336,-74.245633,1993-05-29,0.8900,10.419000,25.4010,7.0,5.1,0.000000,81.0,37.3,0.00594,299.7,330.0,21.357538,6.412592,0.80,6.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
836009,Williamson,6b,43.222800,-77.231800,2022-09-23,2.8590,7.630200,17.8939,3.1,2.3,0.000000,92.2,54.2,0.00517,177.3,327.0,12.850000,4.750000,0.34,5.9
836010,Williamson,6b,43.222800,-77.231800,2022-09-24,0.0000,5.158500,12.0188,3.5,2.6,0.000000,90.4,44.6,0.00635,183.1,276.0,18.750000,8.050000,0.60,2.7
836011,Williamson,6b,43.222800,-77.231800,2022-09-25,0.0000,8.500500,18.3006,2.1,1.5,8.900000,100.0,64.6,0.00828,68.0,191.0,17.650000,9.550000,0.27,4.3
836012,Williamson,6b,43.222800,-77.231800,2022-09-26,7.7450,10.713300,17.3443,3.7,2.5,3.500000,87.7,57.4,0.00777,114.3,221.0,18.350000,11.550000,0.49,6.0


In [20]:
# Narrow the climate table to the site name, coordinates, date and the
# selected climate variables.
# NOTE(review): `tmin` is kept while `tmax` is not, although both PRISM
# temperature columns are — confirm that this is intentional.
climate_data = climate_data[[
    'Site', 'Lat.Decimal', 'Long.Decimal', 'date',
    'pr_prism', 'tmin_prism', 'tmax_prism',
    'etr', 'rmax', 'rmin', 'th', 'tmin', 'vs',
]]

climate_data

Unnamed: 0,Site,Lat.Decimal,Long.Decimal,date,pr_prism,tmin_prism,tmax_prism,etr,rmax,rmin,th,tmin,vs
0,Accord,41.782336,-74.245633,1993-05-25,0.0000,13.692000,23.0340,8.7,86.0,37.7,305.0,12.492121,5.6
1,Accord,41.782336,-74.245633,1993-05-26,0.0000,11.234000,28.6420,6.6,81.9,37.0,302.0,8.292505,4.2
2,Accord,41.782336,-74.245633,1993-05-27,0.0000,7.580000,21.1530,7.1,69.8,35.6,242.0,6.782190,5.3
3,Accord,41.782336,-74.245633,1993-05-28,0.0000,7.463000,21.0410,6.5,70.6,43.0,308.0,6.222833,3.6
4,Accord,41.782336,-74.245633,1993-05-29,0.8900,10.419000,25.4010,7.0,81.0,37.3,330.0,6.412592,6.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
836009,Williamson,43.222800,-77.231800,2022-09-23,2.8590,7.630200,17.8939,3.1,92.2,54.2,327.0,4.750000,5.9
836010,Williamson,43.222800,-77.231800,2022-09-24,0.0000,5.158500,12.0188,3.5,90.4,44.6,276.0,8.050000,2.7
836011,Williamson,43.222800,-77.231800,2022-09-25,0.0000,8.500500,18.3006,2.1,100.0,64.6,191.0,9.550000,4.3
836012,Williamson,43.222800,-77.231800,2022-09-26,7.7450,10.713300,17.3443,3.7,87.7,57.4,221.0,11.550000,6.0
