In [10]:
import pandas as pd
from skimpy import clean_columns

# Pest Data

In [3]:
# Load the NYSIPM sweet corn CSV (1993-2022 per the file name) from the
# working directory, then display the frame as the cell output.
raw_data = pd.read_csv(
    'NYSIPM_Sweet_Corn_1993_2022.csv',
)
raw_data

Unnamed: 0,Site,USDA hardiness Zone,Lat,Lat-Decimal,Long,Long-Decimal,Year,Week,Date,ECB-E,ECB-Z,CEW,FAW,WBC
0,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,21,5/25/21,,,,,
1,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,22,6/1/21,,,,,
2,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,23,6/8/21,,,,,
3,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,24,6/15/21,,,,,
4,Accord,6a,"41°46'56.41""N",41.782336,"74°14'44.28""W",-74.245633,2021,25,6/22/21,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12507,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,35,8/30/22,0,0,39,0,0
12508,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,36,9/6/22,0,0,36,0,0
12509,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,37,9/13/22,0,0,22,0,0
12510,Williamson,6b,"43°14'21.11""N",43.239197,"77°13'32.10""W",-77.225583,2022,38,9/20/22,,,,,


In [4]:
# Narrow the frame to the columns the later cells work with; the other
# columns of the CSV (hardiness zone, degree/minute/second coordinates)
# are dropped from `raw_data` here.
raw_data = raw_data.loc[
    :,
    [
        # where the record was taken
        'Site', 'Lat-Decimal', 'Long-Decimal',
        # when the record was taken
        'Year', 'Week', 'Date',
        # one column per pest series
        'ECB-E', 'ECB-Z', 'CEW', 'FAW', 'WBC',
    ],
]

raw_data

Unnamed: 0,Site,Lat-Decimal,Long-Decimal,Year,Week,Date,ECB-E,ECB-Z,CEW,FAW,WBC
0,Accord,41.782336,-74.245633,2021,21,5/25/21,,,,,
1,Accord,41.782336,-74.245633,2021,22,6/1/21,,,,,
2,Accord,41.782336,-74.245633,2021,23,6/8/21,,,,,
3,Accord,41.782336,-74.245633,2021,24,6/15/21,,,,,
4,Accord,41.782336,-74.245633,2021,25,6/22/21,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...
12507,Williamson,43.239197,-77.225583,2022,35,8/30/22,0,0,39,0,0
12508,Williamson,43.239197,-77.225583,2022,36,9/6/22,0,0,36,0,0
12509,Williamson,43.239197,-77.225583,2022,37,9/13/22,0,0,22,0,0
12510,Williamson,43.239197,-77.225583,2022,38,9/20/22,,,,,


In [5]:
# Store the site name as a pandas categorical instead of plain strings.
raw_data['Site'] = raw_data['Site'].astype('category')

In [6]:
# Parse the date strings, which are month/day/two-digit-year (e.g. '5/25/21').
# The format is given explicitly so that parsing never depends on per-value
# inference: '6/1/21' is always June 1st, and a value that does not match
# raises instead of being silently guessed at.
raw_data['Date'] = pd.to_datetime(raw_data['Date'], format='%m/%d/%y')

# Day of the year (1 = 1 January) taken from the parsed date.
raw_data['Day'] = raw_data['Date'].dt.dayofyear

In [7]:
# List each column's dtype as a sanity check on the conversions made in the
# preceding cells.
# NOTE(review): the recorded output shows ECB-E, ECB-Z, CEW, FAW and WBC as
# object rather than a numeric dtype — confirm whether that is intended.
raw_data.dtypes

Site                  category
Lat-Decimal            float64
Long-Decimal           float64
Year                     int64
Week                     int64
Date            datetime64[ns]
ECB-E                   object
ECB-Z                   object
CEW                     object
FAW                     object
WBC                     object
Day                      int64
dtype: object

In [8]:
# Reshape from wide to long: the five pest columns are stacked into a
# 'variable' (source column name) / 'value' (cell content) pair, while the
# site, coordinate and time columns are repeated on every resulting row.
pest_df = pd.melt(
    raw_data,
    id_vars=[
        'Site', 'Lat-Decimal', 'Long-Decimal',
        'Year', 'Week', 'Date', 'Day',
    ],
    value_vars=['ECB-E', 'ECB-Z', 'CEW', 'FAW', 'WBC'],
)

In [11]:
# Normalise the column names with skimpy's clean_columns; in the recorded
# output the names come back lower-cased with underscores
# (e.g. 'Lat-Decimal' -> 'lat_decimal').
pest_df = clean_columns(pest_df)
# Display the renamed long-format frame.
pest_df

Unnamed: 0,site,lat_decimal,long_decimal,year,week,date,day,variable,value
0,Accord,41.782336,-74.245633,2021,21,2021-05-25,145,ECB-E,
1,Accord,41.782336,-74.245633,2021,22,2021-06-01,152,ECB-E,
2,Accord,41.782336,-74.245633,2021,23,2021-06-08,159,ECB-E,
3,Accord,41.782336,-74.245633,2021,24,2021-06-15,166,ECB-E,
4,Accord,41.782336,-74.245633,2021,25,2021-06-22,173,ECB-E,0
...,...,...,...,...,...,...,...,...,...
62555,Williamson,43.239197,-77.225583,2022,35,2022-08-30,242,WBC,0
62556,Williamson,43.239197,-77.225583,2022,36,2022-09-06,249,WBC,0
62557,Williamson,43.239197,-77.225583,2022,37,2022-09-13,256,WBC,0
62558,Williamson,43.239197,-77.225583,2022,38,2022-09-20,263,WBC,


# Landscape Data

In [None]:
# Load the 2002 landscape table, using the first CSV column as the index.
# NOTE(review): the file name suggests CDL corn cover at a 5 km scale — confirm.
corn_5km_2002 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2002.csv',
    index_col=0,
)
corn_5km_2002

In [None]:
# Load the 2012 landscape table, using the first CSV column as the index.
# NOTE(review): the file name suggests CDL corn cover at a 5 km scale — confirm.
corn_5km_2012 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2012.csv',
    index_col=0,
)
corn_5km_2012

In [None]:
# Load the 2022 landscape table, using the first CSV column as the index.
# NOTE(review): the file name suggests CDL corn cover at a 5 km scale — confirm.
corn_5km_2022 = pd.read_csv(
    'Landscape/final_cdl_corn_5km_2022.csv',
    index_col=0,
)
corn_5km_2022