In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy

In [0]:
# Load the cleaned daily weather table, using the first CSV column as the index,
# and convert that index to datetimes.
GDD_KDD = pd.read_csv('GDD_KDD_Cleaned.csv', index_col=0)
GDD_KDD.index = pd.to_datetime(GDD_KDD.index)

# Drop the temperature observation/bound columns; per the original note this
# should leave only the GDD and KDD columns.
temperature_columns = ['TMAX_obs', 'TMIN_obs', 'TMAX_bound', 'TMIN_bound']
GDD_KDD = GDD_KDD.drop(columns=temperature_columns)

In [0]:
# Load the Illinois crop development table, using the first CSV column as the
# index, and convert that index to datetimes.
IL_Development = pd.read_csv('illinois_combined_development_data.csv', index_col=0)
IL_Development.index = pd.to_datetime(IL_Development.index)

# Discard the 'sum' column.
IL_Development = IL_Development.drop(columns='sum')

In [0]:
#GDD are calculated for each county and development phase: for each day, multiply the percent of the crop in the phase of interest by the GDD on that day.
#Since we only have one county, the product of GDD and the percent of the crop in the phase of interest is the GDD for that phase on that day.

In [62]:
# Both dataframes need an identical index, which they do not have yet: some
# "bad years" were removed from the weather data because of holes.
# Removed years: 1983, 1986, 1998, 1999, 2000, and 2016. There are 13514 days
# in 1981 - 2017 (inclusive); without the removed years there should be 11322.
# Remember to account for leap years!

print(len(IL_Development))
print(len(GDD_KDD))

# Join the weather columns onto the development data. Dates missing from the
# weather table come back as NaN in its columns, and dropna() then removes
# every row that has a NaN in any column.
df = IL_Development.join(GDD_KDD).dropna()

print(len(df))

13514
11322
11322


In [0]:
# Weight the daily GDD and KDD by the share of the crop in each development phase.
# The phase columns are expected to hold percentages, so they are rescaled to
# fractions first.
# NOTE: the rescale overwrites the phase columns of df in place, so running this
# cell a second time divides them by 100 again.

phases = ['vegetative', 'early_grain', 'late_grain']

for p in phases:
  df[p] = df[p].div(100)

# Outer loop over the measure keeps the column order: all GDD_* columns first,
# then all KDD_* columns.
for measure in ('GDD', 'KDD'):
  for p in phases:
    df[measure + '_' + p] = df[p] * df[measure]

# Preview a few days of the result.
df.loc['2017-04-15':'2017-04-20']

In [0]:
#Find yearly average GDD/KDD for each phase.
phase = ['GDD_vegetative', 'GDD_early_grain', 'GDD_late_grain', 'KDD_vegetative', 'KDD_early_grain', 'KDD_late_grain']

# Group the daily rows by calendar year and average every phase column in a
# single pass, instead of re-scanning df with a boolean mask for each
# (phase, year) pair and re-mapping a partially filled dictionary each time.
# sort=False keeps the years in order of first appearance, which is the same
# order df.index.year.unique() yields.
# Result: one row per year, a 'year' column followed by one mean column per
# entry of `phase`, on a default integer index.
storage_df = (
  df.groupby(df.index.year, sort=False)[phase]
  .mean()
  .rename_axis('year')
  .reset_index()
)

In [90]:
# Show the first five rows of the per-year phase averages.
storage_df.head(n=5)

Unnamed: 0,year,GDD_vegetative,GDD_early_grain,GDD_late_grain,KDD_vegetative,KDD_early_grain,KDD_late_grain
0,1981,2.108359,0.823876,1.234161,0.095409,0.028719,0.03161
1,1982,2.237647,0.745351,1.224173,0.074162,0.063381,0.038842
2,1984,2.25508,0.742555,1.241253,0.187655,0.110284,0.183959
3,1985,2.201089,0.800572,1.433259,0.16484,0.079768,0.14093
4,1987,2.240381,0.831239,1.297272,0.265117,0.172497,0.241803


In [0]:
# Load the yield table and keep only the rows for Mercer County with
# state FIPS code 17 (Illinois, per the original note) from 1981 onward.
df_yield = pd.read_csv('yield.csv')

is_mercer = df_yield['county'] == 'MERCER'
from_1981 = df_yield['year'] >= 1981
is_state_17 = df_yield['state_fips'] == 17
df_yield = df_yield[is_mercer & from_1981 & is_state_17]

# Print both ends of the filtered table as a quick check.
print(df_yield.head())

print(df_yield.tail())

     year  state_fips  county  county_code  yield
185  2017          17  MERCER          131  238.7
186  2016          17  MERCER          131  213.5
187  2015          17  MERCER          131  205.7
188  2014          17  MERCER          131  193.5
189  2013          17  MERCER          131  177.1
     year  state_fips  county  county_code  yield
217  1985          17  MERCER          131  119.0
218  1984          17  MERCER          131  118.0
219  1983          17  MERCER          131   96.0
220  1982          17  MERCER          131  124.0
221  1981          17  MERCER          131  134.0
