In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy

In [0]:
# Load the cleaned weather table; its first CSV column becomes the index.
GDD_KDD = pd.read_csv('GDD_KDD_Cleaned.csv', index_col=0)
# Convert the index to datetimes so rows can later be aligned by date.
GDD_KDD.index = pd.to_datetime(GDD_KDD.index)

# Keep only the GDD and KDD data: drop the four TMAX/TMIN columns.
GDD_KDD = GDD_KDD.drop(columns=['TMAX_obs', 'TMIN_obs', 'TMAX_bound', 'TMIN_bound'])

In [0]:
# Load the Illinois crop-development table; its first CSV column becomes the index.
IL_Development = pd.read_csv('illinois_combined_development_data.csv', index_col=0)
# Convert the index to datetimes so it can be matched against the weather data by date.
IL_Development.index = pd.to_datetime(IL_Development.index)

# The 'sum' column is not needed for the analysis, so remove it.
IL_Development = IL_Development.drop(columns=['sum'])

In [0]:
#GDD is calculated per county and development phase: for each day, multiply the fraction of the crop in the phase of interest by the GDD on that day.
#Since we only have one county, that product is the phase's GDD for the day.

In [132]:
# The two dataframes need identical indexes before they can be combined. Currently they differ
# because some "bad years" were removed from the weather data due to holes:
# 1983, 1986, 1998, 1999, 2000, and 2016.
# 1981 - 2017 (inclusive) spans 13514 days; without those years 11322 days should remain.
# Remember to account for leap years when checking these counts!

print(len(IL_Development.index), len(GDD_KDD.index), sep='\n')

# Join on the index: GDD and KDD are NaN for the years removed during cleaning,
# so dropping every row that contains a NaN leaves only the dates present in both tables.
df = IL_Development.join(GDD_KDD).dropna()

print(len(df.index))

13514
11322
11322


In [133]:
# Weight the daily GDD and KDD by the share of the crop in each development phase.
# The phase columns are expected to be percentages, so they are divided by 100 first.
# NOTE: the rescaling overwrites the phase columns of df in place, so running this cell
# again without rebuilding df divides them by 100 a second time.

phases = ['vegetative', 'early_grain', 'late_grain']

for p in phases:
  df[p] = df[p].div(100)

# Every GDD_<phase> column is created before any KDD_<phase> column, which fixes the column order.
for measure in ('GDD', 'KDD'):
  for p in phases:
    df[measure + '_' + p] = df[p].mul(df[measure])

df.tail()

Unnamed: 0,vegetative,early_grain,late_grain,GDD,KDD,GDD_vegetative,GDD_early_grain,GDD_late_grain,KDD_vegetative,KDD_early_grain,KDD_late_grain
2017-12-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
# TODO: the model will need the yearly average GDD and KDD for each phase.
# The abandoned draft that used to live in this cell (a commented-out year-indexed DataFrame,
# a commented-out dictionary of means, and a loop that exited on its first iteration) computed
# nothing and has been removed. The yearly GDD_vegetative means are built in the following cell.
  


In [159]:
# Build a dictionary whose keys are years and whose values are the mean of the
# GDD_vegetative column over all of that year's rows.
years = df.index.year
gdd_vegetative = df['GDD_vegetative']

data = {}
for y in years.unique():
  # Boolean mask selects the rows that fall in year y.
  data[y] = gdd_vegetative[years == y].mean()

print(data)

{1981: 2.1083590998043054, 1982: 2.237646966731899, 1984: 2.255080405932865, 1985: 2.2010890410958894, 1987: 2.240381017612524, 1988: 2.4324599921935985, 1989: 2.2429001956947165, 1990: 2.246716438356164, 1991: 2.2244434442270054, 1992: 2.0891253903200626, 1993: 2.0546379647749498, 1994: 1.983344863013699, 1995: 1.9621945205479452, 1996: 2.129673692427791, 1997: 2.173863796477496, 2001: 1.9729194716242664, 2002: 2.000100750163079, 2003: 1.9442484670580553, 2004: 1.947963895394224, 2005: 2.020782583170254, 2006: 2.004901369863014, 2007: 1.9101868884540112, 2008: 2.0526795472287294, 2009: 1.9101334637964777, 2010: 2.152410371819961, 2011: 2.00591350293542, 2012: 1.9168688524590163, 2013: 2.0996971624266147, 2014: 2.008723287671231, 2015: 2.101664383561644, 2017: 2.0717019569471624}


In [105]:
# Load the yield table and keep only Mercer County, IL (state_fips 17) from 1981 onward.
df_yield = pd.read_csv('yield.csv').loc[
    lambda rows: (rows['county'] == 'MERCER')
    & (rows['year'] >= 1981)
    & (rows['state_fips'] == 17)
]

# Show the first and the last rows of the filtered table.
print(df_yield.head())

print(df_yield.tail())

     year  state_fips  county  county_code  yield
185  2017          17  MERCER          131  238.7
186  2016          17  MERCER          131  213.5
187  2015          17  MERCER          131  205.7
188  2014          17  MERCER          131  193.5
189  2013          17  MERCER          131  177.1
     year  state_fips  county  county_code  yield
217  1985          17  MERCER          131  119.0
218  1984          17  MERCER          131  118.0
219  1983          17  MERCER          131   96.0
220  1982          17  MERCER          131  124.0
221  1981          17  MERCER          131  134.0
