#### First Binder notebook test with existing input file in folder

In [1]:
import datetime
import numpy as np
import pandas as pd

### Growing Degree Days (GDD) to Flowering
- Weather data taken from [KSU Weather Station](http://mesonet.k-state.edu/weather/historical/) in Manhattan
- planting date: 2016-06-17
- harvest date: 2016-10-21

In [2]:
manhattan_weather_0 = pd.read_csv('manhattan_weather_2016_daily.csv')
print(manhattan_weather_0.shape)
# manhattan_weather_0.head(5)

(155, 15)


Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,SoilTemperature5cm.1,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1
0,,,max,min,avg,total,avg,max,max,min,max,min,total,grass,alfalfa
1,,,°F,°F,%,inches,mph,mph,°F,°F,°F,°F,ly,inches,inches
2,2016-06-01,Manhattan,81.7,57.9,65.9,0,3.1,13.8,82.1,66.7,78.4,68.1,660.1,0.21,0.25
3,2016-06-02,Manhattan,84.8,56.1,59.6,0,2.8,14,85.1,66.8,80.7,68.4,673.7,0.22,0.26
4,2016-06-03,Manhattan,85.8,55.4,63.2,0.15,3.9,22.9,85.8,67.9,81.1,69.5,667.8,0.23,0.28


#### Change column names and drop first two rows
- Add datetime column

In [3]:
manhattan_weather_1 = manhattan_weather_0.copy()

datetimes = pd.to_datetime(manhattan_weather_1['Timestamp'])
manhattan_weather_1['date'] = datetimes

print(manhattan_weather_1.shape)
# manhattan_weather_1.tail()

(155, 16)


In [4]:
# manhattan_weather_1.columns

In [5]:
# Drop first 2 rows

manhattan_weather_2 = manhattan_weather_1.iloc[2:]
print(manhattan_weather_2.shape)
# manhattan_weather_2.head()

(153, 16)


In [6]:
# Drop `timestamp` column

manhattan_weather_3 = manhattan_weather_2.drop(labels=['Timestamp'], axis=1)
print(manhattan_weather_3.shape)
# manhattan_weather_3.head()

(153, 15)


Unnamed: 0,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,SoilTemperature5cm.1,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1,date
2,Manhattan,81.7,57.9,65.9,0.0,3.1,13.8,82.1,66.7,78.4,68.1,660.1,0.21,0.25,2016-06-01
3,Manhattan,84.8,56.1,59.6,0.0,2.8,14.0,85.1,66.8,80.7,68.4,673.7,0.22,0.26,2016-06-02
4,Manhattan,85.8,55.4,63.2,0.15,3.9,22.9,85.8,67.9,81.1,69.5,667.8,0.23,0.28,2016-06-03
5,Manhattan,83.6,59.6,60.1,0.01,7.2,23.6,79.7,69.4,77.3,70.8,693.1,0.27,0.36,2016-06-04
6,Manhattan,85.3,57.4,52.9,0.0,4.7,16.9,84.0,66.7,79.7,68.3,707.2,0.26,0.33,2016-06-05


In [7]:
manhattan_weather_4 = manhattan_weather_3.rename({'Station': 'station', 'AirTemperature': 'air_temp_max_F', 
                                                  'AirTemperature.1': 'air_temp_min_F', 'RelativeHumidity': 'avg_rh',
                                                  'Precipitation': 'precip_total', 'WindSpeed2m': 'avg_wind_speed', 
                                                  'WindSpeed2m.1': 'max_wind_speed', 'SoilTemperature5cm': 'soil_temp_5cm_max',
                                                  'SoilTemperature5cm.1': 'soil_temp_5cm_min', 
                                                  'SoilTemperature10cm': 'soil_temp_10cm_max', 
                                                  'SoilTemperature10cm.1': 'soil_temp_10cm_min', 'SolarRadiation': 'solar_rad',
                                                  'ETo': 'eto_grass', 'ETo.1': 'eto_alfalfa'}, axis=1)
print(manhattan_weather_4.shape)
# manhattan_weather_4.head()

(153, 15)


#### Add Day-of-year (DOY) to Weather Dataframe
- slice dataframe to only include season dates from planting to harvest
- change `date` to index, but keep `date` column
- use Pandas `PeriodIndex.dayofyear()`

In [8]:
manhattan_weather_5 = manhattan_weather_4.loc[(manhattan_weather_4['date'] >= '2016-06-17') & (manhattan_weather_4['date'] <= '2016-10-21')]

In [9]:
manhattan_weather_6 = manhattan_weather_5.set_index(keys=['date'], drop=False)
print(manhattan_weather_6.shape)
# manhattan_weather_6.tail(3)

(127, 15)


Unnamed: 0_level_0,station,air_temp_max_F,air_temp_min_F,avg_rh,precip_total,avg_wind_speed,max_wind_speed,soil_temp_5cm_max,soil_temp_5cm_min,soil_temp_10cm_max,soil_temp_10cm_min,solar_rad,eto_grass,eto_alfalfa,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-10-19,Manhattan,74.2,47.7,64.8,0.0,4.1,15.6,65.0,58.8,64.6,60.5,333.9,0.11,0.15,2016-10-19
2016-10-20,Manhattan,62.3,43.4,75.3,0.21,5.6,20.5,61.5,57.5,62.4,59.3,290.2,0.09,0.13,2016-10-20
2016-10-21,Manhattan,67.6,35.6,65.1,0.0,4.0,17.5,60.2,54.3,60.3,56.5,354.2,0.1,0.14,2016-10-21


In [10]:
manhattan_weather_7 = manhattan_weather_6.copy()

manhattan_weather_7['day_of_year'] = manhattan_weather_7.index.dayofyear

In [11]:
# manhattan_weather_7.tail(3)

#### Add Growing Degree Days (GDD)
- convert all numeric columns from string `to_numeric`
- add air temps in C
- equation = (F - 32) x 0.5556 = C
- daily gdd equation = ((max_air_temp + min_air_temp) / 2) - 10

In [12]:
cols_to_convert = ['air_temp_max_F', 'air_temp_min_F', 'avg_rh', 'precip_total', 'avg_wind_speed', 'max_wind_speed', 
                   'soil_temp_5cm_max', 'soil_temp_5cm_min', 'soil_temp_10cm_max', 'soil_temp_10cm_min', 'solar_rad', 
                   'eto_grass', 'eto_alfalfa']

In [13]:
manhattan_weather_7[cols_to_convert] = manhattan_weather_7[cols_to_convert].apply(pd.to_numeric)

In [14]:
# manhattan_weather_7.dtypes

station                       object
air_temp_max_F               float64
air_temp_min_F               float64
avg_rh                       float64
precip_total                 float64
avg_wind_speed               float64
max_wind_speed               float64
soil_temp_5cm_max            float64
soil_temp_5cm_min            float64
soil_temp_10cm_max           float64
soil_temp_10cm_min           float64
solar_rad                    float64
eto_grass                    float64
eto_alfalfa                  float64
date                  datetime64[ns]
day_of_year                    int64
dtype: object

In [15]:
manhattan_weather_8 = manhattan_weather_7.copy()

manhattan_weather_8['air_temp_max_C'] = round(((manhattan_weather_8['air_temp_max_F'] - 32) * 0.556), 1)
print(manhattan_weather_8.shape)
# manhattan_weather_8.tail(3)

(127, 17)


In [16]:
manhattan_weather_9 = manhattan_weather_8.copy()

manhattan_weather_9['air_temp_min_C'] = round(((manhattan_weather_9['air_temp_min_F'] - 32) * 0.556), 1)
print(manhattan_weather_9.shape)
# manhattan_weather_9.head(3)

(127, 18)


In [17]:
manhattan_weather_10 = manhattan_weather_9.copy()

manhattan_weather_10['daily_gdd'] = (((manhattan_weather_10['air_temp_max_C'] + manhattan_weather_10['air_temp_min_C'])) / 2) - 10

print(manhattan_weather_10.shape)
# manhattan_weather_10.sample(n=3)

(127, 19)


In [18]:
# Check for any negative daily GDD values (if any, need to be converted to 0)

# manhattan_weather_10.loc[manhattan_weather_10.daily_gdd < 0]

Unnamed: 0_level_0,station,air_temp_max_F,air_temp_min_F,avg_rh,precip_total,avg_wind_speed,max_wind_speed,soil_temp_5cm_max,soil_temp_5cm_min,soil_temp_10cm_max,soil_temp_10cm_min,solar_rad,eto_grass,eto_alfalfa,date,day_of_year,air_temp_max_C,air_temp_min_C,daily_gdd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2016-10-12,Manhattan,58.2,40.3,70.9,0.0,7.7,19.6,63.0,57.2,63.9,59.2,332.7,0.09,0.14,2016-10-12,286,14.6,4.6,-0.4
2016-10-13,Manhattan,60.3,33.4,72.3,0.0,2.8,11.7,60.0,53.9,60.2,56.2,304.5,0.07,0.1,2016-10-13,287,15.7,0.8,-1.75


In [19]:
# Assign negative daily gdd values to 0

manhattan_weather_11 = manhattan_weather_10.copy()

In [20]:
# ignore SeetingWithCopyWarning

manhattan_weather_11['daily_gdd']['2016-10-12'] = 0
manhattan_weather_11['daily_gdd']['2016-10-13'] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  manhattan_weather_11['daily_gdd']['2016-10-12'] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  manhattan_weather_11['daily_gdd']['2016-10-13'] = 0


In [21]:
# Check to see that negative values were successfully converted to 0

# manhattan_weather_11.loc[manhattan_weather_11.daily_gdd <= 0]

Unnamed: 0_level_0,station,air_temp_max_F,air_temp_min_F,avg_rh,precip_total,avg_wind_speed,max_wind_speed,soil_temp_5cm_max,soil_temp_5cm_min,soil_temp_10cm_max,soil_temp_10cm_min,solar_rad,eto_grass,eto_alfalfa,date,day_of_year,air_temp_max_C,air_temp_min_C,daily_gdd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2016-10-12,Manhattan,58.2,40.3,70.9,0.0,7.7,19.6,63.0,57.2,63.9,59.2,332.7,0.09,0.14,2016-10-12,286,14.6,4.6,0.0
2016-10-13,Manhattan,60.3,33.4,72.3,0.0,2.8,11.7,60.0,53.9,60.2,56.2,304.5,0.07,0.1,2016-10-13,287,15.7,0.8,0.0


In [22]:
# should now return an empty df

# manhattan_weather_11.loc[manhattan_weather_11.daily_gdd < 0]

Unnamed: 0_level_0,station,air_temp_max_F,air_temp_min_F,avg_rh,precip_total,avg_wind_speed,max_wind_speed,soil_temp_5cm_max,soil_temp_5cm_min,soil_temp_10cm_max,soil_temp_10cm_min,solar_rad,eto_grass,eto_alfalfa,date,day_of_year,air_temp_max_C,air_temp_min_C,daily_gdd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


In [23]:
# Add cumulative GDD, round to nearest integer

manhattan_weather_12 = manhattan_weather_11.copy()

manhattan_weather_12['gdd'] = np.rint(np.cumsum(manhattan_weather_12['daily_gdd']))
print(manhattan_weather_12.shape)
# manhattan_weather_12.tail()

(127, 20)


Drop `daily_gdd`

In [24]:
manhattan_weather_13 = manhattan_weather_12.drop(labels=['daily_gdd'], axis=1)
print(manhattan_weather_13.shape)
# manhattan_weather_13.head()

(127, 19)


#### Write to `.csv`

In [None]:
timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
output_filename = f'ksu_weather_2016_daily_{timestamp}.csv'.replace(':', '')

manhattan_weather_13.to_csv(output_filename)