## Weather Station Data Season 4

### MAC Weather Station
* 2017 raw [data](https://cals.arizona.edu/azmet/06.htm)

In [4]:
import datetime
import numpy as np
import pandas as pd

In [5]:
# Load the 2017 daily records for the MAC weather station from the raw data folder.
raw_daily_csv = 'data/raw/mac_weather_station_raw_daily_2017.csv'
df_0 = pd.read_csv(raw_daily_csv)
# df_0.head()

Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean
0,2017,1,6,13.6,9.3,11.8,92.7,69.2,83.5,0.23,...,3.5,2.6,188,43,10.9,0.2,1.0,1.2,1.16,9.0
1,2017,2,6,14.9,7.2,10.5,87.7,44.7,71.4,0.39,...,2.2,1.5,129,44,5.8,0.5,1.0,1.6,0.89,5.3
2,2017,3,6,13.9,3.2,9.0,97.0,60.6,81.9,0.24,...,1.0,0.1,349,78,3.3,0.2,0.6,0.9,0.93,5.8
3,2017,4,6,20.4,3.0,11.0,97.8,31.4,73.2,0.46,...,0.9,0.3,76,68,3.6,2.3,1.8,1.5,0.92,5.7
4,2017,5,6,20.9,4.0,12.3,95.5,39.8,71.0,0.53,...,1.5,0.8,253,54,7.1,2.5,2.2,1.8,0.98,6.6


In [6]:
# Show the (rows, columns) dimensions of the raw frame as a quick sanity check.
df_0.shape

(365, 28)

### I. Drop columns not needed at this time

In [7]:
# List every column label in the raw frame so the unneeded ones can be picked out.
df_0.columns

Index(['year', 'day_of_year', 'station_number', 'air_temp_max', 'air_temp_min',
       'air_temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
       'solar_rad_total', 'precip_total', '4_in_soil_temp_max',
       '4_in_soil_temp_min', '4_in_soil_temp_mean', '20_in_soil_temp_max',
       '20_in_soil_temp_min', '20_in_soil_temp_mean', 'wind_speed_mean',
       'wind_vector_magnitude', 'wind_vector_direction', 'wind_direction_std',
       'max_wind_speed', 'heat_units', 'eto_azmet', 'eto_p_m',
       'vapor_pressure_mean', 'dewpoint_mean'],
      dtype='object')

In [8]:
# Labels of the raw columns that are removed before any further processing.
cols_to_drop = [
    'year',
    'station_number',
    'vpd_mean',
    'solar_rad_total',
    # soil temperature columns
    '4_in_soil_temp_max',
    '4_in_soil_temp_min',
    '4_in_soil_temp_mean',
    '20_in_soil_temp_max',
    '20_in_soil_temp_min',
    '20_in_soil_temp_mean',
    'heat_units',
    'eto_azmet',
    'eto_p_m',
    'vapor_pressure_mean',
    'dewpoint_mean',
]

In [9]:
# Build a narrower frame without the columns listed in cols_to_drop;
# df_0 itself is left unchanged because drop() returns a new frame.
df_1 = df_0.drop(columns=cols_to_drop)
df_1.head()

Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,precip_total,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed
0,1,13.6,9.3,11.8,92.7,69.2,83.5,1.78,3.5,2.6,188,43,10.9
1,2,14.9,7.2,10.5,87.7,44.7,71.4,0.0,2.2,1.5,129,44,5.8
2,3,13.9,3.2,9.0,97.0,60.6,81.9,0.0,1.0,0.1,349,78,3.3
3,4,20.4,3.0,11.0,97.8,31.4,73.2,0.0,0.9,0.3,76,68,3.6
4,5,20.9,4.0,12.3,95.5,39.8,71.0,0.0,1.5,0.8,253,54,7.1


### II. Slice dataframe for season dates only and add date column
* Planting Date: 2017-04-20, Day 110
* Last Day of Harvest: 2017-09-16, Day 259

In [11]:
# Keep only rows whose day_of_year lies within the season, both ends inclusive
# (110 = planting date, 259 = last day of harvest).
on_or_after_planting = df_1['day_of_year'] >= 110
on_or_before_last_harvest = df_1['day_of_year'] <= 259
df_2 = df_1.loc[on_or_after_planting & on_or_before_last_harvest]
df_2.shape

(150, 13)

In [12]:
# One calendar-day entry per day of the season, both endpoints included.
season_4_date_range = pd.date_range(
    start='2017-04-20',  # planting date (day of year 110)
    end='2017-09-16',  # last day of harvest (day of year 259)
    freq='D',
)

In [13]:
# Work on a copy so that adding a column does not touch df_2.
df_3 = df_2.copy()

# Derive each row's date from that row's own day_of_year, anchored on the first
# day of the season. Assigning season_4_date_range positionally only gives the
# right answer when the rows are complete, contiguous and already sorted; a
# missing day would raise a length mismatch and an out-of-order row would
# silently get the wrong date.
season_start = season_4_date_range[0]
days_since_season_start = df_3['day_of_year'] - season_start.dayofyear
df_3['date'] = season_start + pd.to_timedelta(days_since_season_start, unit='D')
# df_3.head()

Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,precip_total,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,date
109,110,33.3,14.1,23.5,45.0,5.1,18.2,0.0,1.9,0.8,233,60,8.2,2017-04-20
110,111,34.4,11.1,24.0,46.5,5.5,17.2,0.0,2.2,1.3,274,52,8.5,2017-04-21
111,112,35.5,14.5,25.0,32.5,6.4,15.6,0.0,1.6,0.5,178,66,5.2,2017-04-22
112,113,37.0,12.6,26.5,48.0,6.8,17.5,0.0,2.1,1.1,221,57,8.2,2017-04-23
113,114,33.7,14.9,25.7,33.1,8.3,17.5,0.0,3.0,2.0,214,46,9.9,2017-04-24


### III. Add Growing Degree Days (gdd)

In [15]:
# Work on a copy so that adding a column does not touch df_3.
df_4 = df_3.copy()

# Base temperature subtracted from the daily mean.
# NOTE(review): assumes the air_temp_* columns are in degrees Celsius - confirm.
gdd_base_temp = 10

# Daily contribution: midpoint of the daily max and min, minus the base.
daily_midpoint_temp = (df_4['air_temp_max'] + df_4['air_temp_min']) / 2
# Floor each day's contribution at zero so that a day cooler than the base
# adds nothing instead of subtracting from the running total.
daily_gdd = (daily_midpoint_temp - gdd_base_temp).clip(lower=0)

# 'gdd' holds the running (cumulative) total, rounded to the nearest integer.
df_4['gdd'] = np.rint(daily_gdd.cumsum())
# df_4.head()

Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,precip_total,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,date,gdd
109,110,33.3,14.1,23.5,45.0,5.1,18.2,0.0,1.9,0.8,233,60,8.2,2017-04-20,14.0
110,111,34.4,11.1,24.0,46.5,5.5,17.2,0.0,2.2,1.3,274,52,8.5,2017-04-21,26.0
111,112,35.5,14.5,25.0,32.5,6.4,15.6,0.0,1.6,0.5,178,66,5.2,2017-04-22,41.0
112,113,37.0,12.6,26.5,48.0,6.8,17.5,0.0,2.1,1.1,221,57,8.2,2017-04-23,56.0
113,114,33.7,14.9,25.7,33.1,8.3,17.5,0.0,3.0,2.0,214,46,9.9,2017-04-24,71.0


### IV. Add cumulative precipitation

In [16]:
# Work on a copy so that adding a column does not touch df_4.
df_5 = df_4.copy()

# Running total of the daily precipitation, kept to two decimal places.
running_precip = df_5['precip_total'].cumsum()
df_5['cum_precip'] = round(running_precip, 2)
# df_5.tail()

Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,precip_total,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,date,gdd,cum_precip
254,255,42.8,24.2,34.0,58.0,13.3,28.1,0.0,1.6,0.4,263,70,6.2,2017-09-12,2955.0,50.79
255,256,41.3,24.3,33.9,54.1,14.3,27.3,0.0,2.2,1.4,244,49,11.6,2017-09-13,2978.0,50.79
256,257,39.5,22.8,31.4,50.6,17.8,32.9,0.0,4.3,3.6,203,34,13.6,2017-09-14,2999.0,50.79
257,258,36.2,21.4,28.5,63.7,14.2,33.7,0.0,2.9,2.1,192,42,9.9,2017-09-15,3018.0,50.79
258,259,36.3,18.2,27.6,51.4,16.7,29.9,0.0,2.2,1.4,168,47,8.0,2017-09-16,3035.0,50.79


### V. Set date as index

In [17]:
# Promote the 'date' column to the row index; set_index returns a new frame,
# so df_5 keeps its original integer index.
df_6 = df_5.set_index('date')
# df_6.head()

Unnamed: 0_level_0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,precip_total,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,gdd,cum_precip
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-04-20,110,33.3,14.1,23.5,45.0,5.1,18.2,0.0,1.9,0.8,233,60,8.2,14.0,0.0
2017-04-21,111,34.4,11.1,24.0,46.5,5.5,17.2,0.0,2.2,1.3,274,52,8.5,26.0,0.0
2017-04-22,112,35.5,14.5,25.0,32.5,6.4,15.6,0.0,1.6,0.5,178,66,5.2,41.0,0.0
2017-04-23,113,37.0,12.6,26.5,48.0,6.8,17.5,0.0,2.1,1.1,221,57,8.2,56.0,0.0
2017-04-24,114,33.7,14.9,25.7,33.1,8.3,17.5,0.0,3.0,2.0,214,46,9.9,71.0,0.0


### VI. Write to `.csv`

In [18]:
# Switch for the export step: set to False to re-run the cell without
# writing another file.
need_to_create_csv = True

if need_to_create_csv:
    # Second-resolution ISO timestamp of the current local time, so each export
    # gets its own filename.
    timestamp = datetime.datetime.now().isoformat(timespec='seconds')
    # Colons are removed from the name - presumably because they are not
    # accepted in filenames on every platform.
    output_filename = f'weather_station_season_4_{timestamp}.csv'.replace(':', '')
    df_6.to_csv(f'data/processed/{output_filename}')