## Season 4 Weather Data
* queried from geostreams using this [tutorial](https://terraref.github.io/tutorials/accessing-weather-data-in-r.html)
* timestamps are already in Arizona local time (MST)

#### Notes
* missing dates
    * recalculate GDD
* dates with some but not all variable values
* Precipitation values needed:
    * Daily max rate
    * Daily total
    * Seasonal Cumulative

In [1]:
import datetime
import numpy as np
import pandas as pd

In [47]:
# Load the raw Season 4 weather readings and preview the first rows.
raw_weather_csv = 'data/raw/season_4_weather_data_3.csv'
weather_df_0 = pd.read_csv(raw_weather_csv)
weather_df_0.head()

Unnamed: 0,source,wind_speed,source_file,eastward_wind,northward_wind,air_temperature,relative_humidity,precipitation_rate,surface_downwelling_shortwave_flux_in_air,surface_downwelling_photosynthetic_photon_flux_in_air,time
0,https://terraref.ncsa.illinois.edu/clowder/dat...,1.591714,58fb33f54f0ce95015796a8d,0.54961,-1.492351,293.041143,17.516286,0.0,0.193057,0.0,2017-04-20 23:15:00
1,https://terraref.ncsa.illinois.edu/clowder/dat...,0.967067,58fb33f54f0ce95015796a8d,0.502316,-0.785215,293.096733,17.3036,0.0,0.182517,0.0,2017-04-20 23:20:00
2,https://terraref.ncsa.illinois.edu/clowder/dat...,0.405,58fb33f54f0ce95015796a8d,0.253301,-0.283427,292.950233,17.726467,0.0,0.183293,0.0,2017-04-20 23:25:00
3,https://terraref.ncsa.illinois.edu/clowder/dat...,0.696633,58fb33f54f0ce95015796a8d,0.18405,-0.660492,292.914833,17.7782,0.0,0.182517,0.0,2017-04-20 23:30:00
4,https://terraref.ncsa.illinois.edu/clowder/dat...,1.431351,58fb33f54f0ce95015796a8d,0.375217,-1.366798,292.865135,17.889493,0.0,0.206236,0.0,2017-04-20 23:35:00


In [48]:
# Show the smallest and largest values of the raw `time` column, one per line.
for time_bound in (weather_df_0['time'].min(), weather_df_0['time'].max()):
    print(time_bound)

2017-04-20 02:34:25
2017-09-19 21:52:50


In [49]:
# List the column labels of the raw table.
weather_df_0.columns

Index(['source', 'wind_speed', 'source_file', 'eastward_wind',
       'northward_wind', 'air_temperature', 'relative_humidity',
       'precipitation_rate', 'surface_downwelling_shortwave_flux_in_air',
       'surface_downwelling_photosynthetic_photon_flux_in_air', 'time'],
      dtype='object')

In [50]:
# (rows, columns) of the raw table.
weather_df_0.shape

(4390, 11)

### I. Drop and reorder columns 
* Drop `surface_downwelling` columns for now

In [51]:
# Columns to keep, in the order they should appear; everything not listed
# here (including the surface_downwelling_* columns) is left out.
new_col_order = [
    'time',
    'air_temperature',
    'relative_humidity',
    'precipitation_rate',
    'eastward_wind',
    'northward_wind',
]

In [52]:
# Keep only the columns of interest, in the requested order.
# Selecting by a list of labels raises KeyError when a name is misspelled,
# whereas building a new DataFrame with `columns=` reindexes and would
# silently create an all-NaN column.  `.copy()` makes weather_df_1 independent
# of weather_df_0 so columns can be added to it later.
weather_df_1 = weather_df_0.loc[:, new_col_order].copy()
weather_df_1.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind
0,2017-04-20 23:15:00,293.041143,17.516286,0.0,0.54961,-1.492351
1,2017-04-20 23:20:00,293.096733,17.3036,0.0,0.502316,-0.785215
2,2017-04-20 23:25:00,292.950233,17.726467,0.0,0.253301,-0.283427
3,2017-04-20 23:30:00,292.914833,17.7782,0.0,0.18405,-0.660492
4,2017-04-20 23:35:00,292.865135,17.889493,0.0,0.375217,-1.366798


In [None]:
# weather_df_1.tail()

### II. Add `date` column with no time values
* convert to datetime object

In [53]:
# Keep only the first ten characters ('YYYY-MM-DD') of each timestamp string.
# The vectorised `.str` slice does the same work as looping over every row
# in Python and appending to a list.
time_values = weather_df_1['time'].values
just_dates = weather_df_1['time'].str[:10].tolist()

In [54]:
# Parse the date-only strings into pandas datetime values.
iso_format_dates = pd.to_datetime(just_dates)

In [55]:
# Attach the parsed dates as a new `date` column (modifies weather_df_1 in place).
weather_df_1['date'] = iso_format_dates
# weather_df_1.tail()

In [58]:
# Inspect the last rows, including the new `date` column.
weather_df_1.tail()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date
4385,2017-09-19 18:35:00,306.1722,10.742233,0.0,-1.79875,-3.125964,2017-09-19
4386,2017-09-19 18:40:00,305.798933,10.222067,0.0,-1.588845,-2.300176,2017-09-19
4387,2017-09-19 18:45:00,305.258277,10.492432,0.0,-1.111043,-2.198304,2017-09-19
4388,2017-09-19 18:50:00,297.782304,35.341276,0.0,0.431963,-1.448367,2017-09-19
4389,2017-09-19 21:52:50,299.256859,25.094176,0.0,-0.494572,-1.674386,2017-09-19


In [17]:
# Check the dtype of every column.
weather_df_1.dtypes

time                          object
air_temperature              float64
relative_humidity            float64
precipitation_rate           float64
eastward_wind                float64
northward_wind               float64
date                  datetime64[ns]
dtype: object

In [67]:
# Look for any readings dated 2017-09-16.
is_sept_16 = weather_df_1['date'] == pd.Timestamp('2017-09-16')
weather_df_1.loc[is_sept_16]

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date


#### There are no readings for 2017-09-16 (the last harvest date), so the last usable day of weather data is 2017-09-15; drop all rows dated after it

In [71]:
# Keep only the rows dated strictly before 2017-09-16.
cutoff_day = pd.Timestamp('2017-09-16')
is_before_cutoff = weather_df_1['date'] < cutoff_day
weather_df_2 = weather_df_1.loc[is_before_cutoff]

In [72]:
# Confirm the first and last dates that remain after slicing.
print(weather_df_2['date'].min(), weather_df_2['date'].max(), sep='\n')

2017-04-20 00:00:00
2017-09-15 00:00:00


### III. Convert temperature to Celsius
* can round to nearest int (or not round at all) as needed

In [73]:
# Work on a copy so the new temperature column is not added to weather_df_2.
weather_df_3 = weather_df_2.copy()

In [74]:
# Subtract 273.15 from the `air_temperature` values and round to two decimals.
kelvin_offset = 273.15
weather_df_3['air_temp_C'] = weather_df_3['air_temperature'].sub(kelvin_offset).round(2)
weather_df_3.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date,air_temp_C
0,2017-04-20 23:15:00,293.041143,17.516286,0.0,0.54961,-1.492351,2017-04-20,19.89
1,2017-04-20 23:20:00,293.096733,17.3036,0.0,0.502316,-0.785215,2017-04-20,19.95
2,2017-04-20 23:25:00,292.950233,17.726467,0.0,0.253301,-0.283427,2017-04-20,19.8
3,2017-04-20 23:30:00,292.914833,17.7782,0.0,0.18405,-0.660492,2017-04-20,19.76
4,2017-04-20 23:35:00,292.865135,17.889493,0.0,0.375217,-1.366798,2017-04-20,19.72


### IV. Find and Fill Missing Values
* Planting Date: 2017-04-20
* Last Harvest Date: 2017-09-16

In [26]:
# One entry per calendar day from 2017-04-20 to 2017-09-16, both inclusive.
season_4_date_range = pd.date_range('2017-04-20', '2017-09-16', freq='D')

Compare unique dates in table to date range

In [75]:
# Number of calendar days in the season.
len(season_4_date_range)

150

In [76]:
# Number of distinct days that have at least one reading.
weather_df_3.date.nunique()

99

In [77]:
# First and last dates present in the table being checked for gaps.
print(weather_df_3['date'].min(), weather_df_3['date'].max(), sep='\n')

2017-04-20 00:00:00
2017-09-15 00:00:00


In [78]:
# Season length minus the number of distinct days that have readings.
n_missing_days = len(season_4_date_range) - weather_df_3['date'].nunique()
print(f'Missing days in dataset: {n_missing_days}')

Missing days in dataset: 51


In [82]:
# Season days that never appear in the `date` column.
observed_days = pd.DatetimeIndex(weather_df_3['date'])
missing_days = season_4_date_range.difference(observed_days)

In [83]:
# Count of season days without any reading.
len(missing_days)

51

In [84]:
# Show the season days without any reading.
missing_days

DatetimeIndex(['2017-04-22', '2017-04-29', '2017-04-30', '2017-05-01',
               '2017-05-06', '2017-05-10', '2017-05-11', '2017-05-12',
               '2017-05-13', '2017-05-20', '2017-05-24', '2017-05-25',
               '2017-05-26', '2017-05-27', '2017-06-03', '2017-06-10',
               '2017-06-17', '2017-06-24', '2017-07-01', '2017-07-02',
               '2017-07-03', '2017-07-04', '2017-07-05', '2017-07-06',
               '2017-07-07', '2017-07-08', '2017-07-09', '2017-07-10',
               '2017-07-13', '2017-07-14', '2017-07-15', '2017-07-22',
               '2017-07-29', '2017-08-05', '2017-08-06', '2017-08-07',
               '2017-08-12', '2017-08-13', '2017-08-17', '2017-08-18',
               '2017-08-19', '2017-08-26', '2017-08-27', '2017-08-28',
               '2017-08-29', '2017-08-30', '2017-08-31', '2017-09-01',
               '2017-09-02', '2017-09-09', '2017-09-16'],
              dtype='datetime64[ns]', freq=None)

#### Use data from Weather Station to populate these dates

In [None]:
# TODO: supply the path of the weather-station CSV.
# NOTE(review): the path argument is an empty string, so as written this cell
# has no file to read and will not survive a top-to-bottom run.
weather_station_df = pd.read_csv('')

### IV. Calculate max, min, and mean values
* Temperature: Drop Kelvin column
* Relative humidity
* Eastward Wind
* Northward Wind
* Round values to 2 decimal places

In [20]:
# Drop the Kelvin column from the frame that already holds `air_temp_C`.
# Rebuilding weather_df_3 from weather_df_2 would discard `air_temp_C` (it was
# only ever added to weather_df_3), and the daily temperature aggregation that
# follows groups on that column.  errors='ignore' keeps this cell safe to
# re-run once `air_temperature` is already gone.
weather_df_3 = weather_df_3.drop(columns='air_temperature', errors='ignore')

Air temperatures

In [21]:
# Daily min, max and mean of the Celsius temperature; only the mean is rounded here.
daily_temp_c = weather_df_3.groupby('date')['air_temp_C']
min_temp = daily_temp_c.min()
max_temp = daily_temp_c.max()
mean_temp = daily_temp_c.mean().round(2)

Relative humidity

In [23]:
# Daily min, max and mean relative humidity, each rounded to two decimals.
daily_humidity = weather_df_3.groupby('date')['relative_humidity']
min_relative_humidity = daily_humidity.min().round(2)
max_relative_humidity = daily_humidity.max().round(2)
mean_relative_humidity = daily_humidity.mean().round(2)

Eastward Wind

In [24]:
# Daily min, max and mean of the eastward wind component, rounded to two decimals.
daily_east_wind = weather_df_3.groupby('date')['eastward_wind']
min_east_wind = daily_east_wind.min().round(2)
max_east_wind = daily_east_wind.max().round(2)
mean_east_wind = daily_east_wind.mean().round(2)

Northward Wind

In [25]:
# Daily min, max and mean of the northward wind component, rounded to two decimals.
daily_north_wind = weather_df_3.groupby('date')['northward_wind']
min_north_wind = daily_north_wind.min().round(2)
max_north_wind = daily_north_wind.max().round(2)
mean_north_wind = daily_north_wind.mean().round(2)

### V. Precipitation
* daily max precipitation rate
* daily total precipitation 
* cumulative precipitation

In [None]:
# Largest precipitation rate recorded on each day, rounded to two decimals.
daily_precip_rate = weather_df_3.groupby('date')['precipitation_rate']
max_precip_rate = daily_precip_rate.max().round(2)

### V. Add GDD

In [None]:
# Copy the daily table so the GDD column is added to a separate frame.
# NOTE(review): daily_weather_values_1 is assigned in a later cell of this
# notebook (the "New table with Daily Values" section), so this cell only works
# after that section has been run.
daily_weather_values_2 = daily_weather_values_1.copy()

In [None]:
# Cumulative growing degree days (GDD).
# Each day contributes the mean of its max and min temperature minus the base
# temperature.  The contribution is floored at zero so that a day averaging
# below the base adds nothing instead of subtracting from the running total.
gdd_base_temp_c = 10
daily_mean_of_extremes = (daily_weather_values_2['max_temp'] + daily_weather_values_2['min_temp']) / 2
daily_gdd = (daily_mean_of_extremes - gdd_base_temp_c).clip(lower=0)
# Running total over the season, rounded to the nearest whole number.
daily_weather_values_2['gdd'] = np.rint(daily_gdd.cumsum())

In [None]:
# daily_weather_values_2[15:20]

### VI. Add min, max, and mean wind values
* eastward wind
* northward wind

In [None]:
# Collect the six daily wind series into one table, one column per statistic.
wind_columns = {
    'min_east_wind': min_east_wind,
    'max_east_wind': max_east_wind,
    'mean_east_wind': mean_east_wind,
    'min_north_wind': min_north_wind,
    'max_north_wind': max_north_wind,
    'mean_north_wind': mean_north_wind,
}
wind_df = pd.DataFrame(wind_columns)

In [None]:
# Preview the daily wind table.
wind_df.head()

In [None]:
# Order the wind table by its row index (rows are axis 0, the default).
wind_df_1 = wind_df.sort_index()

In [None]:
# daily_weather_values_2.shape

In [None]:
# Combine the daily temperature/humidity/precipitation table with the daily
# wind table, matching rows on their shared index.
# Merging with left_index/right_index keeps that index on the result.  Passing
# the index objects through left_on/right_on instead adds a synthetic `key_0`
# column and replaces the index with a plain 0..n-1 range.
daily_weather_values_3 = pd.merge(
    daily_weather_values_2,
    wind_df_1,
    how='outer',
    left_index=True,
    right_index=True,
)

In [None]:
# (rows, columns) of the merged daily table.
daily_weather_values_3.shape

### New table with Daily Values

In [None]:
# Build the per-day summary table from the daily series.
# The min and mean precipitation rates are not computed by any other cell, so
# they are derived here the same way as max_precip_rate (grouped by date,
# rounded to two decimals); otherwise this cell stops with a NameError.
precip_rate_by_day = weather_df_3.groupby('date')['precipitation_rate']
min_precip_rate = precip_rate_by_day.min().round(2)
mean_precip_rate = precip_rate_by_day.mean().round(2)

daily_weather_values = pd.DataFrame({
    'min_temp': min_temp,
    'max_temp': max_temp,
    'mean_temp': mean_temp,
    'min_relative_humidity': min_relative_humidity,
    'max_relative_humidity': max_relative_humidity,
    'mean_relative_humidity': mean_relative_humidity,
    'min_precip_rate': min_precip_rate,
    'max_precip_rate': max_precip_rate,
    'mean_precip_rate': mean_precip_rate,
})

In [None]:
# Order the daily table by its row index.
daily_weather_values_1 = daily_weather_values.sort_index()
# daily_weather_values_1.tail()

### Final Steps

In [None]:
# Flip to True to write the merged daily table to data/processed/.
need_to_create_csv = False

if need_to_create_csv:
    # Current time to the second in ISO format; colons are stripped from the file name.
    now_to_second = datetime.datetime.now().replace(microsecond=0)
    timestamp = now_to_second.isoformat()
    output_filename = f'daily_weather_values_{timestamp}.csv'.replace(':', '')
    output_path = f'data/processed/{output_filename}'
    daily_weather_values_3.to_csv(output_path)