## Season 6 Weather Data
* queried from geostreams using this [tutorial](https://terraref.github.io/tutorials/accessing-weather-data-in-r.html)
* date already in AZ / MST time

#### Notes
* missing dates
* dates with some but not all variable values

In [2]:
import datetime
import numpy as np
import pandas as pd

In [5]:
weather_df_0 = pd.read_csv('data/raw/season_6_weather_data.csv')
weather_df_0.head()

Unnamed: 0,source,wind_speed,source_file,eastward_wind,northward_wind,air_temperature,relative_humidity,precipitation_rate,surface_downwelling_shortwave_flux_in_air,surface_downwelling_photosynthetic_photon_flux_in_air,time
0,https://terraref.ncsa.illinois.edu/clowder/dat...,0.656267,5ae3dfa74f0c467c1670a0e1,0.599756,-0.233053,290.105233,14.690333,0.0,0.010873,0.0,2018-04-18 22:05:00
1,https://terraref.ncsa.illinois.edu/clowder/dat...,1.389333,5ae3dfa74f0c467c1670a0e1,1.329997,-0.316545,289.968,15.7738,0.0,0.04427,0.0,2018-04-18 22:10:00
2,https://terraref.ncsa.illinois.edu/clowder/dat...,0.977123,5ae3dfa74f0c467c1670a0ee,-0.409435,0.435499,290.044447,18.701601,0.0,168.321262,434.102123,2018-04-18 22:15:00
3,https://terraref.ncsa.illinois.edu/clowder/dat...,1.233767,5ae3dfa74f0c467c1670a0ee,0.06458,-1.225979,289.220977,20.263907,0.0,0.013005,0.0,2018-04-18 23:20:00
4,https://terraref.ncsa.illinois.edu/clowder/dat...,1.390433,5ae3dfa74f0c467c1670a0ee,0.068203,-1.376049,288.6938,21.982733,0.0,0.041163,0.0,2018-04-18 23:25:00


In [6]:
weather_df_0.columns

Index(['source', 'wind_speed', 'source_file', 'eastward_wind',
       'northward_wind', 'air_temperature', 'relative_humidity',
       'precipitation_rate', 'surface_downwelling_shortwave_flux_in_air',
       'surface_downwelling_photosynthetic_photon_flux_in_air', 'time'],
      dtype='object')

In [7]:
weather_df_0.shape

(36485, 11)

### I. Drop and reorder columns 
* will drop `surface_downwelling` columns for now until I get more feedback

In [8]:
new_col_order = ['time', 'air_temperature', 'relative_humidity', 'precipitation_rate', 'eastward_wind', 'northward_wind']

In [9]:
weather_df_1 = pd.DataFrame(data=weather_df_0, index=weather_df_0.index, columns=new_col_order)
# weather_df_1.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind
0,2018-04-18 22:05:00,290.105233,14.690333,0.0,0.599756,-0.233053
1,2018-04-18 22:10:00,289.968,15.7738,0.0,1.329997,-0.316545
2,2018-04-18 22:15:00,290.044447,18.701601,0.0,-0.409435,0.435499
3,2018-04-18 23:20:00,289.220977,20.263907,0.0,0.06458,-1.225979
4,2018-04-18 23:25:00,288.6938,21.982733,0.0,0.068203,-1.376049


In [11]:
# weather_df_1.tail()

### II. Add `date` column with no time values

In [12]:
time_values = weather_df_1.time.values
just_dates = []

for t in time_values:
    
    just_date = t[:10]
    just_dates.append(just_date)

# print(weather_df_1.shape[0])
# print(len(time_values))
# print(len(just_dates))

36485
36485
36485


In [14]:
weather_df_1['date'] = just_dates
weather_df_1.tail()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date
36480,2018-08-02 17:10:00,303.294156,43.538212,0.0,-0.838302,-1.795093,2018-08-02
36481,2018-08-02 17:10:00,303.294156,43.538212,0.0,-0.838302,-1.795093,2018-08-02
36482,2018-08-02 17:10:00,303.294156,43.538212,0.0,-0.838302,-1.795093,2018-08-02
36483,2018-08-02 17:10:00,303.294156,43.538212,0.0,-0.838302,-1.795093,2018-08-02
36484,2018-08-02 17:10:00,303.294156,43.538212,0.0,-0.838302,-1.795093,2018-08-02


### III. Convert temperature to Celsius
* can round to nearest int (or not round at all) as needed

In [15]:
weather_df_2 = weather_df_1.copy()

In [16]:
weather_df_2['air_temp_C'] = round((weather_df_2['air_temperature'] - 273.15), 2)
weather_df_2.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date,air_temp_C
0,2018-04-18 22:05:00,290.105233,14.690333,0.0,0.599756,-0.233053,2018-04-18,16.96
1,2018-04-18 22:10:00,289.968,15.7738,0.0,1.329997,-0.316545,2018-04-18,16.82
2,2018-04-18 22:15:00,290.044447,18.701601,0.0,-0.409435,0.435499,2018-04-18,16.89
3,2018-04-18 23:20:00,289.220977,20.263907,0.0,0.06458,-1.225979,2018-04-18,16.07
4,2018-04-18 23:25:00,288.6938,21.982733,0.0,0.068203,-1.376049,2018-04-18,15.54


### IV. Add max, min, and mean values
* Temperature: Drop Kelvin column
* Relative humidity
* Precipitation rate
* Eastward wind
* Northward wind

In [17]:
weather_df_3 = weather_df_2.drop(labels='air_temperature', axis=1)
# weather_df_3.tail()

Unnamed: 0,time,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date,air_temp_C
36480,2018-08-02 17:10:00,43.538212,0.0,-0.838302,-1.795093,2018-08-02,30.14
36481,2018-08-02 17:10:00,43.538212,0.0,-0.838302,-1.795093,2018-08-02,30.14
36482,2018-08-02 17:10:00,43.538212,0.0,-0.838302,-1.795093,2018-08-02,30.14
36483,2018-08-02 17:10:00,43.538212,0.0,-0.838302,-1.795093,2018-08-02,30.14
36484,2018-08-02 17:10:00,43.538212,0.0,-0.838302,-1.795093,2018-08-02,30.14


Air temperatures

In [18]:
min_temp = weather_df_3.groupby('date')['air_temp_C'].min()
max_temp = weather_df_3.groupby('date')['air_temp_C'].max()
mean_temp = weather_df_3.groupby('date')['air_temp_C'].mean()

Relative humidity

In [19]:
min_relative_humidity = weather_df_3.groupby('date')['relative_humidity'].min()
max_relative_humidity = weather_df_3.groupby('date')['relative_humidity'].max()
mean_relative_humidity = weather_df_3.groupby('date')['relative_humidity'].mean()

Precipitation rate

In [20]:
min_precip_rate = weather_df_3.groupby('date')['precipitation_rate'].min()
max_precip_rate = weather_df_3.groupby('date')['precipitation_rate'].max()
mean_precip_rate = weather_df_3.groupby('date')['precipitation_rate'].mean()

Northward wind

In [21]:
min_north_wind = weather_df_3.groupby('date')['northward_wind'].min()
max_north_wind = weather_df_3.groupby('date')['northward_wind'].max()
mean_north_wind = weather_df_3.groupby('date')['northward_wind'].mean()

Eastward wind

In [22]:
min_east_wind = weather_df_3.groupby('date')['eastward_wind'].min()
max_east_wind = weather_df_3.groupby('date')['eastward_wind'].max()
mean_east_wind = weather_df_3.groupby('date')['eastward_wind'].mean()

#### Create new dataframe with new values

In [23]:
daily_weather_values = pd.DataFrame({'min_temp': min_temp, 'max_temp': max_temp, 'mean_temp': mean_temp,
                                    'min_relative_humidity': min_relative_humidity, 'max_relative_humidity': max_relative_humidity,
                                    'mean_relative_humidity': mean_relative_humidity, 'min_precip_rate': min_precip_rate,
                                    'max_precip_rate': max_precip_rate, 'mean_precip_rate': mean_precip_rate})
daily_weather_values.head()

Unnamed: 0_level_0,min_temp,max_temp,mean_temp,min_relative_humidity,max_relative_humidity,mean_relative_humidity,min_precip_rate,max_precip_rate,mean_precip_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-04-18,14.42,22.46,16.098333,11.702396,23.6988,20.092809,0.0,0.0,0.0
2018-04-19,14.94,20.35,17.011905,18.186243,30.9112,25.800597,0.0,0.0,0.0
2018-04-20,15.7,15.7,15.7,30.504433,30.504433,30.504433,0.0,0.0,0.0
2018-04-22,18.59,31.52,26.610571,10.341967,31.280344,14.259545,0.0,0.0,0.0
2018-04-23,19.69,28.59,23.213171,11.042233,29.8094,19.12405,0.0,0.0,0.0


In [24]:
daily_weather_values_1 = daily_weather_values.sort_index()
daily_weather_values_1.tail()

Unnamed: 0_level_0,min_temp,max_temp,mean_temp,min_relative_humidity,max_relative_humidity,mean_relative_humidity,min_precip_rate,max_precip_rate,mean_precip_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-07-29,25.97,36.38,30.973393,29.232762,80.706667,51.977446,0.0,0.254,0.004536
2018-07-30,27.65,38.34,31.440476,30.470972,59.8522,46.333852,0.0,0.0,0.0
2018-07-31,28.46,41.56,35.763731,19.6115,57.098533,33.659175,0.0,0.0,0.0
2018-08-01,32.25,43.64,38.199437,16.258933,48.735874,27.809735,0.0,0.0,0.0
2018-08-02,30.14,43.43,38.838834,19.227533,43.538212,26.78739,0.0,0.0,0.0


### V. Add GDD

In [25]:
daily_weather_values_2 = daily_weather_values_1.copy()

In [26]:
daily_weather_values_2['gdd'] = np.rint(np.cumsum((((daily_weather_values_2['max_temp'] + daily_weather_values_2['min_temp']) / 2) - 10)))

Check that no daily gdd values were below 0

In [34]:
# there should be no output

for i,r in daily_weather_values_1.iterrows():
    
    if ((((r['min_temp'] + r['max_temp']) / 2)) - 10) < 0:
        print(i)
        print(r)

In [35]:
daily_weather_values_2.head()

Unnamed: 0_level_0,min_temp,max_temp,mean_temp,min_relative_humidity,max_relative_humidity,mean_relative_humidity,min_precip_rate,max_precip_rate,mean_precip_rate,gdd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-04-18,14.42,22.46,16.098333,11.702396,23.6988,20.092809,0.0,0.0,0.0,8.0
2018-04-19,14.94,20.35,17.011905,18.186243,30.9112,25.800597,0.0,0.0,0.0,16.0
2018-04-20,15.7,15.7,15.7,30.504433,30.504433,30.504433,0.0,0.0,0.0,22.0
2018-04-22,18.59,31.52,26.610571,10.341967,31.280344,14.259545,0.0,0.0,0.0,37.0
2018-04-23,19.69,28.59,23.213171,11.042233,29.8094,19.12405,0.0,0.0,0.0,51.0


### Final Steps

In [36]:
need_to_create_csv = True

if need_to_create_csv:

    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    output_filename = f'daily_weather_values_season_6_{timestamp}.csv'.replace(':', '')
    daily_weather_values_2.to_csv(f'data/processed/{output_filename}')