## Season 4 Weather Data
* queried from geostreams using this [tutorial](https://terraref.github.io/tutorials/accessing-weather-data-in-r.html)
* timestamps are already in Arizona local time (MST)

In [3]:
import datetime
import os

import numpy as np
import pandas as pd

In [9]:
# Read the raw Season 4 weather export into a DataFrame, then preview its
# first five rows as the cell output.
raw_weather_csv = 'data/raw/season_4_weather_data.csv'
weather_df_0 = pd.read_csv(filepath_or_buffer=raw_weather_csv)
weather_df_0.head(n=5)

Unnamed: 0,source,wind_speed,source_file,eastward_wind,northward_wind,air_temperature,relative_humidity,precipitation_rate,surface_downwelling_shortwave_flux_in_air,surface_downwelling_photosynthetic_photon_flux_in_air,time
0,https://terraref.ncsa.illinois.edu/clowder/dat...,1.843458,58f9e4ea4f0ce95015666e02,0.898601,1.151159,303.100327,19.947991,0.0,675.676636,1813.429907,2017-04-19 12:10:00
1,https://terraref.ncsa.illinois.edu/clowder/dat...,1.508267,58f9e4ea4f0ce95015666e02,0.883875,-0.047253,303.1163,19.9371,0.0,678.462333,1816.936667,2017-04-19 12:15:00
2,https://terraref.ncsa.illinois.edu/clowder/dat...,1.8274,58f9e4ea4f0ce95015666e02,-1.456812,-0.571688,303.582367,19.490567,0.0,683.131,1823.636667,2017-04-19 12:20:00
3,https://terraref.ncsa.illinois.edu/clowder/dat...,2.347067,58f9e4ea4f0ce95015666e02,-1.178588,1.262081,303.520633,18.043,0.0,680.437,1821.41,2017-04-19 12:25:00
4,https://terraref.ncsa.illinois.edu/clowder/dat...,2.917095,58f9e4ea4f0ce95015666e02,-0.221016,2.690058,303.678953,17.383784,0.0,680.580068,1821.422297,2017-04-19 12:30:00


In [10]:
# List the column labels of the raw frame.
weather_df_0.columns

Index(['source', 'wind_speed', 'source_file', 'eastward_wind',
       'northward_wind', 'air_temperature', 'relative_humidity',
       'precipitation_rate', 'surface_downwelling_shortwave_flux_in_air',
       'surface_downwelling_photosynthetic_photon_flux_in_air', 'time'],
      dtype='object')

In [11]:
# (number of rows, number of columns) of the raw frame.
weather_df_0.shape

(4453, 11)

#### I. Drop and reorder columns 
* Dropping the `surface_downwelling_*` columns for now, until I get more feedback; `source`, `source_file`, and `wind_speed` are also left out of the new column order.

In [7]:
# Columns to keep, in their final left-to-right order: the timestamp first,
# then the scalar measurements, then the two wind components.
kept_measurements = ['air_temperature', 'relative_humidity', 'precipitation_rate']
kept_wind_components = ['eastward_wind', 'northward_wind']
new_col_order = ['time', *kept_measurements, *kept_wind_components]

In [12]:
# Select and reorder the kept columns in one step. Selecting by a list of
# labels raises KeyError if any name in new_col_order is missing from the raw
# frame, whereas pd.DataFrame(data=..., columns=...) would silently add an
# all-NaN column for it. The row index of weather_df_0 is carried over as-is.
# .copy() makes weather_df_1 an independent frame, so the 'date' column added
# to it later cannot affect weather_df_0.
weather_df_1 = weather_df_0[new_col_order].copy()
# weather_df_1.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind
0,2017-04-19 12:10:00,303.100327,19.947991,0.0,0.898601,1.151159
1,2017-04-19 12:15:00,303.1163,19.9371,0.0,0.883875,-0.047253
2,2017-04-19 12:20:00,303.582367,19.490567,0.0,-1.456812,-0.571688
3,2017-04-19 12:25:00,303.520633,18.043,0.0,-1.178588,1.262081
4,2017-04-19 12:30:00,303.678953,17.383784,0.0,-0.221016,2.690058


In [13]:
# Show the last rows of the reordered frame. Re-enabled so that running the
# cell reproduces the output saved beneath it instead of doing nothing.
weather_df_1.tail()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind
4448,2017-09-14 23:45:00,299.596367,28.869333,0.0,-1.137041,-1.255996
4449,2017-09-14 23:50:00,299.397567,31.006167,0.0,0.694553,-0.882575
4450,2017-09-14 23:55:00,298.822633,36.4865,0.0,1.831515,-0.457969
4451,2017-09-15 00:00:00,298.543967,37.805967,0.0,1.731793,-0.628593
4452,2017-09-14 15:47:36,304.38697,34.02855,0.0,-1.465922,-2.947309


#### II. Add `date` column with no time values

In [28]:
# The 'time' column holds strings shaped like 'YYYY-MM-DD HH:MM:SS' (it is read
# from CSV without date parsing), so the first 10 characters are the calendar
# date. Slice the whole column at once with the vectorised .str accessor
# instead of looping over rows in Python; a missing timestamp would propagate
# as NaN rather than raising.
time_values = weather_df_1.time.values
just_dates = weather_df_1['time'].str[:10].tolist()

# Sanity check: exactly one date string per input row.
assert len(just_dates) == len(time_values) == weather_df_1.shape[0]

4453
4453
4453


In [30]:
# Attach the date-only strings as a new trailing 'date' column.
weather_df_1 = weather_df_1.assign(date=just_dates)
# weather_df_1.tail()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date
4448,2017-09-14 23:45:00,299.596367,28.869333,0.0,-1.137041,-1.255996,2017-09-14
4449,2017-09-14 23:50:00,299.397567,31.006167,0.0,0.694553,-0.882575,2017-09-14
4450,2017-09-14 23:55:00,298.822633,36.4865,0.0,1.831515,-0.457969,2017-09-14
4451,2017-09-15 00:00:00,298.543967,37.805967,0.0,1.731793,-0.628593,2017-09-15
4452,2017-09-14 15:47:36,304.38697,34.02855,0.0,-1.465922,-2.947309,2017-09-14


#### II. Convert temperature to Celsius
* currently rounded to 2 decimal places; can round to the nearest int (or not round at all) as needed

In [31]:
# Work on an independent (deep) copy so weather_df_1 stays unchanged.
weather_df_2 = weather_df_1.copy(deep=True)

In [32]:
# Kelvin -> Celsius: subtract 273.15, then keep two decimal places.
kelvin_offset = 273.15
celsius = weather_df_2['air_temperature'].sub(kelvin_offset)
weather_df_2['air_temp_C'] = celsius.round(decimals=2)
# weather_df_2.head()

Unnamed: 0,time,air_temperature,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date,air_temp_C
0,2017-04-19 12:10:00,303.100327,19.947991,0.0,0.898601,1.151159,2017-04-19,29.95
1,2017-04-19 12:15:00,303.1163,19.9371,0.0,0.883875,-0.047253,2017-04-19,29.97
2,2017-04-19 12:20:00,303.582367,19.490567,0.0,-1.456812,-0.571688,2017-04-19,30.43
3,2017-04-19 12:25:00,303.520633,18.043,0.0,-1.178588,1.262081,2017-04-19,30.37
4,2017-04-19 12:30:00,303.678953,17.383784,0.0,-0.221016,2.690058,2017-04-19,30.53


#### III. Add max, min, and mean values
* Temperature: Drop Kelvin column
* Relative humidity
* Precipitation rate

In [35]:
# The Celsius column replaces the Kelvin one, so drop 'air_temperature';
# drop() returns a new frame and leaves weather_df_2 intact.
weather_df_3 = weather_df_2.drop(columns=['air_temperature'])
# weather_df_3.tail()

Unnamed: 0,time,relative_humidity,precipitation_rate,eastward_wind,northward_wind,date,air_temp_C
4448,2017-09-14 23:45:00,28.869333,0.0,-1.137041,-1.255996,2017-09-14,26.45
4449,2017-09-14 23:50:00,31.006167,0.0,0.694553,-0.882575,2017-09-14,26.25
4450,2017-09-14 23:55:00,36.4865,0.0,1.831515,-0.457969,2017-09-14,25.67
4451,2017-09-15 00:00:00,37.805967,0.0,1.731793,-0.628593,2017-09-15,25.39
4452,2017-09-14 15:47:36,34.02855,0.0,-1.465922,-2.947309,2017-09-14,31.24


In [40]:
# Spot check: lowest Celsius reading recorded on 2017-04-19. Re-enabled so the
# cell reproduces the value saved beneath it; the row mask and the column are
# passed to a single .loc call rather than chaining two indexing operations.
weather_df_3.loc[weather_df_3['date'] == '2017-04-19', 'air_temp_C'].min()

19.78

Air temperatures

In [57]:
# Per-date minimum, maximum and mean of the Celsius air temperature.
temp_by_date = weather_df_3.groupby('date')['air_temp_C']
min_temp = temp_by_date.min()
max_temp = temp_by_date.max()
mean_temp = temp_by_date.mean()

Relative humidity

In [58]:
# Per-date minimum, maximum and mean of relative humidity.
humidity_by_date = weather_df_3.groupby('date')['relative_humidity']
min_relative_humidity = humidity_by_date.min()
max_relative_humidity = humidity_by_date.max()
mean_relative_humidity = humidity_by_date.mean()

Precipitation rate

In [59]:
# Per-date minimum, maximum and mean of the precipitation rate.
precip_by_date = weather_df_3.groupby('date')['precipitation_rate']
min_precip_rate = precip_by_date.min()
max_precip_rate = precip_by_date.max()
mean_precip_rate = precip_by_date.mean()

In [65]:
# Combine the nine per-date Series into one table indexed by date. The dict's
# insertion order sets the column order: temperature, humidity, precipitation.
daily_columns = {
    'min_temp': min_temp,
    'max_temp': max_temp,
    'mean_temp': mean_temp,
    'min_relative_humidity': min_relative_humidity,
    'max_relative_humidity': max_relative_humidity,
    'mean_relative_humidity': mean_relative_humidity,
    'min_precip_rate': min_precip_rate,
    'max_precip_rate': max_precip_rate,
    'mean_precip_rate': mean_precip_rate,
}
daily_weather_values = pd.DataFrame(daily_columns)
# daily_weather_values.head()

Unnamed: 0_level_0,min_temp,max_temp,mean_temp,min_relative_humidity,max_relative_humidity,mean_relative_humidity,min_precip_rate,max_precip_rate,mean_precip_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-04-19,19.78,31.93,25.448732,7.7329,24.571511,15.610601,0.0,0.0,0.0
2017-04-20,15.28,33.41,25.046047,4.179077,29.93733,14.417725,0.0,0.0,0.0
2017-04-21,18.67,19.41,18.985,17.2296,19.3904,18.265567,0.0,0.0,0.0
2017-04-23,21.33,37.66,30.450351,5.65027,23.4216,12.054445,0.0,0.0,0.0
2017-04-24,20.27,31.16,26.177907,8.464933,27.061567,17.2316,0.0,0.0,0.0


In [67]:
# Order the daily rows by their date label, ascending; sort_index returns a
# new frame rather than modifying daily_weather_values.
daily_weather_values_1 = daily_weather_values.sort_index(axis=0, ascending=True)
# daily_weather_values_1.tail()

Unnamed: 0_level_0,min_temp,max_temp,mean_temp,min_relative_humidity,max_relative_humidity,mean_relative_humidity,min_precip_rate,max_precip_rate,mean_precip_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-09-11,26.37,37.83,31.678209,21.921784,46.254773,32.657997,0.0,0.0,0.0
2017-09-12,30.62,31.5,31.17,31.0955,33.271467,31.778879,0.0,0.0,0.0
2017-09-13,27.91,40.98,32.343333,15.874467,47.938767,35.913253,0.0,0.0,0.0
2017-09-14,25.62,36.8,28.997241,22.00645,44.430333,32.921517,0.0,0.0,0.0
2017-09-15,25.39,25.39,25.39,37.805967,37.805967,37.805967,0.0,0.0,0.0


### 

### Final Steps

In [68]:
# Set to False to re-run the notebook without writing a new CSV.
need_to_create_csv = True

if need_to_create_csv:

    # Second-resolution ISO timestamp; the colons are stripped from the file
    # name below so each export gets a distinct, colon-free name.
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    output_filename = f'daily_weather_values_{timestamp}.csv'.replace(':', '')

    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing (a no-op when it is already there).
    output_dir = 'data/processed'
    os.makedirs(output_dir, exist_ok=True)
    daily_weather_values_1.to_csv(f'{output_dir}/{output_filename}')