In [1]:
from noaa_sdk import noaa
import requests
import numpy as np
import pandas as pd
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
response = requests.get('https://api.weather.gov/gridpoints/PHI/58,91')
j=response.json()
properties=j['properties']

In [89]:
def init_temp_df():
    temp_dicts=properties['temperature']
    temp_lists=temp_dicts['values']
    df_temp=pd.DataFrame(temp_lists)
    df_temp['temp_f']=df_temp['value']*9/5.+32
    df_temp['timestamp']=pd.to_datetime(df_temp['validTime'].map(lambda x: str(x).split('+')[0]))
    # set to local time from UTC (default)
    df_temp['timestamp']-=timedelta(hours=4)
    df_temp['msr_duration']=df_temp['validTime'].map(lambda x: int(str(x).split('PT')[-1].split('H')[0]))
    df_temp.drop(['validTime','value'],axis=1,inplace=True)
    df_temp = df_temp[['timestamp','msr_duration','temp_f']]
    return df_temp

In [90]:
df_temp = init_temp_df()

In [83]:
def expand_dataframe(df):
    """For any rows where the duration of the measurement
    is over more than one hour, make a new row duplicate row 
    (but with incremented timestamp) for each hour in the duration"""
    dfc = df.copy() # makes a copy so the original is not affected
    mask = dfc['msr_duration'] > 1
    n_rows_multiple_durations = len(dfc[mask]) # variable to store the number of rows where the measurement stands for multiple hours
    print(f"There are originally {n_rows_multiple_durations} multiple rows")
    counter = 0
    while n_rows_multiple_durations > 0:
        # Make a new dataframe that is just the rows with multiples
        new_dfc = dfc.loc[mask].copy()
        # set all of the masked rows in the original dataframe to 1 since we will combine back with this dfc in the end
        dfc['msr_duration'][mask]=1
        # In this new dataframe, increment the timestamp by 1 hour and decrement the msr_duration by 1 hour
        new_dfc['timestamp'] += timedelta(hours=1)
        new_dfc['msr_duration']-=1
        # concatenate this new dfc that we have transformed back with the original, and overwrite the original
        dfc = pd.concat([dfc,new_dfc])
        # calculate the new mask of multiples on this concatenated array
        mask = dfc['msr_duration'] > 1
        n_rows_multiple_durations = len(dfc[mask])
        counter+=1
    # Finally sort by timestamp so the rows fall back in order
    dfc = dfc.sort_values('timestamp')
    # set the index to a proper increasing numerical index without duplicates
    dfc.index = list(range(len(dfc)))
    return dfc

In [91]:
df_temp_expanded = expand_dataframe(df_temp)

There are originally 14 multiple rows


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [92]:
def init_hum_df():
    hum_dicts=properties['relativeHumidity']
    hum_lists=hum_dicts['values']
    df_hum=pd.DataFrame(hum_lists)
    df_hum['relativeHumidity'] = df_hum['value']
    df_hum['timestamp']=pd.to_datetime(df_hum['validTime'].map(lambda x: str(x).split('+')[0]))
    df_hum['timestamp']-=timedelta(hours=4)
    df_hum['msr_duration']=df_hum['validTime'].map(lambda x: int(str(x).split('PT')[-1].split('H')[0]))
    df_hum.drop(['validTime','value'],axis=1,inplace=True)
    df_hum = df_hum[['timestamp','msr_duration','relativeHumidity']]
    return df_hum

In [93]:
df_hum = init_hum_df()

In [94]:
df_hum_expanded = expand_dataframe(df_hum)

There are originally 16 multiple rows


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# OK great news the timestamp column is now identical in both dataframes from temp and humidity, so I can
# do a simple merge and will get all of the data. 

In [95]:
df_final = pd.merge(left=df_temp_expanded, right=df_hum_expanded, left_on='timestamp', right_on='timestamp')
df_final = df_final[['timestamp','temp_f','relativeHumidity']]

In [97]:
# Only show future times
now = datetime.now()
future_mask = df_final['timestamp']>now
df_final[future_mask]

Unnamed: 0,timestamp,temp_f,relativeHumidity
9,2020-06-27 16:00:00,85.0,55
10,2020-06-27 17:00:00,83.0,61
11,2020-06-27 18:00:00,82.0,63
12,2020-06-27 19:00:00,81.0,67
13,2020-06-27 20:00:00,80.0,69
...,...,...,...
177,2020-07-04 16:00:00,87.0,46
178,2020-07-04 17:00:00,85.0,51
179,2020-07-04 18:00:00,84.0,53
180,2020-07-04 19:00:00,82.0,56


In [None]:
# OK, so this dataframe is ready to be masked based on conditions

In [None]:
# fig=plt.figure()
# ax=fig.add_subplot(2,1,1)
fig, axs = plt.subplots(2, 1, sharex=True)
fig.subplots_adjust(hspace=0.5)

ax=axs[0]
ax.plot(merged_inner['timestamp'],merged_inner['temp_f'],color='r')
ax.set_ylabel("Temperature (F)")
# now plot humidity on second axis
ax_hum = axs[1]
ax_hum.plot(merged_inner['timestamp'],merged_inner['relative_humidity'],color='g')
ax_hum.set_ylabel('% relative humidity')
# Rotate tick marks on x-axis
_=plt.setp(ax_hum.get_xticklabels(), rotation=45)

In [None]:
# Now screen for values below certain thresholds in both temp and humidity
hum_threshold=70 # can't be higher than this
temp_threshold_f=75 # can't be higher than this
temp_mask = merged_inner['temp_f']<temp_threshold_f
hum_mask = merged_inner['relative_humidity']<hum_threshold
good_condis_mask = (temp_mask) & (hum_mask)
merged_inner['condi']=good_condis_mask

In [None]:
fig, axs = plt.subplots(3, 1, sharex=True)
fig.subplots_adjust(hspace=0)

ax=axs[0]
ax.plot(merged_inner['timestamp'],merged_inner['temp_f'],color='r')
ax.set_ylabel("Temp (F)")
# now plot humidity on second axis
ax_hum = axs[1]
ax_hum.plot(merged_inner['timestamp'],merged_inner['relative_humidity'],color='g')
ax_hum.set_ylabel('% rel. hum.')
# now plot the condis
ax_condi=axs[2]
ax_condi.plot(merged_inner['timestamp'],merged_inner['condi'],color='b')
ax_condi.set_ylabel("Condis")
# Rotate tick marks on x-axis
_=plt.setp(ax_condi.get_xticklabels(), rotation=45)
title=fig.suptitle(f"Condis forecast\nT<{temp_threshold_f} F, rel. hum. < {hum_threshold} % ")

In [None]:
best_times = merged_inner['timestamp'][good_condis_mask]
best_times

First convert to local time so it will make it easier to compare with the graphical forecast:

In [None]:
dtimes=merged_inner['timestamp']
dtimes

In [None]:
merged_inner['local_timestamp']=dtimes.map(lambda x: x-timedelta(hours=4))

In [None]:
final_df = merged_inner[['local_timestamp','temp_f','relative_humidity']]
final_df[21:30]

In [None]:
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
ax.plot(df['timestamp'],df['temp_f'])
ax.set_xlabel("Date")
ax.set_ylabel("Temperature (F)")
# Rotate tick marks on x-axis
_=plt.setp(ax.get_xticklabels(), rotation=45)