In [1]:
from fastcore.all import *
import pandas as pd
import numpy as np
import datetime

In [2]:
def parse(file):
    df = pd.read_csv(file, usecols=[0,1,2], skiprows=1, 
                      header=None, parse_dates=True, index_col=0,
                      names=['time', 'temp', 'humidity']).resample('H').mean().reset_index()

    df['location'] = file.name.split('_export')[0]
    df['location'] = df.location.apply(lambda x: 'Backyard' if x =='Outside' else x)
    return df

In [3]:
def parse_files(path='./data/'):
    files = Path('./data/').ls(file_exts='.csv')
    return pd.concat([parse(f) for f in files]).set_index('time')

In [4]:
parse_files()

Unnamed: 0_level_0,temp,humidity,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-08-27 00:00:00,69.26,60.9,Backyard
2021-08-27 01:00:00,69.26,61.4,Backyard
2021-08-27 02:00:00,69.44,60.8,Backyard
2021-08-27 03:00:00,69.08,60.1,Backyard
2021-08-27 04:00:00,69.26,60.0,Backyard
...,...,...,...
2021-09-07 11:00:00,64.22,70.4,Lower Southeast
2021-09-07 12:00:00,64.22,70.2,Lower Southeast
2021-09-07 13:00:00,64.22,70.4,Lower Southeast
2021-09-07 14:00:00,64.22,70.8,Lower Southeast


# Scrape Temp and Humidity Data

In [41]:
def subtract_day(strdate):
    "Subtract one day from strdate"
    return (pd.Timestamp(strdate) - pd.DateOffset(1)).strftime('%Y%m%d')

assert subtract_day('20210901') == '20210831'

In [42]:
def rmh(strdate):
    "remove hyphens from date"
    return pd.Timestamp(strdate).strftime('%Y%m%d')

assert rmh('2021-09-01') == '20210901'

In [43]:
def get_outside(api_key, startDate,endDate):
    startDate = subtract_day(startDate) # go back an additional date for a buffer
    url = f'https://api.weather.com/v1/location/KPDX:9:US/observations/historical.json?apiKey={api_key}&units=e&startDate={startDate}&endDate={endDate}'
    data = pd.DataFrame([{'time': x['valid_time_gmt'], 
                          'temp': x['temp'], 
                          'humidity': x['rh'],
                          'location': 'Outside'} for x in urljson(url)['observations']])
    data['time'] = data.time.apply(lambda x: pd.Timestamp(datetime.datetime.fromtimestamp(x)).round('60min'))
    data = data.set_index('time')
    return data

In [44]:
api_key='e1f10a1e78da46f5b10a1e78da96f525'
startDate='20210906'
endDate='20210907'

In [45]:
data = get_outside(api_key, startDate, endDate)
assert data.shape[1] == 3

In [46]:
data.loc['2021-09-07']

Unnamed: 0_level_0,temp,humidity,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-07 00:00:00,69,58,Outside
2021-09-07 01:00:00,66,63,Outside
2021-09-07 02:00:00,65,63,Outside
2021-09-07 03:00:00,63,67,Outside
2021-09-07 04:00:00,62,70,Outside
2021-09-07 05:00:00,61,72,Outside
2021-09-07 06:00:00,60,78,Outside
2021-09-07 07:00:00,59,78,Outside
2021-09-07 08:00:00,60,72,Outside
2021-09-07 09:00:00,62,67,Outside


# Combine the data

In [47]:
def get_data(api_key, startDate, endDate):
    sensors = parse_files(path='./data/')
    print('finished sensors')
    outside = get_outside(api_key, rmh(startDate), rmh(endDate))
    
    return pd.concat([sensors.loc[startDate:endDate], outside.loc[startDate:endDate]], axis=0)

In [48]:
api_key='e1f10a1e78da46f5b10a1e78da96f525'
startDate='20210901'
endDate='20210907'

In [57]:
df = get_data(api_key, startDate='2021-09-01', endDate='2021-09-08')

finished sensors


In [58]:
df.groupby('location').count()

Unnamed: 0_level_0,temp,humidity
location,Unnamed: 1_level_1,Unnamed: 2_level_1
Backyard,177,177
Lower Northeast,177,177
Lower Southeast,176,176
Lower Southwest,177,177
Outside,178,178
Upper East (Garage),176,176
Upper West (Bedroom),177,177


In [59]:
df

Unnamed: 0_level_0,temp,humidity,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-01 00:00:00,62.06,60.6,Lower Northeast
2021-09-01 01:00:00,61.88,60.4,Lower Northeast
2021-09-01 02:00:00,61.88,61.0,Lower Northeast
2021-09-01 03:00:00,61.88,61.5,Lower Northeast
2021-09-01 04:00:00,61.70,61.8,Lower Northeast
...,...,...,...
2021-09-08 04:00:00,70.00,53.0,Outside
2021-09-08 05:00:00,68.00,65.0,Outside
2021-09-08 06:00:00,67.00,66.0,Outside
2021-09-08 07:00:00,63.00,70.0,Outside


In [60]:
df.to_csv('combined.csv', index=True)

In [61]:
df = pd.merge(df, df[df['location'] == 'Outside'], on='time', suffixes=('', '_outside'))

In [62]:
df.groupby('location').count()

Unnamed: 0_level_0,temp,humidity,temp_outside,humidity_outside,location_outside
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Backyard,178,178,178,178,178
Lower Northeast,178,178,178,178,178
Lower Southeast,177,177,177,177,177
Lower Southwest,178,178,178,178,178
Outside,180,180,180,180,180
Upper East (Garage),177,177,177,177,177
Upper West (Bedroom),178,178,178,178,178


In [63]:
df.to_csv('combined.csv', index=True)

In [64]:
df[df.location == 'Outside'].loc['2021-09-07':]

Unnamed: 0_level_0,temp,humidity,location,temp_outside,humidity_outside,location_outside
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-07 00:00:00,69.0,58.0,Outside,69.0,58.0,Outside
2021-09-07 01:00:00,66.0,63.0,Outside,66.0,63.0,Outside
2021-09-07 02:00:00,65.0,63.0,Outside,65.0,63.0,Outside
2021-09-07 03:00:00,63.0,67.0,Outside,63.0,67.0,Outside
2021-09-07 04:00:00,62.0,70.0,Outside,62.0,70.0,Outside
2021-09-07 05:00:00,61.0,72.0,Outside,61.0,72.0,Outside
2021-09-07 06:00:00,60.0,78.0,Outside,60.0,78.0,Outside
2021-09-07 07:00:00,59.0,78.0,Outside,59.0,78.0,Outside
2021-09-07 08:00:00,60.0,72.0,Outside,60.0,72.0,Outside
2021-09-07 09:00:00,62.0,67.0,Outside,62.0,67.0,Outside
