In [3]:
import os
import glob
import matplotlib.pyplot as plt
import datetime as dt
import pandas as pd
import numpy as np

In [4]:
def retrieve(start, end, data):
    
    """
    This is a function used to retrieve timeseries data within specific time duration.
    
    Arg:
        start: the start time of the dataset (format: %Y-%m-%d %H:%M:%S)
        end: the end time of the dataset (format: %Y-%m-%d %H:%M:%S)
        data: the name of the dataset/file (needs to be either a dataframe or filename string)
        
    Return:
        data_df: a dataframe that shows the within the defined time range
    """
    import os
    import datetime as dt
    from datetime import datetime
    import pandas as pd
    from zoneinfo import ZoneInfo
    
    if isinstance(data, str):
        path = 'clean_data'
        path = os.path.join(path, data)
        data = pd.read_csv(path)
    elif isinstance(data, pd.DataFrame):
        data = data
    else:
        raise TypeError("input must be a string or a dataframe")
    
    timezone = ZoneInfo('America/Los_Angeles')
    data['date'] = pd.to_datetime(data['date'], utc=True).dt.tz_convert('America/Los_Angeles')
    start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone)
    end = datetime.strptime(end, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone)
    after_start_date = data["date"] >= start
    before_end_date = data["date"] <= end
    between_two_dates = after_start_date & before_end_date
    
    # Using pandas.DataFrame.loc to Filter Rows by Dates
    data_df = data.loc[between_two_dates]
    return data_df

In [5]:
retrieve('2018-06-01 00:00:00', '2018-07-01 00:00:00', 'site_weather.csv').head(60)

Unnamed: 0,date,air_temp_set_1,air_temp_set_2,dew_point_temperature_set_1d,relative_humidity_set_1,solar_radiation_set_1
14524,2018-06-01 00:00:00-07:00,10.89,11.05,6.97,77.0,0.03
14525,2018-06-01 00:15:00-07:00,10.88,10.98,6.83,76.3,0.02
14526,2018-06-01 00:30:00-07:00,10.77,10.84,6.84,76.88,0.03
14527,2018-06-01 00:45:00-07:00,10.77,10.74,6.82,76.82,0.06
14528,2018-06-01 01:00:00-07:00,10.72,10.76,7.3,79.6,0.03
14529,2018-06-01 01:15:00-07:00,10.57,10.55,7.59,82.0,0.04
14530,2018-06-01 01:30:00-07:00,10.44,10.46,7.59,82.7,0.03
14531,2018-06-01 01:45:00-07:00,10.36,10.34,7.46,82.4,0.05
14532,2018-06-01 02:00:00-07:00,10.36,10.27,7.37,81.9,0.02
14533,2018-06-01 02:15:00-07:00,10.21,10.27,7.28,82.2,0.02


In [6]:
print(retrieve.__doc__)


    This is a function used to retrieve timeseries data within specific time duration.
    
    Arg:
        start: the start time of the dataset (format: %Y-%m-%d %H:%M:%S)
        end: the end time of the dataset (format: %Y-%m-%d %H:%M:%S)
        data: the name of the dataset/file (needs to be either a dataframe or filename string)
        
    Return:
        data_df: a dataframe that shows the within the defined time range
    


In [13]:
def hourly(start, end, data):
    
    """
    This is a function defined to derive hourly summary of the dataset.
    
    Arg:
        start: the start time of the dataset (format: %Y-%m-%d %H:%M:%S)
        end: the end time of the dataset (format: %Y-%m-%d %H:%M:%S)
        data: the name of the dataset
        
    Return:
        data_df: a dataframe that shows the hourly summary of the input dataset

    """
    
    import datetime as dt
    from datetime import datetime
    import numpy as np
    from zoneinfo import ZoneInfo
    
    timezone = ZoneInfo('America/Los_Angeles')
    data['date'] = pd.to_datetime(data['date'])
    interval = dt.timedelta(hours=1)
    hourly_df = pd.DataFrame(columns=[data.columns])
    value_df = pd.DataFrame()
    time_df = pd.DataFrame()
    data_temp = pd.Series()
    while start < end:
        end_next = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone) + interval
        end_next = datetime.strftime(end_next, "%Y-%m-%d %H:%M:%S")
        value = retrieve(start, end_next, data).mean(numeric_only=True)
        date = pd.Series({'date': start}, dtype='datetime64[ns]')
        value_df = pd.concat([value_df, pd.DataFrame(value).transpose()], axis = 0)
        time_df = pd.concat([time_df, pd.DataFrame(date).transpose()], axis = 0)
        start = end_next

    hourly_df = pd.concat([time_df, value_df], axis = 1).reset_index(drop=True)
    
    return hourly_df

In [14]:
data = retrieve('2018-05-01 00:00:00', '2018-07-01 00:00:00', 'rtu_oa_damper.csv')

In [9]:
hourly('2018-05-01 00:00:00', '2018-06-01 00:00:00', data)

  data_temp = pd.Series()


In [None]:
print(hourly.__doc__)

In [10]:
def daily(start, end, data):
    
    """
    This is a function defined to derive daily summary of the dataset.
    
    Arg:
        start: the start time of the dataset (format: %Y-%m-%d %H:%M:%S)
        end: the end time of the dataset (format: %Y-%m-%d %H:%M:%S)
        data: the name of the dataset
        
    Return:
        data_df: a dataframe that shows the daily summary of the input dataset

    """
    
    import datetime as dt
    from datetime import datetime
    import numpy as np
    from zoneinfo import ZoneInfo
    
    timezone = ZoneInfo('America/Los_Angeles')
    data['date'] = pd.to_datetime(data['date'])
    interval = dt.timedelta(days=1)
    daily_df = pd.DataFrame(columns=[data.columns])
    value_df = pd.DataFrame()
    time_df = pd.DataFrame()
    data_temp = pd.Series()
    while start < end:
        end_next = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone) + interval
        end_next = datetime.strftime(end_next, "%Y-%m-%d %H:%M:%S")
        value = retrieve(start, end_next, data).mean(numeric_only=True)
        date = pd.Series({'date': start}, dtype='datetime64[ns]')
        value_df = pd.concat([value_df, pd.DataFrame(value).transpose()], axis = 0)
        time_df = pd.concat([time_df, pd.DataFrame(date).transpose()], axis = 0)
        start = end_next

    daily_df = pd.concat([time_df, value_df], axis = 1).reset_index(drop=True)
    
    return daily_df

In [12]:
daily('2018-05-01 00:00:00', '2018-06-01 00:00:00', data).head(60)

  data_temp = pd.Series()


Unnamed: 0,date,rtu_001_oadmpr_pct,rtu_002_oadmpr_pct,rtu_003_oadmpr_pct,rtu_004_oadmpr_pct
0,2018-05-01,38.592228,43.606662,51.128661,40.337405
1,2018-05-02,39.699237,42.229285,49.639695,38.438862
2,2018-05-03,38.762974,40.449872,53.084594,36.619996
3,2018-05-04,38.120309,49.877468,50.955767,40.677146
4,2018-05-05,38.246118,50.013352,51.018428,40.771074
5,2018-05-06,38.021184,49.878611,50.923979,40.687743
6,2018-05-07,38.160675,49.920128,50.900405,40.703092
7,2018-05-08,39.836738,53.969545,61.731776,48.826027
8,2018-05-09,43.393695,57.490108,69.251311,53.200644
9,2018-05-10,48.461308,60.762227,61.232536,47.280378


In [None]:
print(daily.__doc__)

In [None]:
def 