In [9]:
from datetime import datetime, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
import sys, os
# Add the directory one level above the current working directory to the
# module search path (presumably so the `models` package imported below
# resolves when the notebook is run from a sub-folder -- TODO confirm layout).
parent_dir = os.getcwd()  # NOTE: despite the name, this is the current working directory
path = os.path.dirname(parent_dir)  # the directory that contains the working directory
sys.path.append(path)  # appended again on every re-run of this cell

from models.time_utilities import polar_time, encode_month, encode_weekend
from models.region_utilities import encode_region

In [155]:
def get_month(dt):
    """
    Encode the calendar month of ``dt`` as a point on the unit circle.

    Parameters
    ----------
    dt : datetime-like
        Any object exposing an integer ``month`` attribute (1-12).

    Returns
    -------
    tuple
        ``(cos, sin)`` of the angle ``2 * pi * month / 12``; note the cosine
        component comes first.
    """
    # Twelve months span one full turn, so December (12) lands on angle 2*pi.
    angle = 2 * np.pi * dt.month / 12
    return np.cos(angle), np.sin(angle)

def polar_time(dt, interval=5):
    """
    Encode the time of day of ``dt`` as a point on the unit circle.

    The day is divided into slots of ``interval`` minutes and the slot that
    contains ``dt`` is mapped to an angle, so 23:55 and 00:00 end up next to
    each other. Timestamps whose UTC offset is +10:30 or +11:00 (the daylight
    saving offsets used by the Australian timezones handled in this notebook)
    are shifted back one hour first, so the encoding always refers to local
    standard time.

    Parameters
    ----------
    dt : datetime.datetime or pandas.Timestamp
        Timestamp to encode. Naive timestamps (no UTC offset) are used as-is.
    interval : int or float, default 5
        Slot length in minutes.

    Returns
    -------
    tuple
        ``(cos, sin)`` of the slot angle; note the cosine component comes
        first.
    """
    current_m = dt.minute

    # utcoffset() returns a timedelta (or None for naive timestamps), so it
    # must be converted to seconds before comparing it with a number.
    utc_offset = dt.utcoffset()
    offset_seconds = None if utc_offset is None else utc_offset.total_seconds()

    if offset_seconds in (10.5 * 60 * 60, 11 * 60 * 60):
        # Daylight saving time: step back one hour, wrapping 00:xx to 23:xx.
        current_h = (dt.hour - 1) % 24
    else:
        current_h = dt.hour

    n_intervals = 24 * 60 / interval

    # Index of the slot within the day.
    g_min = int((current_h * 60 / interval) + (current_m // interval))
    t_sin = np.sin(2 * np.pi * g_min / n_intervals)
    t_cos = np.cos(2 * np.pi * g_min / n_intervals)

    return t_cos, t_sin

def encode_region(region):
    """
    Encode a region code as a length-3 indicator array.

    'NSW' is the all-zero baseline; 'VIC', 'QLD' and 'SA' each set exactly
    one position.

    Parameters
    ----------
    region : str
        One of 'NSW', 'VIC', 'QLD' or 'SA'.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (3,).

    Raises
    ------
    ValueError
        If ``region`` is not one of the recognised codes.
    """
    region_codes = {
        'NSW': (0, 0, 0),
        'VIC': (1, 0, 0),
        'QLD': (0, 1, 0),
        'SA': (0, 0, 1),
    }

    try:
        code = region_codes[region]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs such as lists.
        raise ValueError(f'Region {region} not recognised!') from None

    return np.array(code)


In [156]:
# Load the raw readings. Later cells read the `Timestamp`, `DeviceID`,
# `Home Consumption` and `Solar Consumption` columns from this frame.
raw_df = pd.read_parquet('full_ww_set_2.parquet')
# Per-device metadata; later cells use its `DeviceID` and `State` columns.
metadata = pd.read_csv('device_metadata.csv')

In [157]:
# Build a Datetime index from the `Timestamp` column.
# NOTE(review): datetime.fromtimestamp converts to the *local* timezone of the
# machine running the notebook and returns naive datetimes -- confirm this
# matches the timezone that is attached to the index later on.
raw_df["Datetime"] = raw_df["Timestamp"].apply(datetime.fromtimestamp)
raw_df = raw_df.set_index("Datetime")

In [158]:
# Use the first device that appears in the readings as the test case and
# look up the state recorded for it in the metadata table.
device_list = raw_df["DeviceID"].unique().tolist()
test_device = device_list[0]
test_region = metadata.loc[metadata["DeviceID"].eq(test_device), "State"].tolist()[0]

# Region -> [timezone name, standard-time UTC offset in hours]
timezone_dict = {
    "VIC": ["Australia/Melbourne", 10],
    "NSW": ["Australia/Sydney", 10],
    "QLD": ["Australia/Brisbane", 10],
    "SA": ["Australia/Adelaide", 9.5],
}

# Only the timezone name is needed from here on.
test_timezone = timezone_dict[test_region][0]

print(f"Device {test_device}, Region: {test_region}, Timezone: {test_timezone}")

Device DD04108401266, Region: VIC, Timezone: Australia/Melbourne


In [169]:
# Keep only the rows that belong to the selected device.
single_df = raw_df[raw_df['DeviceID'] == test_device].copy()
# To inspect duplicated timestamps: single_df[single_df.index.duplicated()]

# Attach the device's timezone to the naive index. ambiguous='infer' lets
# pandas resolve the repeated hour at the end of daylight saving time from
# the order of the timestamps.
tz_index = single_df.index.tz_localize(test_timezone, ambiguous='infer')

# Use the timezone-aware timestamps as the index.
tz_df = single_df.set_index(tz_index).copy()

# Resample to 5 minute intervals, forward- then back-fill, reset index.
# '5min' is the non-deprecated spelling of '5T'; `convention` only applies to
# PeriodIndex data, so it is not passed for this DatetimeIndex.
resampled_df = tz_df.resample('5min').ffill().bfill().reset_index()

# Weekend flag: 0 for Monday-Friday, 1 for Saturday/Sunday.
# (The column is called 'weekday' but holds 1 on weekends.)
resampled_df['weekday'] = (resampled_df['Datetime'].dt.weekday >= 5).astype('int64')

# Cyclical month and time-of-day encodings. Each helper returns (cos, sin),
# so it is evaluated once per row and the two components are split afterwards.
month_xy = np.array(
    [get_month(ts) for ts in resampled_df['Datetime']], dtype=float
).reshape(-1, 2)
time_xy = np.array(
    [polar_time(ts) for ts in resampled_df['Datetime']], dtype=float
).reshape(-1, 2)
resampled_df['month_x'] = month_xy[:, 0]
resampled_df['month_y'] = month_xy[:, 1]
resampled_df['time_x'] = time_xy[:, 0]
resampled_df['time_y'] = time_xy[:, 1]

# Add in region -> for ToU, the 'region' column is fine.
# For the reinforcement learning stuff, we need to one-hot-encode the region.
# The encoding is derived from the region itself (not from the timestamps)
# and is constant for a single device, so it is computed once.
resampled_df['region'] = test_region
region_code = encode_region(test_region)
resampled_df['region_1'] = int(region_code[0])
resampled_df['region_2'] = int(region_code[1])
resampled_df['region_3'] = int(region_code[2])

# Negative values in the Home Consumption and Solar Consumption channels are
# set to 0.
for channel in ('Home Consumption', 'Solar Consumption'):
    resampled_df.loc[resampled_df[channel] < 0, channel] = 0