## Transfer EnergyPlus Medium Office Building to SEB state transition matrix

In [None]:
# Make the notebook cells fill (almost) the entire width of the browser window.
from IPython.display import display, HTML

# Render a <style> element in the cell output; the CSS below overrides the
# width of the notebook container, the menu bar and the main toolbar.
display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

#Tells Jupyter to reload custom classes from scratch every time an import cell is run. Without this, editing a
#custom class between imports would require restarting Jupyter completely. Buyer beware: old class objects in the
#current namespace will cause errors at execution
%load_ext autoreload
%autoreload 2

#switches matplotlib to show plots in the browser rather than opening a new window
%matplotlib inline

In [None]:
#always forget to do this for better looking plots
import seaborn
seaborn.set()

import matplotlib.pyplot as plt
import os
import numpy as np
import datetime
#from cvxpy import *
from statsmodels.tsa import stattools
from sklearn import preprocessing
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import random
import copy
import scipy
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

#energyplus processing functions
from eplusprocessing import *
from buildsys_funcs import *

In [None]:
#SEB helper functions

def read_file_timestamp(strin):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` string into a ``datetime.datetime``.

    Args:
        strin: Timestamp text, e.g. ``"2018-06-21T13:05:00"``.

    Returns:
        The corresponding naive ``datetime.datetime`` object.

    Raises:
        ValueError: If ``strin`` does not match the expected layout.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S'
    return datetime.datetime.strptime(strin, timestamp_format)

def file_name_from_timestamp(dtobj):
    """Format a ``datetime.datetime`` as a ``YYYY-MM-DD`` date string.

    Args:
        dtobj: The ``datetime.datetime`` to format.

    Returns:
        The date part of ``dtobj`` as text, e.g. ``"2018-06-21"``.
    """
    day_format = "%Y-%m-%d"
    return datetime.datetime.strftime(dtobj, day_format)

def read_volttron_file(fobj, col=1):
    """Read one numeric column from a comma-separated file object.

    The first line is treated as a header and discarded. Blank lines are
    skipped, so a trailing empty line does not abort the read.

    Args:
        fobj: Open text file-like object that provides ``readlines()``.
        col: Zero-based index of the comma-separated field to convert.

    Returns:
        A list of floats, one per non-blank data line. The list is empty when
        the file is empty or contains only the header.

    Raises:
        IndexError: If a data line has no field at index ``col``.
        ValueError: If the selected field cannot be converted to ``float``.
    """
    values = []
    # Slicing (rather than pop(0)) drops the header without raising on an
    # empty file.
    for line in fobj.readlines()[1:]:
        row = line.strip()
        if not row:
            # Whitespace-only lines carry no sample.
            continue
        values.append(float(row.split(",")[col]))
    return values

def utc_offset(data_list):
    """Rotate ``data_list`` so that its first 480 items move to the end.

    NOTE(review): 480 presumably corresponds to an 8-hour shift of
    one-minute samples -- confirm against the data source.

    Args:
        data_list: Sliceable sequence supporting ``+`` concatenation
            (e.g. a list).

    Returns:
        A new sequence holding the items from index 480 onwards followed by
        the first 480 items.
    """
    shift = 480
    return data_list[shift:] + data_list[:shift]

def to_celsius(fah):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius.

    Args:
        fah: Temperature in Fahrenheit; any value supporting subtraction and
            division by a float works.

    Returns:
        ``(fah - 32.0) / 1.8``.
    """
    return (fah - 32.0) / 1.8

def norm_array(arr):
    """Min-max scale ``arr`` along axis 0.

    Delegates to scikit-learn's ``preprocessing.minmax_scale`` with its
    default feature range.

    Args:
        arr: Array-like data to scale.

    Returns:
        The scaled data as returned by ``minmax_scale``.
    """
    scaled = preprocessing.minmax_scale(arr, axis=0)
    return scaled

def norm_array_custom(arr, minimum, maximum):
    """Rescale ``arr`` linearly so ``minimum`` maps to 0 and ``maximum`` to 1.

    Args:
        arr: Scalar or array to rescale.
        minimum: Value that should map to 0.
        maximum: Value that should map to 1.

    Returns:
        ``(arr - minimum) / (maximum - minimum)``; values outside the given
        bounds are not clipped.
    """
    shifted = arr - minimum
    span = maximum - minimum
    return shifted / span

def circularize_normed_data(arr):
    """Map data onto the unit circle.

    The input is assumed to be normalized to the 0-1 interval, so that one
    full unit corresponds to one full turn.

    Args:
        arr: Scalar or numpy array of normalized values.

    Returns:
        A ``(sin, cos)`` tuple evaluated at the angle ``2*pi*arr``.
    """
    angle = 2*np.pi*arr
    return (np.sin(angle), np.cos(angle))

# Date strings (YYYY-MM-DD) for 21-26 June 2018; day 27 is missing data.
filedates = []
for i in range(21, 27):
    filedates.append(file_name_from_timestamp(datetime.datetime(2018, 6, i)))

In [None]:
# Width of each averaging window, in minutes.
moving_average_length = 60 #minutes
# Number of averaging windows per day. Derived from the window width so the
# two values cannot drift apart: 1440 minutes / 60 = 24.
# NOTE(review): assumes one sample per minute, i.e. 1440 samples per daily
# file -- confirm against the data.
bins = (24 * 60) // moving_average_length

In [None]:
# Load the whole-building electric demand for every month and reduce each
# daily file to `bins` averaged values.
datadir = "/home/chase/projects/buildnn/data/PNNL/SEB/"
months = ["March", "April", "May", "June", "July", "August", "September", "October"]

powerdata = []  # mean demand per bin, `bins` values per daily file
timedata = []   # minute of day of the first sample in each bin (NaN if absent)
daydata = []    # weekday (Monday == 0) of the first sample in each bin (NaN if absent)
dtdata = []     # timestamp of the first sample in each bin (NaN if absent)

for month in months:
    # os.path.join avoids the doubled "/" that plain concatenation produced,
    # since datadir already ends with a separator.
    path = os.path.join(datadir, month, "ELECTRIC_METER", "WholeBuildingDemand")
    filedates = sorted(os.listdir(path))
    for f in filedates:
        p = []
        t = []
        with open(os.path.join(path, f), "r") as d:
            # Drop the header row; slicing tolerates an empty file where
            # pop(0) would raise.
            for line in d.readlines()[1:]:
                if not line.strip():
                    # Blank lines carry no sample.
                    continue
                tokens = line.strip().split(",")
                p.append(float(tokens[1]))
                t.append(read_file_timestamp(tokens[0]))

        # NOTE: utc_offset() is currently not applied to p or t.

        #moving average according to global moving_average_length time in minutes
        pavg = []
        tavg = []
        wdavg = []

        for b in range(bins):
            start = b * moving_average_length
            # nanmean of an empty slice yields NaN (with a RuntimeWarning),
            # which marks bins for which the file holds no samples.
            pavg.append(np.nanmean(p[start:start + moving_average_length]))
            if start < len(t):
                stamp = t[start]
                tavg.append(stamp.hour*60 + stamp.minute)
                wdavg.append(stamp.weekday())
                dtdata.append(stamp)
            else:
                # The file is shorter than a full day: pad with NaN so every
                # series keeps `bins` entries per file. An explicit bounds
                # check replaces the former bare `except`, which also hid
                # unrelated errors.
                tavg.append(np.nan)
                wdavg.append(np.nan)
                dtdata.append(np.nan)
        powerdata += pavg
        timedata += tavg
        daydata += wdavg

powerdata = np.asarray(powerdata)

In [None]:
# Load the binned AHU and VAV measurement series for every month.
airunits = ["AHU1"]#, "AHU2", "AHU3", "AHU4"]
# NOTE: "SupplyFanPower" is listed twice; this is harmless because the list
# is only used for dict keys and membership tests below.
airvars = ["ExhaustAirFlow", "HotWaterFlowRate", "ExhaustFanPower",
           "OutdoorAirTemperature", "SupplyFanPower", "MixedAirTemperature", "ReturnAirFlow",
           "ReturnAirTemperature", "SupplyAirFlow", "SupplyFanPower", "ReturnFanPower",
           "DischargeAirTemperature", "ChilledWaterValvePosition", "HotWaterValvePosition",
           "HotWaterCoilTemperature", "HeatingPercent", "DuctStaticPressure", "OutdoorAirFlow",
           "HotWaterSupplyTemperature", "HotWaterReturnTemperature", "HotWaterValveTemperature",
           "HrWheelSpeed", "SupplyFanSpeedPercent"]
vavunits = ["VAV100"]#, "VAV102", "VAV118", "VAV119", "VAV120", "VAV121", "VAV129", "VAV131", "VAV133",
            #"VAV136", "VAV142", "VAV143", "VAV150"]
vavvars = ["ZoneCoolingTemperatureSetPoint", "ZoneTemperature", "ZoneHeatingTemperatureSetPoint",
           "ZoneTemperatureSetPoint", "ZoneDischargeAirTemperature", "ZoneAirFlow"]


def _bin_means(samples):
    """Average consecutive windows of ``moving_average_length`` samples.

    Returns a list of ``bins`` values; a window with no samples yields NaN.
    """
    return [
        np.nanmean(samples[b*moving_average_length:(b+1)*moving_average_length])
        for b in range(bins)
    ]


def _read_binned_dir(directory):
    """Read every file in ``directory`` in sorted order and concatenate its binned means."""
    binned = []
    for f in sorted(os.listdir(directory)):
        with open(os.path.join(directory, f), 'r') as d:
            # NOTE: utc_offset() is currently not applied to the samples.
            binned += _bin_means(read_volttron_file(d))
    return binned


# One (initially empty) series per unit and per variable.
ahudata = {a: {m: [] for m in airvars} for a in airunits}
vavdata = {v: {m: [] for m in vavvars} for v in vavunits}

for month in months:
    for a in airunits:
        print(month, ": ", a)
        path = os.path.join(datadir, month, a)
        # Each entry of the AHU directory is either a measurement directory
        # or the directory of a VAV unit.
        for m in os.listdir(path):
            if m in airvars:
                ahudata[a][m] += _read_binned_dir(os.path.join(path, m))

            if m in vavunits:
                vav_dir = os.path.join(path, m)
                for v in os.listdir(vav_dir):
                    if v in vavvars:
                        vavdata[m][v] += _read_binned_dir(os.path.join(vav_dir, v))

# Convert every series to a numpy array. Iterate over all air units
# explicitly instead of relying on the loop variable left over from the loop
# above, which converted only the last unit.
for a in ahudata:
    for m in ahudata[a]:
        ahudata[a][m] = np.asarray(ahudata[a][m])

for v in vavdata:
    for mv in vavdata[v]:
        vavdata[v][mv] = np.asarray(vavdata[v][mv])

In [None]:
# Report the number of samples collected for every AHU measurement ...
for a, measurements in ahudata.items():
    for m, series in measurements.items():
        print(a, ", ", m, ", ", len(series))

# ... and for every VAV measurement, to spot series of unequal length.
for a, zone_vars in vavdata.items():
    for v, series in zone_vars.items():
        print(a, ", ", v, ", ", len(series))