# Profiling Pecan Consumers

This notebook profiles the consumers in the Pecan Street dataset who have HVAC, by fitting a thermal regimes model to their whole-home consumption and temperature data.

## Initializations

In [1]:
# Start from a clean workspace: remove every object in the current environment.
rm(list = ls())
# On error, drop into the interactive frame browser (recover) for debugging.
options(error = recover)
# Packages attached for this notebook.
library('segmented')
library('lubridate')  # month() is used in format_data
library('zoo')        # na.spline() is used in format_data


Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



In [2]:
# Load the project helper scripts. Each setwd() precedes the source() calls for
# that directory, so every script is sourced from inside its own folder.
setwd("~/EnergyAnalytics/batch/pecan/")
source('define_categories_pecan.r')

setwd("~/EnergyAnalytics/utils/")
source('select_data.r')

setwd('~/EnergyAnalytics/thermal_profiles/profiler/')
# stateProcessorWrapper / stateVisualizerWrapper are called by apply_thermal_model
source('stateProcessorWrapper.r')
source('stateVisualizerWrapper.r')

# Return to the batch directory for the rest of the notebook.
setwd("~/EnergyAnalytics/batch/pecan/")

Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.7.0 (2015-02-19) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.19.0 (2015-02-27) successfully loaded. See ?R.oo for help.

Attaching package: ‘R.oo’

The following objects are masked from ‘package:methods’:

    getClasses, getMethods

The following objects are masked from ‘package:base’:

    attach, detach, gc, load, save

R.utils v2.0.0 (2015-02-28) successfully loaded. See ?R.utils for help.

Attaching package: ‘R.utils’

The following object is masked from ‘package:utils’:

    timestamp

The following objects are masked from ‘package:base’:


In removeClass("DataFormatter"): class definition for “DataFormatter” not found (no action taken)Loading required package: lmtest

Attaching package: ‘lmtest’

The following object is masked from ‘package:R.utils’:

    reset

Loading required package: nnet
Loading required package: MASS
Loading required package: Rsolnp
Loading required package: tru

In [3]:
# Locations used throughout the notebook:
#   DATA_PATH - input usage files (listed recursively when building users_df)
#   DUMP_PATH - where fitted models are written, one sub-directory per user
#   PLOT_PATH - where visualizations are written, one sub-directory per user
DATA_PATH <- "~/S3L_server/energy-data/pecan_street/usage-select/"
DUMP_PATH <- "~/S3L_server/energy-data/pecan_street/models_2/"
PLOT_PATH <- "~/S3L_server/plots/pecan-street-2/"

In [4]:
# ---- Build the table of (user, grain, file) combinations still to process ----

# load user names; the X column is dropped
# NOTE(review): X is presumably a row index written by the exporter - confirm
user_names <- read.csv('~/S3L_server/energy-data/pecan_street/metadata/user_names_ids.csv')
user_names$X <- NULL

# list already processed files
# (list.files() takes a regular expression, not a glob, so match the literal
# marker "_decoded" anywhere in the file name)
files.input <- list.files(path = DUMP_PATH, pattern = "_decoded",
                          full.names = TRUE, recursive = TRUE)
already_done <- lapply(files.input, function(x) {
    # last two path components: <grain directory>/<file name>
    parts <- tail(strsplit(x, '/')[[1]], n = 2)
    grain <- parts[1]
    # the user ID is the part of the file name before the first underscore
    uid <- strsplit(parts[2], "_")[[1]][1]
    c(uid, grain)
})
# with no processed files this is an empty, zero-column data frame
already_done <- data.frame(do.call('rbind', already_done))
if (length(already_done) > 0) names(already_done) <- c("ID", "grain")

# list all data files
files    <- list.files(path = DATA_PATH, full.names = TRUE, recursive = TRUE)
# files_01 = files[grep('01min',files)]
# files_15 = files[grep('15min',files)]
files_60 <- files[grep('60min', files)]

# extract ID: the file name up to its first dot
users_df <- data.frame(UID = vapply(
    files_60,
    function(s) strsplit(tail(strsplit(s, '/')[[1]], 1), '\\.')[[1]][1],
    character(1),
    USE.NAMES = FALSE
))
rownames(users_df) <- NULL
# users_df['file_01min'] = files_01
# users_df['15min'] = files_15
users_df['60min'] <- files_60

# build data sources dataframe: one row per (ID, name, grain) with its file
users_df <- merge(user_names, users_df, by.x = "ID", by.y = "UID")
users_df <- melt(users_df, id.vars = c("ID", "name"))
names(users_df)[c(3, 4)] <- c("grain", "file_orig")

# filter out those IDs already done; ID and grain are matched as a pair so a
# user finished at one grain is not skipped at another
done_keys <- paste(already_done$ID, already_done$grain)
todo_keys <- paste(users_df$ID, users_df$grain)
users_df  <- users_df[!(todo_keys %in% done_keys), ]
print(paste("To process:", dim(users_df)[1], "files"))

[1] "To process: 215 files"


In [5]:
# Preview the first rows of the to-do table (columns: ID, name, grain, file_orig).
head(users_df)

Unnamed: 0,ID,name,grain,file_orig
1,22,Christian,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/22.csv
49,1479,Abe,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/1479.csv
62,1782,David,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/1782.csv
64,1801,Richard,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/1801.csv
80,2242,Alfred,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/2242.csv
82,2335,Sam,60min,/Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/2335.csv


## Learn thermal regimes models

In [6]:
# Prepare one home's raw readings for the R thermal profiler code.
#
# Args:
#   homeData: data frame providing the columns read below:
#             `date`, `total` (the response) and `Temperature`.
#
# Returns:
#   An unnamed list of two data frames:
#     [[1]] observations: `date` (character) and `obs` (the `total` column);
#     [[2]] covariates: `date` (character), `Temperature`, `TemperatureD`
#           (Temperature - 65) and `TemperatureDWinter`.
format_data <- function(homeData) {

    cat(paste("Initially nrows=", nrow(homeData), '\n'))

    # spline-interpolate the response across short gaps only (maxgap = 12)
    homeData$total <- na.spline(homeData$total, maxgap = 12, na.rm = FALSE)

    # whatever is still missing in the response gets dropped
    missing_obs <- is.na(homeData$total)
    if (any(missing_obs)) {
        homeData <- homeData[!missing_obs, ]
    }
    cat(paste("\tRemoved NAs: nrows=", nrow(homeData), "\n"))

    # temperature expressed relative to the reference value of 65
    homeData$TemperatureD <- homeData$Temperature - 65

    # response table in the layout expected by the HMM package
    observations <- homeData[, c('date', 'total'), drop = FALSE]
    names(observations)[2] <- 'obs'
    observations$date <- as.character(observations$date)

    # covariate table, same rows as the response table
    covariates <- homeData[, c('date', 'Temperature', 'TemperatureD'), drop = FALSE]
    covariates$date <- as.character(covariates$date)

    # TemperatureD is kept only in the listed months and zeroed elsewhere
    # (month() never yields 0, so that entry of the set never matches)
    in_winter <- month(observations$date) %in% c(0, 1, 2, 3, 10, 11, 12)
    covariates$TemperatureDWinter <- covariates$TemperatureD * in_winter

    list(observations, covariates)
}


In [7]:
# Fit a thermal regimes model for one user and, optionally, plot the result.
#
# Args:
#   cur_data:  data frame of observations with a `date` column (character
#              timestamps parseable by as.POSIXct), as built by format_data.
#   cur_covar: data frame of covariates; passed through to
#              stateProcessorWrapper.
#   userName:  label for the user; passed to stateProcessorWrapper and used in
#              the error message.
#   dump_path: passed to stateProcessorWrapper as `dump_path` (default NULL).
#   plot_path: when not NULL, stateVisualizerWrapper is called with it as
#              `plots_path`; when NULL no plots are produced.
#
# Returns:
#   Always NULL. Failures in fitting or plotting are reported with cat() and
#   do not raise, so a batch loop can continue with the next user.
apply_thermal_model <- function(cur_data, cur_covar, userName,
                                dump_path = NULL,
                                plot_path = NULL) {

    # define model learning controls
    controls <- list(
        Kmin = 2, Kmax = 6,
        maxit = 50,
        nRestarts = 5,
        tol = 1e-5,
        thresh.R2 = 0.75,
        thresh.MAPE = 0.25,
        test.periods = 12,
        vis.interval = 3 * 24
    )

    # Pick a random visualization window of vis.interval hours that contains
    # at least one complete row. The search is bounded so that empty or
    # all-NA data cannot loop forever; if nothing is found the plots are
    # skipped. It runs before the model fit, as before, so the order in which
    # random numbers are drawn is unchanged.
    no.secs    <- controls$vis.interval * 3600
    idx_end    <- max(nrow(cur_data) - controls$vis.interval - 1, 1)
    start_date <- NULL
    stop_date  <- NULL
    max_tries  <- 100
    if (nrow(cur_data) > 0) {
        for (attempt in seq_len(max_tries)) {
            cand_start <- sample(cur_data$date[seq_len(idx_end)], 1)
            cand_stop  <- as.character(as.POSIXct(cand_start) + no.secs)
            in_window  <- cur_data$date >= cand_start & cur_data$date < cand_stop
            # which() drops NA comparisons, matching subset() semantics
            window     <- cur_data[which(in_window), , drop = FALSE]
            if (nrow(na.omit(window)) > 0) {
                start_date <- cand_start
                stop_date  <- cand_stop
                break
            }
        }
    }

    # learn model
    res <- try(stateProcessorWrapper(cur_data,
                                     cur_covar,
                                     userName,
                                     controls = controls,
                                     train.frac = 1.0,
                                     verbose = FALSE,
                                     tran.vars = c('(Intercept)', 'Temperature'),
                                     resp.vars = c('(Intercept)',
                                                   'TemperatureD'),
                                                   #'TemperatureDWinter'),
                                     dump_path = dump_path))
    # inherits() stays a single TRUE/FALSE even if the result has several classes
    if (inherits(res, 'try-error')) {
        cat(paste('Error in learning model for', userName,'! \n'))
        return(NULL)
    }

    # produce visualizations
    if (!is.null(plot_path)) {
        if (is.null(start_date)) {
            cat('No data found for a visualization interval; skipping plots.\n')
        } else {
            # wrapped in try() so a plotting failure is reported, not fatal
            resv <- try(stateVisualizerWrapper(res$decoder,
                                               res$interpreter,
                                               plots_path = plot_path,
                                               interval = c(start_date, stop_date)))
            if (inherits(resv, 'try-error')) {
                cat('Error in visualizing current user!\n')
            }
        }
    }
    NULL
}


In [8]:
# Re-source the same helper scripts that were loaded at the top of the
# notebook (this redefines whatever those files define), then return to the
# batch directory.
setwd("~/EnergyAnalytics/batch/pecan/")
source('define_categories_pecan.r')

setwd("~/EnergyAnalytics/utils/")
source('select_data.r')
    
setwd('~/EnergyAnalytics/thermal_profiles/profiler/')
source('stateProcessorWrapper.r')
source('stateVisualizerWrapper.r')

setwd("~/EnergyAnalytics/batch/pecan/")

In [9]:
# Row position(s) in users_df of the 60min entry for the user named 'Lucius'.
which(users_df$name=='Lucius' & users_df$grain=='60min')

In [None]:
# ---- Fit (and plot) a thermal model for every remaining user ----
users_df <- users_df[with(users_df, order(ID, grain, name)), ]

# Manual resume point: rows before this index are skipped. Building the index
# vector by filtering (rather than first_row:nrow) yields an empty loop, not a
# backwards one, when the table has fewer rows than first_row.
first_row <- 157
rows_todo <- seq_len(nrow(users_df))
rows_todo <- rows_todo[rows_todo >= first_row]

for (i in rows_todo) {
    # as.character() so a factor column prints its label, not its integer code
    user_name <- as.character(users_df[i, "name"])
    cat(paste("*****", i, ":", user_name, users_df[i, "ID"], "*****\n"))

    # load data
    user_id <- paste(users_df[i, "ID"], user_name, sep = "_")
    cat(paste('Processing file', users_df[i,'file_orig'], ':', i, '/', nrow(users_df), '\n'))
    # an unreadable file is reported and skipped instead of stopping the batch
    homeData <- tryCatch(
        read.csv(as.character(users_df[i, "file_orig"])),
        error = function(e) {
            cat(paste('Could not read file:', conditionMessage(e), '\n'))
            NULL
        }
    )

    # is there enough data?
    # (`next`, not `return`: this loop runs at top level, outside any function)
    if (is.null(homeData) || nrow(homeData) == 0) {
        cat('Too little data!\n')
        next
    }

    # create directory to store models
    dump_path <- file.path(DUMP_PATH, paste(user_id, users_df[i, "grain"], sep = '/'))
    dir.create(dump_path, recursive = TRUE, showWarnings = FALSE)
    # create directory to store plots
    # NOTE(review): this uses "name" where dump_path uses "grain"; it looks
    # like a copy-paste slip but is kept as-is so existing plot locations do
    # not move - confirm the intended layout.
    plot_path <- file.path(PLOT_PATH, paste(user_id, users_df[i, "name"], sep = '/'))
    dir.create(plot_path, recursive = TRUE, showWarnings = FALSE)

    # format dataset
    res <- format_data(homeData)
    cur_data  <- res[[1]]
    cur_covar <- res[[2]]

    # apply model to data
    res <- apply_thermal_model(cur_data, cur_covar, user_id,
                               dump_path = dump_path,
                               plot_path = plot_path)
}

***** 143 : 359 ***** ***** 143 : 8197 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8197.csv : 143 / 215 
Initially nrows= 14938 
	Removed NAs: nrows= 14924 
*** Initializing DataFormatter (8197_Manuel) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8197_Manuel ***
   user  system elapsed 
  0.006   0.000   0.006 
*** Initializing StateDecoder (8197_Manuel) ***
   user  system elapsed 
  0.005   0.000   0.005 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -14125.25 
iteration 5 logLik: -12657.23 
iteration 10 logLik: -11875.65 
iteration 15 logLik: -11831.81 
converged at iteration 20 with logLik: -11829.6 
[1] "MAPE=0.22248731485775; R2=0.753361060641082"
iteration 0 logLik: -28430.17 
iteration 5 logLik: -25156.83 
iteration 10 logLik: -22500.55 
iteration 15 logLik: -22408.91 
converged at iteration 18 with logLik: -22408.03 
Model size= 2 
*** Initializing Interprete

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [14924] is not a sub-multiple or multiple of the number of rows [622]

      obs             State        Index                 Time   
 Min.   :1.238   C.Lo (2):53   Min.   : 1   Fri,08/02 00:00: 1  
 1st Qu.:3.456   N (1)   :20   1st Qu.:19   Fri,08/02 01:00: 1  
 Median :5.627                 Median :37   Fri,08/02 02:00: 1  
 Mean   :5.321                 Mean   :37   Fri,08/02 03:00: 1  
 3rd Qu.:7.183                 3rd Qu.:55   Fri,08/02 04:00: 1  
 Max.   :9.480                 Max.   :73   Fri,08/02 05:00: 1  
                                            (Other)        :67  
  TemperatureD    
 Min.   :0.01292  
 1st Qu.:0.01292  
 Median :0.05900  
 Mean   :0.04637  
 3rd Qu.:0.05900  
 Max.   :0.05900  
                  


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 144 : 218 ***** ***** 144 : 8201 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8201.csv : 144 / 215 
Initially nrows= 5729 
	Removed NAs: nrows= 5729 
*** Initializing DataFormatter (8201_Giles) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8201_Giles ***
   user  system elapsed 
  0.002   0.000   0.002 
*** Initializing StateDecoder (8201_Giles) ***
   user  system elapsed 
  0.005   0.002   0.007 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -3774.394 
iteration 5 logLik: -3480.598 
iteration 10 logLik: -2816.638 
iteration 15 logLik: -2806.549 
converged at iteration 17 with logLik: -2806.502 
[1] "MAPE=0.372031436494689; R2=0.776915222472426"
iteration 0 logLik: -7744.453 
iteration 5 logLik: -7467.488 
iteration 10 logLik: -5277.226 
iteration 15 logLik: -5107.59 
iteration 20 logLik: -5105.113 
converged at iteration 25 with logLik: -5104.641 
Model size= 2 

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [5729] is not a sub-multiple or multiple of the number of rows [239]

      obs           State        Index                 Time   
 Min.   :0.2035   N (1):15   Min.   : 1   Fri,01/17 00:00: 1  
 1st Qu.:0.2383   N (2):58   1st Qu.:19   Fri,01/17 01:00: 1  
 Median :0.2900              Median :37   Fri,01/17 02:00: 1  
 Mean   :0.8340              Mean   :37   Fri,01/17 03:00: 1  
 3rd Qu.:1.0364              3rd Qu.:55   Fri,01/17 04:00: 1  
 Max.   :3.4334              Max.   :73   Fri,01/17 05:00: 1  
                                          (Other)        :67  
  TemperatureD      
 Min.   :-0.016054  
 1st Qu.:-0.003328  
 Median :-0.003328  
 Mean   :-0.005943  
 3rd Qu.:-0.003328  
 Max.   :-0.003328  
                    


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 145 : 237 ***** ***** 145 : 8218 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8218.csv : 145 / 215 
Initially nrows= 5255 
	Removed NAs: nrows= 5255 
*** Initializing DataFormatter (8218_Harris) ***
   user  system elapsed 
      0       0       0 
*** Adding covariates data for DataFormatter8218_Harris ***
   user  system elapsed 
  0.002   0.000   0.002 
*** Initializing StateDecoder (8218_Harris) ***
   user  system elapsed 
  0.001   0.000   0.002 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -1114.836 
iteration 5 logLik: 503.8803 
iteration 10 logLik: 523.6747 
converged at iteration 15 with logLik: 523.8769 
[1] "MAPE=0.414378601446393; R2=0.652214744624282"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -1434.835 
iteration 5 logLik: 539.1714 
iteration 10 logLik: 599.9464 
iteration 15 logLik: 708.9061 
iteration 20 logLik: 769.6177 
iteration 25 logLik: 786.598 
iteration 30 logLik: 800.0199

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [5255] is not a sub-multiple or multiple of the number of rows [219]

      obs               State        Index                 Time   
 Min.   :0.06607   C.Lo (3):15   Min.   : 1   Mon,09/23 09:00: 1  
 1st Qu.:0.14232   N (1)   :14   1st Qu.:19   Mon,09/23 10:00: 1  
 Median :0.24937   N (2)   :34   Median :37   Mon,09/23 11:00: 1  
 Mean   :0.37925   N (4)   :10   Mean   :37   Mon,09/23 12:00: 1  
 3rd Qu.:0.36733                 3rd Qu.:55   Mon,09/23 13:00: 1  
 Max.   :2.42460                 Max.   :73   Mon,09/23 14:00: 1  
                                              (Other)        :67  
  TemperatureD       
 Min.   :-0.0029860  
 1st Qu.:-0.0024969  
 Median :-0.0024969  
 Mean   : 0.0103049  
 3rd Qu.:-0.0007477  
 Max.   : 0.0584986  
                     


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 146 : 446 ***** ***** 146 : 8236 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8236.csv : 146 / 215 
Initially nrows= 14920 
	Removed NAs: nrows= 14878 
*** Initializing DataFormatter (8236_Phillip) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8236_Phillip ***
   user  system elapsed 
  0.012   0.001   0.013 
*** Initializing StateDecoder (8236_Phillip) ***
   user  system elapsed 
  0.012   0.000   0.014 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -8113.613 
iteration 5 logLik: -5352.282 
iteration 10 logLik: -4914.091 
iteration 15 logLik: -4898.269 
iteration 20 logLik: -4894.481 
converged at iteration 25 with logLik: -4893.8 
[1] "MAPE=0.405661331097677; R2=0.733717872323788"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -8103.321 
iteration 5 logLik: -5221.127 
iteration 10 logLik: -4342.564 
iteration 15 logLik: -4252.909 
iteration 20 logL

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [14878] is not a sub-multiple or multiple of the number of rows [620]

      obs              State        Index                 Time   
 Min.   :0.1666   C.Lo (3):14   Min.   : 1   Mon,12/10 00:00: 1  
 1st Qu.:0.2260   N (1)   :38   1st Qu.:19   Mon,12/10 01:00: 1  
 Median :0.4946   N (2)   :21   Median :37   Mon,12/10 02:00: 1  
 Mean   :0.6096                 Mean   :37   Mon,12/10 03:00: 1  
 3rd Qu.:0.7369                 3rd Qu.:55   Mon,12/10 04:00: 1  
 Max.   :2.6839                 Max.   :73   Mon,12/10 05:00: 1  
                                             (Other)        :67  
  TemperatureD      
 Min.   :0.0008089  
 1st Qu.:0.0008089  
 Median :0.0039687  
 Mean   :0.0084029  
 3rd Qu.:0.0039687  
 Max.   :0.0318295  
                    


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 147 : 267 ***** ***** 147 : 8282 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8282.csv : 147 / 215 
Initially nrows= 4823 
	Removed NAs: nrows= 4823 
*** Initializing DataFormatter (8282_Isom) ***
   user  system elapsed 
      0       0       0 
*** Adding covariates data for DataFormatter8282_Isom ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Initializing StateDecoder (8282_Isom) ***
   user  system elapsed 
  0.002   0.001   0.002 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -5137.983 
iteration 5 logLik: -4792.014 
iteration 10 logLik: -4522.189 
iteration 15 logLik: -4445.254 
iteration 20 logLik: -4442.705 
converged at iteration 22 with logLik: -4442.619 
[1] "MAPE=0.405194490806403; R2=0.556034888550536"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -5166.449 
iteration 5 logLik: -4795.056 
iteration 10 logLik: -4523.346 
iteration 15 logLik: -4207.6 
iteration 20 logLik: -4158.4

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [4823] is not a sub-multiple or multiple of the number of rows [201]

      obs               State        Index                 Time   
 Min.   : 0.8885   C.Lo (2):51   Min.   : 1   Mon,12/23 00:00: 1  
 1st Qu.: 2.1755   N (1)   :22   1st Qu.:19   Mon,12/23 01:00: 1  
 Median : 3.3645                 Median :37   Mon,12/23 02:00: 1  
 Mean   : 3.8963                 Mean   :37   Mon,12/23 03:00: 1  
 3rd Qu.: 5.2859                 3rd Qu.:55   Mon,12/23 04:00: 1  
 Max.   :10.9464                 Max.   :73   Mon,12/23 05:00: 1  
                                              (Other)        :67  
  TemperatureD    
 Min.   :0.02094  
 1st Qu.:0.02094  
 Median :0.04468  
 Mean   :0.03753  
 3rd Qu.:0.04468  
 Max.   :0.04468  
                  


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 148 : 288 ***** ***** 148 : 8292 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8292.csv : 148 / 215 
Initially nrows= 9791 
	Removed NAs: nrows= 9791 
*** Initializing DataFormatter (8292_Jesus) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8292_Jesus ***
   user  system elapsed 
  0.002   0.000   0.003 
*** Initializing StateDecoder (8292_Jesus) ***
   user  system elapsed 
  0.002   0.000   0.003 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -5086.414 
iteration 5 logLik: -3171.66 
iteration 10 logLik: -2812.683 
iteration 15 logLik: -2775.34 
iteration 20 logLik: -2771.328 
iteration 25 logLik: -2770.787 
converged at iteration 27 with logLik: -2770.741 
[1] "MAPE=0.343963976469761; R2=0.638658619342697"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -5067.623 
iteration 5 logLik: -2977.77 
iteration 10 logLik: -2224.083 
iteration 15 logLik: -2126

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [9791] is not a sub-multiple or multiple of the number of rows [408]

      obs           State        Index                 Time   
 Min.   :0.2281   N (1): 2   Min.   : 1   Fri,05/03 04:00: 1  
 1st Qu.:0.2890   N (2):46   1st Qu.:19   Fri,05/03 05:00: 1  
 Median :0.4036   N (3):25   Median :37   Fri,05/03 06:00: 1  
 Mean   :0.4867              Mean   :37   Fri,05/03 07:00: 1  
 3rd Qu.:0.6067              3rd Qu.:55   Fri,05/03 08:00: 1  
 Max.   :2.6717              Max.   :73   Fri,05/03 09:00: 1  
                                          (Other)        :67  
  TemperatureD      
 Min.   :0.0002756  
 1st Qu.:0.0002756  
 Median :0.0002756  
 Mean   :0.0024207  
 3rd Qu.:0.0047255  
 Max.   :0.0229484  
                    


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 149 : 294 ***** ***** 149 : 8342 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8342.csv : 149 / 215 
Initially nrows= 3839 
	Removed NAs: nrows= 3839 
*** Initializing DataFormatter (8342_Johnie) ***
   user  system elapsed 
      0       0       0 
*** Adding covariates data for DataFormatter8342_Johnie ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Initializing StateDecoder (8342_Johnie) ***
   user  system elapsed 
  0.001   0.000   0.001 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -470.4214 
iteration 5 logLik: 820.2674 
iteration 10 logLik: 848.2425 
iteration 15 logLik: 851.6893 
iteration 20 logLik: 852.0197 
converged at iteration 23 with logLik: 852.0476 
[1] "MAPE=0.287458480899286; R2=0.507261700294015"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -119.0904 
iteration 5 logLik: 1074.88 
iteration 10 logLik: 1156.625 
iteration 15 logLik: 1166.665 
iteration 20 logLik: 1171.772

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [3839] is not a sub-multiple or multiple of the number of rows [160]

      obs            State        Index                 Time   
 Min.   :0.03876   N (1):21   Min.   : 1   Fri,11/15 00:00: 1  
 1st Qu.:0.14710   N (2):41   1st Qu.:19   Fri,11/15 01:00: 1  
 Median :0.18923   N (3):11   Median :37   Fri,11/15 02:00: 1  
 Mean   :0.27665              Mean   :37   Fri,11/15 03:00: 1  
 3rd Qu.:0.31143              3rd Qu.:55   Fri,11/15 04:00: 1  
 Max.   :1.09538              Max.   :73   Fri,11/15 05:00: 1  
                                           (Other)        :67  
  TemperatureD       
 Min.   :-5.971e-03  
 1st Qu.:-1.382e-04  
 Median :-1.382e-04  
 Mean   :-9.657e-04  
 3rd Qu.: 4.033e-05  
 Max.   : 4.033e-05  
                     


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 150 : 177 ***** ***** 150 : 8419 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8419.csv : 150 / 215 
Initially nrows= 14176 
	Removed NAs: nrows= 10913 
*** Initializing DataFormatter (8419_Emmett) ***
   user  system elapsed 
  0.001   0.001   0.000 
*** Adding covariates data for DataFormatter8419_Emmett ***
   user  system elapsed 
  0.013   0.004   0.018 
*** Initializing StateDecoder (8419_Emmett) ***
   user  system elapsed 
  0.003   0.000   0.003 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -8426.565 
iteration 5 logLik: -7372.224 
iteration 10 logLik: -7052.554 
iteration 15 logLik: -6949.881 
iteration 20 logLik: -6918.683 
iteration 25 logLik: -6907.319 
iteration 30 logLik: -6901.195 
iteration 35 logLik: -6897.762 
iteration 40 logLik: -6895.508 
iteration 45 logLik: -6894.158 
iteration 50 logLik: -6893.293 
[1] "MAPE=0.418792675518234; R2=0.388980596659767"
***** HMM Cross-Validation K = 3 ******
it

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [10913] is not a sub-multiple or multiple of the number of rows [455]

      obs              State        Index                 Time   
 Min.   :0.7364   C.Lo (4):34   Min.   : 1   Mon,09/02 00:00: 1  
 1st Qu.:1.2295   N (1)   :30   1st Qu.:19   Mon,09/02 01:00: 1  
 Median :1.7825   N (2)   : 9   Median :37   Mon,09/02 02:00: 1  
 Mean   :2.5382                 Mean   :37   Mon,09/02 03:00: 1  
 3rd Qu.:4.1219                 3rd Qu.:55   Mon,09/02 04:00: 1  
 Max.   :5.9553                 Max.   :73   Mon,09/02 05:00: 1  
                                             (Other)        :67  
  TemperatureD     
 Min.   :0.002225  
 1st Qu.:0.002225  
 Median :0.003123  
 Mean   :0.035054  
 3rd Qu.:0.072474  
 Max.   :0.072474  
                   


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 151 : 346 ***** ***** 151 : 8574 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8574.csv : 151 / 215 
Initially nrows= 3767 
	Removed NAs: nrows= 3767 
*** Initializing DataFormatter (8574_Lucian) ***
   user  system elapsed 
  0.000   0.000   0.001 
*** Adding covariates data for DataFormatter8574_Lucian ***
   user  system elapsed 
  0.001   0.001   0.001 
*** Initializing StateDecoder (8574_Lucian) ***
   user  system elapsed 
  0.001   0.000   0.001 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -533.0053 
iteration 5 logLik: -532.8893 
converged at iteration 8 with logLik: -532.8719 
[1] "MAPE=0.439688483159018; R2=0.00129518950015906"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -532.7912 
iteration 5 logLik: -530.8289 
iteration 10 logLik: -507.3194 
iteration 15 logLik: -334.4317 
iteration 20 logLik: -51.88375 
iteration 25 logLik: 33.55923 
iteration 30 logLik: 37.90849 
iteration 35 logLik: 

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [3767] is not a sub-multiple or multiple of the number of rows [157]

      obs           State        Index                 Time   
 Min.   :0.1657   N (1):20   Min.   : 1   Mon,01/27 00:00: 1  
 1st Qu.:0.2920   N (2):28   1st Qu.:19   Mon,01/27 01:00: 1  
 Median :0.5367   N (3):25   Median :37   Mon,01/27 02:00: 1  
 Mean   :0.6089              Mean   :37   Mon,01/27 03:00: 1  
 3rd Qu.:0.8260              3rd Qu.:55   Mon,01/27 04:00: 1  
 Max.   :1.7228              Max.   :73   Mon,01/27 05:00: 1  
                                          (Other)        :67  
  TemperatureD       
 Min.   :-0.0019967  
 1st Qu.:-0.0019967  
 Median :-0.0004835  
 Mean   :-0.0005556  
 3rd Qu.: 0.0004094  
 Max.   : 0.0004094  
                     


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 152 : 358 ***** ***** 152 : 8589 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8589.csv : 152 / 215 
Initially nrows= 7145 
	Removed NAs: nrows= 7145 
*** Initializing DataFormatter (8589_Malcolm) ***
   user  system elapsed 
  0.001   0.000   0.000 
*** Adding covariates data for DataFormatter8589_Malcolm ***
   user  system elapsed 
  0.002   0.000   0.003 
*** Initializing StateDecoder (8589_Malcolm) ***
   user  system elapsed 
  0.003   0.000   0.003 
***** HMM Cross-Validation K = 2 ******
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
iteration 0 logLik: -6719.415 
converged at iteration 5 with logLik: -6205 
[1] "MAPE=2.91752470573568; R2=0.510110438975529"
***** HMM Cross-Validation K = 3 ******
iteration 0 logLik: -6754.907 
iteration 5 logLik: -5630.291 
iteration 10 logLik: -5191.056 
iteration 15 logLik: -4976.364 


In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [11488] is not a sub-multiple or multiple of the number of rows [479]

      obs           State        Index                 Time   
 Min.   :0.2958   N (1):42   Min.   : 1   Fri,03/29 00:00: 1  
 1st Qu.:0.4728   N (3):31   1st Qu.:19   Fri,03/29 01:00: 1  
 Median :0.5615              Median :37   Fri,03/29 02:00: 1  
 Mean   :0.5954              Mean   :37   Fri,03/29 03:00: 1  
 3rd Qu.:0.7087              3rd Qu.:55   Fri,03/29 04:00: 1  
 Max.   :1.0111              Max.   :73   Fri,03/29 05:00: 1  
                                          (Other)        :67  
  TemperatureD      
 Min.   :-0.000360  
 1st Qu.:-0.000360  
 Median :-0.000360  
 Mean   : 0.002222  
 3rd Qu.: 0.005720  
 Max.   : 0.005720  
                    


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 154 : 380 ***** ***** 154 : 8645 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8645.csv : 154 / 215 
Initially nrows= 9545 
	Removed NAs: nrows= 9531 
*** Initializing DataFormatter (8645_Merritt) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8645_Merritt ***
   user  system elapsed 
  0.002   0.000   0.002 
*** Initializing StateDecoder (8645_Merritt) ***
   user  system elapsed 
  0.004   0.000   0.003 
***** HMM Cross-Validation K = 2 ******
iteration 0 logLik: -5720.319 
iteration 5 logLik: -657.3465 
Bad HMM fit; re-estimating...
iteration 0 logLik: -5870.194 
iteration 5 logLik: -832.6778 
Bad HMM fit; re-estimating...
iteration 0 logLik: -5826.096 
iteration 5 logLik: -747.2318 
Bad HMM fit; re-estimating...
iteration 0 logLik: -5890.932 
iteration 5 logLik: -934.3184 
Bad HMM fit; re-estimating...
iteration 0 logLik: -5894.111 
iteration 5 logLik: -954.5254 
Bad HMM fit;

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [9531] is not a sub-multiple or multiple of the number of rows [398]

      obs              State        Index                 Time   
 Min.   :0.2365   C.Lo (2):69   Min.   : 1   Fri,08/16 00:00: 1  
 1st Qu.:0.3883   N (3)   : 4   1st Qu.:19   Fri,08/16 01:00: 1  
 Median :0.5329                 Median :37   Fri,08/16 02:00: 1  
 Mean   :0.7268                 Mean   :37   Fri,08/16 03:00: 1  
 3rd Qu.:0.8967                 3rd Qu.:55   Fri,08/16 04:00: 1  
 Max.   :3.7822                 Max.   :73   Fri,08/16 05:00: 1  
                                             (Other)        :67  
  TemperatureD     
 Min.   :0.001419  
 1st Qu.:0.026826  
 Median :0.026826  
 Mean   :0.025434  
 3rd Qu.:0.026826  
 Max.   :0.026826  
                   


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

***** 155 : 234 ***** ***** 155 : 8669 *****Processing file /Users/adrianalbert/S3L_server/energy-data/pecan_street/usage-select//60min/8669.csv : 155 / 215 
Initially nrows= 14866 
	Removed NAs: nrows= 14842 
*** Initializing DataFormatter (8669_Harley) ***
   user  system elapsed 
  0.001   0.000   0.001 
*** Adding covariates data for DataFormatter8669_Harley ***
   user  system elapsed 
  0.018   0.003   0.024 
*** Initializing StateDecoder (8669_Harley) ***
   user  system elapsed 
  0.005   0.000   0.006 
***** HMM Cross-Validation K = 2 ******
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
[1] "MAPE=NA; R2=NA"
***** HMM Cross-Validation K = 3 ******
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
Bad HMM fit; re-estimating...
[1] "MAPE=NA; R2=NA"
*

In matrix(as.numeric(myMat), ncol = 24, byrow = T): data length [3479] is not a sub-multiple or multiple of the number of rows [145]

      obs           State        Index                 Time   
 Min.   :0.7728   N (1):21   Min.   : 1   Mon,10/21 00:00: 1  
 1st Qu.:0.9050   N (2):12   1st Qu.:19   Mon,10/21 01:00: 1  
 Median :1.3370   N (3):40   Median :37   Mon,10/21 02:00: 1  
 Mean   :1.6006              Mean   :37   Mon,10/21 03:00: 1  
 3rd Qu.:2.2321              3rd Qu.:55   Mon,10/21 04:00: 1  
 Max.   :4.5319              Max.   :73   Mon,10/21 05:00: 1  
                                          (Other)        :67  
  TemperatureD      
 Min.   :-0.002766  
 1st Qu.:-0.002766  
 Median :-0.001753  
 Mean   : 0.001439  
 3rd Qu.:-0.001753  
 Max.   : 0.019439  
                    


: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
: The shape palette can deal with a maximum of 6 discrete values because
more than 6 becomes difficult to discriminate; you have 7. Consider
specifying shapes manually. if you must have them.

In [None]:
i