In [1]:
# bring packages into main namespace
using Pkg, DataFrames, StatsPlots, Statistics, CSV, Plots, Dates, Flux, DecisionTree, LinearAlgebra, Missings, MLDataUtils
include("proxgrad.jl")
plotlyjs()
#init_notebook_mode(connected=true)
#, LowRankModels

Plots.PlotlyJSBackend()

In [2]:
dfmt = dateformat"yyyy-mm-dd HH:MM:SS-ss";

In [3]:
df = CSV.read("/Users/alfredorodriguez/Desktop/final_disaggregation_package/data/all_ny_data_may2019.csv", dateformat = dfmt); # read 10000 rows from the csv

In [4]:
df[20000:20010,1:7];

Unnamed: 0_level_0,dataid,localminute,air1,air2,air3,airwindowunit1,aquarium1
Unnamed: 0_level_1,Int64,DateTime,Float64?,Float64?,Float64?,Float64?,Missing
1,4414,2019-05-23T08:05:00,0.0,0.006,0.07,missing,missing
2,4414,2019-05-23T08:04:00,0.0,0.006,0.07,missing,missing
3,4414,2019-05-23T08:03:00,0.0,0.006,0.07,missing,missing
4,4414,2019-05-23T08:02:00,0.0,0.006,0.07,missing,missing
5,4414,2019-05-23T08:01:00,0.0,0.005,0.071,missing,missing
6,4414,2019-05-23T08:00:00,0.0,0.006,0.034,missing,missing
7,7999,2019-05-23T08:59:00,missing,missing,missing,0.104,missing
8,7999,2019-05-23T08:58:00,missing,missing,missing,0.104,missing
9,7999,2019-05-23T08:57:00,missing,missing,missing,0.104,missing
10,7999,2019-05-23T08:56:00,missing,missing,missing,0.104,missing


In [5]:
sort!(df,[:dataid, :localminute],rev=false);

In [6]:
function change_missing_to_0(df)
    columns = size(df)[2]
    rows = size(df)[1]
    for i = 1:columns
        for j = 1:rows
            if typeof(df[i][j]) == Missing
                df[i][j] = 0
            end
        end
    end
end

change_missing_to_0 (generic function with 1 method)

In [7]:
function delete_missing_cols(df)
    columns = size(df)[2]
    rows = size(df)[1]
    for i = 1:columns
        count = 0
        for j = 1:rows
            if typeof(df[i][j]) == Missing
                count += 1
            end
        end
        if count == rows
            select!(df, Not(Symbol(names(df)[i])))
        end
    end
end

delete_missing_cols (generic function with 1 method)

In [8]:
function plotting(df, column)
    display(plot(df[:localminute],
            df[Symbol(string(column))],
            xlabel = "Time",
            ylabel = "Power (kW)",
            legend = false))
end

plotting (generic function with 1 method)

In [9]:
function separate_buildings(df)
    labels = unique(df[:dataid])
    buildings = []
    for i in labels
        push!(buildings, df[in([i]).(df.dataid), :])
    end
    return buildings
end

separate_buildings (generic function with 1 method)

In [10]:
function plot_column(buildings, column)
    for i in buildings
        plotting(i, string(column))
    end
end

plot_column (generic function with 1 method)

In [11]:
#Deletes missing columns
df = df[:, mean.(ismissing, eachcol(df)) .< 0.99];

In [12]:
df = select(df, Not([:bedroom3]));

In [13]:
#df = select(df, Not([:bedroom3, :garage2, :shed1]));

In [14]:
#delete_missing_cols(df)

In [15]:
change_missing_to_0(df)

In [612]:
change_missing_to_0(apparent)

In [16]:
total_power = df[:grid]+df[:solar]+df[:solar2];

In [17]:
insert!(df, 3, total_power, :total_power);

In [18]:
buildings = separate_buildings(df);

In [19]:
new_buildings = []
for building in buildings
    if (size(building)[1]) == 44640
        push!(new_buildings, building)
    end
end

In [20]:
#plot_column(new_buildings, "grid")

In [21]:
#plot_column(new_buildings, "total_power")

In [22]:
function single_bldg_subset(df)
    subset = select(df, Not([:grid, :solar, :leg1v, :leg2v, :dataid, :localminute, :solar2]));
    return subset
end
#took out :total_power

single_bldg_subset (generic function with 1 method)

In [23]:
function single_bldg_sum(subset)
    sums = ([sum(subset[i]) for i in 1 : size(subset,2)]);
    return sums
end

single_bldg_sum (generic function with 1 method)

In [24]:
function all_bldg_subsets(buildings)
    all_subsets = []
    for i = 1:size(buildings, 1)
        subset = single_bldg_subset(buildings[i])
        push!(all_subsets, subset)
    end
    return all_subsets
end

all_bldg_subsets (generic function with 1 method)

In [25]:
function all_bldg_sums(buildings)
    all_sums = []
    all_subsets = all_bldg_subsets(buildings);
    for i = 1:size(buildings, 1)
        subset = single_bldg_sum(all_subsets[i])
        push!(all_sums, subset)
    end
    return all_sums
end

all_bldg_sums (generic function with 1 method)

In [26]:
function plot_appliance_sums(buildings)
    all_sums = all_bldg_sums(buildings)
    all_subsets = all_bldg_subsets(buildings)
    for i = 1:size(all_sums, 1)
        display(bar(names(all_subsets[i]), all_sums[i],
            xrotation=90,
            xticks=(0.5:size(all_subsets[i],2), names(all_subsets[i])),
            label = false,
            title = string("Building ", i),
                ylabel = "Total Power (kW)"))
    end
end

plot_appliance_sums (generic function with 1 method)

In [None]:
plot_appliance_sums(new_buildings)

In [27]:
function histogram_of_appliances(buildings)
    tallies = all_bldg_sums(buildings)
    counters = []

    for i = 1:size(tallies, 1)
        count = zeros(size(tallies[i], 1))
        for j = 1:size(tallies[i], 1)
            if tallies[i][j] != 0
                count[j] = 1
            end
        end
        push!(counters, count)
    end

    bar(names(all_bldg_subsets(buildings)[1]), sum(counters),
                xrotation=90,
                xticks=(0.5:size(all_bldg_subsets(buildings)[1],2), names(all_bldg_subsets(buildings)[1])),
                label = false,
                title = string("Histogram of Appliances for all buildings"),
                    ylabel = "Frequency")
end

histogram_of_appliances (generic function with 1 method)

In [None]:
histogram_of_appliances(new_buildings)

In [28]:
averages = all_bldg_subsets(new_buildings);

In [29]:
for i = 1:size(averages)[1]
    averages[i] = disallowmissing(averages[i])
end

In [30]:
avg_power = zeros(size(averages[1], 2))
tracker = zeros(size(averages[1], 2))
for building in averages
    for size = 1:size(averages[1])[2]
        if mean(building[size]) != 0
            avg_power[size] += mean(building[size])
            tracker[size] += 1
        else
            
        end
    end
end

In [None]:
plot(avg_power./tracker,
    xrotation=90,
    label = false,
    ylabel = "Power (kW)",
    title = "Average Power Consumption per Appliance",
    xticks=(1:size(all_bldg_subsets(buildings)[1],2),
        names(all_bldg_subsets(buildings)[1])))

In [31]:
function total_appliances(buildings)
    tallies = all_bldg_sums(buildings)
    counters = []

    for i = 1:size(tallies, 1)
        count = zeros(size(tallies[i], 1))
        for j = 1:size(tallies[i], 1)
            if tallies[i][j] != 0
                count[j] = 1
            end
        end
        push!(counters, count)
    end
    return sum(counters)
end

total_appliances (generic function with 1 method)

In [32]:
appliance_totals = total_appliances(new_buildings);

In [33]:
avgs = avg_power./tracker;

In [34]:
function findextrema(v, n; rev=false)
    idx = partialsortperm(v, 1:n; rev=rev)
    return v[idx], idx
end

findextrema (generic function with 1 method)

In [35]:
largest_appliances = findextrema(appliance_totals.*avgs, 6; rev = true);

In [36]:
largest_appliances[2]

6-element view(::Array{Int64,1}, 1:6) with eltype Int64:
  1
 49
  2
 16
 42
  9

In [37]:
app_names = names(all_bldg_subsets(buildings)[1])
for appliance in largest_appliances[2]
    println(app_names[appliance])
end

total_power
waterheater1
air1
drye1
refrigerator1
car1


In [38]:
#create models for “waterheater1”, “air1”, “drye1”, “refrigerator1” and “car1”

In [39]:
size(averages[1])

(44640, 51)

In [50]:
#waterheater1
function extract_homes_with_appliance(df, appliance)
    homes = []
    for home = 1:size(df)[1]
        headings = names(df[home])
        if appliance in headings #mean(df[home][Symbol(string(appliance))]) != 0
            push!(homes, df[home])
        end
    end
    return homes
end

extract_homes_with_appliance (generic function with 1 method)

In [42]:
function remove_empty_appliances(df)
    for home = 1:size(df)[1]
        headings = names(df[home])
        for heading in headings
            if mean(df[home][Symbol(string(heading))]) == 0
                df[home] = select(df[home], Not([Symbol(string(heading))]))
            end
        end
    end
end

remove_empty_appliances (generic function with 1 method)

In [43]:
remove_empty_appliances(averages)

In [51]:
interested_appliances = ["waterheater1", "air1", "drye1", "refrigerator1", "car1"]
sorted_homes = []
for appliance in interested_appliances
    push!(sorted_homes, extract_homes_with_appliance(averages, appliance))
end

In [231]:
function cross_validate_folds(X, y, k)
    total_rows = size(X)[1]
    chunk = convert(Int64, (round(size(X)[1]/k)))
    folds_X = []
    folds_y = []
    for i = 0:k-1
        push!(folds_X, X[i*chunk+1:(i+1)*chunk, :])
        push!(folds_y, y[i*chunk+1:(i+1)*chunk, :])
    end
    return folds_X, folds_y
end 

cross_validate_folds (generic function with 1 method)

In [217]:
function train_set_test_set(df, appliance)
    X_train, X_test = splitobs(select(df, Not([Symbol(string(appliance))])), at = 0.8);
    y_train, y_test = splitobs(df[Symbol(string(appliance))], at = 0.8);
    return X_train, X_test, y_train, y_test
end

#, k, loss, reg

train_set_test_set (generic function with 1 method)

In [251]:
function rmse(y, y_predict)
    y_difference = y - y_predict
    y_squared = y_difference.^2
    return sqrt(mean(y_squared))
end

rmse (generic function with 1 method)

In [461]:
function forward_chain(df, loss, reg, appliance, k=5)
    X_train, X_test, y_train, y_test = train_set_test_set(df, appliance)
    X_val, y_val = cross_validate_folds(X_train, y_train, k)
    X_train = Matrix(X_train);
    X_test = Matrix(X_test);
    y_train = Array{Float64,1}(y_train);
    y_test = Array{Float64,1}(y_test);
    
    for i = 1:size(X_val)[1]
        X_val[i] = Matrix(X_val[i])
        y_val[i] = vec(y_val[i])
    end

    
    chunk = convert(Int64, (round(size(X_train)[1]/k)))
    w_collect = []
    predictions = []
    errors = []
    for i = 1:k-1
        temp_X = X_train[1:i*chunk, :]
        temp_y = y_train[1:i*chunk, :]
        temp_y = vec(temp_y)
        temp_val_X = X_val[i+1]
        temp_val_y = y_val[i+1]
        n = size(temp_X)[1]
        loss_func = 1/n*loss
        w = proxgrad(loss_func, reg, temp_X, temp_y, maxiters=100)
        yhat = impute(loss, temp_val_X*w);
        error = rmse(temp_val_y, yhat)
        push!(w_collect, w)
        push!(predictions, yhat)
        push!(errors, error)
    end
    return mean(errors)
end 

forward_chain (generic function with 4 methods)

In [462]:
forward_chain(sorted_homes[1][1], QuadLoss(), ZeroReg(), "waterheater1")

0.12238730151005814

In [257]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[1][1], "waterheater1");

In [388]:
X_val, y_val = cross_validate_folds(select(sorted_homes[1][1], Not([Symbol(string(:waterheater1))])), sorted_homes[1][1][:waterheater1], 5);

In [531]:
function errors(df, appliance)
    all_errors = []
    for home in df
        X_train, X_test, y_train, y_test = train_set_test_set(home, string(appliance));
        w = Matrix(X_train)\y_train;
        prediction = Matrix(X_test)*w;
        error = rmse(y_test, prediction);
        push!(all_errors, error)
    end
    return all_errors
end

errors (generic function with 1 method)

In [534]:
errors_hwh = errors(sorted_homes[1], "waterheater1");

In [535]:
errors_air1 = errors(sorted_homes[2], "air1");

In [536]:
errors_drye1 = errors(sorted_homes[3], "drye1");

In [537]:
errors_ref1 = errors(sorted_homes[4], "refrigerator1");

In [538]:
errors_car1 = errors(sorted_homes[5], "car1");

In [588]:
size(errors_car1)

(6,)

In [712]:
app_names = ["waterheater1, n = 19", "air1, n = 19", "drye1, n = 34", "refrigerator1, n = 28", "car1, n = 6"]

5-element Array{String,1}:
 "waterheater1, n = 19"
 "air1, n = 19"
 "drye1, n = 34"
 "refrigerator1, n = 28"
 "car1, n = 6"

In [713]:
boxplot([app_names[1]], errors_hwh, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], errors_air1, label = false)
boxplot!([app_names[3]], errors_drye1, label = false)
boxplot!([app_names[4]], errors_ref1, label = false)
boxplot!([app_names[5]], errors_car1, label = false)

In [572]:
times = buildings[1][:localminute][convert(Int64, (round(size(buildings[1])[1]*0.8)))+1:end];

In [605]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[1][1], "waterheater1");

In [607]:
#plot(times, (Matrix(X_test))*w, label = "Prediction", ylabel = "Power (kW)", xlabel = "Date and Time")
plot(times, y_test, label = "True")

In [608]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[2][1], "air1");
#plot(times, (Matrix(X_test))*w, label = "Prediction", ylabel = "Power (kW)", xlabel = "Date and Time")
plot(times, y_test, label = "True")

In [601]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[3][1], "drye1");
#plot(times, (Matrix(X_test))*w, label = "Prediction", ylabel = "Power (kW)", xlabel = "Date and Time")
plot(times, y_test, label = "True")

In [602]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[4][1], "refrigerator1");
#plot(times, (Matrix(X_test))*w, label = "Prediction", ylabel = "Power (kW)", xlabel = "Date and Time")
plot(times, y_test, label = "True")

In [603]:
X_train, X_test, y_train, y_test = train_set_test_set(sorted_homes[5][1], "car1");
#plot(times, (Matrix(X_test))*w, label = "Prediction", ylabel = "Power (kW)", xlabel = "Date and Time")
plot(times, y_test, label = "True")

In [858]:
function autoregress(df)
    col_names = []
    for name in names(df)
        push!(col_names, name*" (t-2)")
    end
    regression = df[2:end,:];
    rename!(regression, Symbol.(col_names))
    autoregress = hcat(df[1:end-1,1:end], regression);
    return autoregress
end

autoregress (generic function with 1 method)

In [734]:
ar_df = []
for int_appliance in sorted_homes
    for i = 1:size(int_appliance)[1]
        push!(ar_df, autoregress(int_appliance[i]))
    end
end

In [735]:
for i = 1:19
    ar_df[i] = select(ar_df[i], Not([Symbol("waterheater1 (t-1)")]));
end

In [736]:
for i = 20:38
    ar_df[i] = select(ar_df[i], Not([Symbol("air1 (t-1)")]));
end

In [737]:
for i = 39:72
    ar_df[i] = select(ar_df[i], Not([Symbol("drye1 (t-1)")]));
end

In [738]:
for i = 73:100
    ar_df[i] = select(ar_df[i], Not([Symbol("refrigerator1 (t-1)")]));
end

In [740]:
for i = 101:106
    ar_df[i] = select(ar_df[i], Not([Symbol("car1 (t-1)")]));
end

In [741]:
ar_errors_hwh = errors(ar_df[1:19], "waterheater1");

In [873]:
mean(ar_errors_hwh)

0.17530334256345892

In [742]:
ar_errors_air = errors(ar_df[20:38], "air1");

In [874]:
mean(ar_errors_air)

0.10817410149894548

In [743]:
ar_errors_dry = errors(ar_df[39:72], "drye1");

In [875]:
mean(ar_errors_dry)

0.2016863675613916

In [744]:
ar_errors_ref = errors(ar_df[73:100], "refrigerator1");

In [876]:
mean(ar_errors_ref)

0.058747716192994355

In [745]:
ar_errors_car = errors(ar_df[101:106], "car1");

In [877]:
mean(ar_errors_car)

0.35626113230304374

In [746]:
boxplot([app_names[1]], ar_errors_hwh, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], ar_errors_air, label = false)
boxplot!([app_names[3]], ar_errors_dry, label = false)
boxplot!([app_names[4]], ar_errors_ref, label = false)
boxplot!([app_names[5]], ar_errors_car, label = false)

In [766]:
l1_l1_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, L1Loss(), OneReg(), "waterheater1")
    push!(l1_l1_app1_errors, error)
end

In [878]:
mean(l1_l1_app1_errors)

0.6084845459666519

In [767]:
l1_l1_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, L1Loss(), OneReg(), "air1")
    push!(l1_l1_app2_errors, error)
end

In [879]:
mean(l1_l1_app2_errors)

0.2933390565342167

In [768]:
l1_l1_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, L1Loss(), OneReg(), "drye1")
    push!(l1_l1_app3_errors, error)
end

In [880]:
mean(l1_l1_app3_errors)

0.44849085744603745

In [769]:
l1_l1_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, L1Loss(), OneReg(), "refrigerator1")
    push!(l1_l1_app4_errors, error)
end

In [881]:
mean(l1_l1_app4_errors)

0.08271060026745401

In [770]:
l1_l1_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, L1Loss(), OneReg(), "car1")
    push!(l1_l1_app5_errors, error)
end

In [882]:
mean(l1_l1_app5_errors)

0.8130049170615151

In [771]:
boxplot([app_names[1]], l1_l1_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], l1_l1_app2_errors, label = false)
boxplot!([app_names[3]], l1_l1_app3_errors, label = false)
boxplot!([app_names[4]], l1_l1_app4_errors, label = false)
boxplot!([app_names[5]], l1_l1_app5_errors, label = false)

In [772]:
l1_l2_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, L1Loss(), QuadReg(), "waterheater1")
    push!(l1_l2_app1_errors, error)
end

In [883]:
mean(l1_l2_app1_errors)

0.5512905068841066

In [773]:
l1_l2_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, L1Loss(), QuadReg(), "air1")
    push!(l1_l2_app2_errors, error)
end

In [884]:
mean(l1_l2_app2_errors)

0.25930462248528247

In [774]:
l1_l2_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, L1Loss(), QuadReg(), "drye1")
    push!(l1_l2_app3_errors, error)
end

In [885]:
mean(l1_l2_app3_errors)

0.44812285699560817

In [775]:
l1_l2_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, L1Loss(), QuadReg(), "refrigerator1")
    push!(l1_l2_app4_errors, error)
end

In [886]:
mean(l1_l2_app4_errors)

0.07362322342806264

In [776]:
l1_l2_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, L1Loss(), QuadReg(), "car1")
    push!(l1_l2_app5_errors, error)
end

In [887]:
mean(l1_l2_app5_errors)

0.8033574276984782

In [777]:
boxplot([app_names[1]], l1_l2_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], l1_l2_app2_errors, label = false)
boxplot!([app_names[3]], l1_l2_app3_errors, label = false)
boxplot!([app_names[4]], l1_l2_app4_errors, label = false)
boxplot!([app_names[5]], l1_l2_app5_errors, label = false)

In [907]:
l2_l2_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, QuadLoss(), ZeroReg(), "waterheater1")
    push!(l2_l2_app1_errors, error)
end

In [913]:
mean(l2_l2_app1_errors )

0.3403943068420354

In [908]:
l2_l2_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, QuadLoss(), ZeroReg(), "air1")
    push!(l2_l2_app2_errors, error)
end

In [914]:
mean(l2_l2_app2_errors )

0.187862453197009

In [909]:
l2_l2_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, QuadLoss(), ZeroReg(), "drye1")
    push!(l2_l2_app3_errors, error)
end

In [915]:
mean(l2_l2_app3_errors )

0.28632415909004605

In [910]:
l2_l2_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, QuadLoss(), ZeroReg(), "refrigerator1")
    push!(l2_l2_app4_errors, error)
end

In [916]:
mean(l2_l2_app4_errors )

40.58890765605433

In [911]:
l2_l2_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, QuadLoss(), ZeroReg(), "car1")
    push!(l2_l2_app5_errors, error)
end

In [917]:
mean(l2_l2_app5_errors )

0.36620980442155115

In [912]:
boxplot([app_names[1]], l2_l2_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], l2_l2_app2_errors, label = false)
boxplot!([app_names[3]], l2_l2_app3_errors, label = false)
boxplot!([app_names[4]], l2_l2_app4_errors, label = false)
boxplot!([app_names[5]], l2_l2_app5_errors, label = false)

In [784]:
l2_l1_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, QuadLoss(), OneReg(), "waterheater1")
    push!(l2_l1_app1_errors, error)
end

In [785]:
l2_l1_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, QuadLoss(), OneReg(), "air1")
    push!(l2_l1_app2_errors, error)
end

In [786]:
l2_l1_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, QuadLoss(), OneReg(), "drye1")
    push!(l2_l1_app3_errors, error)
end

In [787]:
l2_l1_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, QuadLoss(), OneReg(), "refrigerator1")
    push!(l2_l1_app4_errors, error)
end

In [788]:
l2_l1_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, QuadLoss(), OneReg(), "car1")
    push!(l2_l1_app5_errors, error)
end

In [789]:
boxplot([app_names[1]], l2_l1_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], l2_l1_app2_errors, label = false)
boxplot!([app_names[3]], l2_l1_app3_errors, label = false)
boxplot!([app_names[4]], l2_l1_app4_errors, label = false)
boxplot!([app_names[5]], l2_l1_app5_errors, label = false)

In [790]:
hub_l1_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, HuberLoss(), OneReg(), "waterheater1")
    push!(hub_l1_app1_errors, error)
end

In [888]:
mean(hub_l1_app1_errors)

0.5868365754758332

In [791]:
hub_l1_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, HuberLoss(), OneReg(), "air1")
    push!(hub_l1_app2_errors, error)
end

In [889]:
mean(hub_l1_app2_errors)

0.28046605357195475

In [792]:
hub_l1_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, HuberLoss(), OneReg(), "drye1")
    push!(hub_l1_app3_errors, error)
end

In [890]:
mean(hub_l1_app3_errors)

0.44849888714310104

In [793]:
hub_l1_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, HuberLoss(), OneReg(), "refrigerator1")
    push!(hub_l1_app4_errors, error)
end

In [891]:
mean(hub_l1_app4_errors)

0.08299903735867006

In [794]:
hub_l1_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, HuberLoss(), OneReg(), "car1")
    push!(hub_l1_app5_errors, error)
end

In [895]:
mean(hub_l1_app5_errors[1:6])

0.8130049170615151

In [795]:
boxplot([app_names[1]], hub_l1_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], hub_l1_app2_errors, label = false)
boxplot!([app_names[3]], hub_l1_app3_errors, label = false)
boxplot!([app_names[4]], hub_l1_app4_errors, label = false)
boxplot!([app_names[5]], hub_l1_app5_errors, label = false)

In [796]:
hub_l2_app1_errors = []
for home in ar_df[1:19]
    error = forward_chain(home, HuberLoss(), QuadReg(), "waterheater1")
    push!(hub_l2_app1_errors, error)
end

In [896]:
mean(hub_l2_app1_errors)

0.47958993584864934

In [797]:
hub_l2_app2_errors = []
for home in ar_df[20:38]
    error = forward_chain(home, HuberLoss(), QuadReg(), "air1")
    push!(hub_l2_app2_errors, error)
end

In [897]:
mean(hub_l2_app2_errors)

0.24029645285547685

In [798]:
hub_l2_app3_errors = []
for home in ar_df[39:72]
    error = forward_chain(home, HuberLoss(), QuadReg(), "drye1")
    push!(hub_l2_app3_errors, error)
end

In [899]:
mean(hub_l2_app3_errors)

0.42249579554100924

In [799]:
hub_l2_app4_errors = []
for home in ar_df[73:100]
    error = forward_chain(home, HuberLoss(), QuadReg(), "refrigerator1")
    push!(hub_l2_app4_errors, error)
end

In [900]:
mean(hub_l2_app4_errors)

0.07403791693518923

In [904]:
hub_l2_app5_errors = []
for home in ar_df[101:106]
    error = forward_chain(home, HuberLoss(), QuadReg(), "car1")
    push!(hub_l2_app5_errors, error)
end

In [905]:
mean(hub_l2_app5_errors)

0.70675046718097

In [810]:
boxplot([app_names[1]], hub_l2_app1_errors, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], hub_l2_app2_errors, label = false)
boxplot!([app_names[3]], hub_l2_app3_errors, label = false)
boxplot!([app_names[4]], hub_l2_app4_errors, label = false)
boxplot!([app_names[5]], hub_l2_app5_errors, label = false)

In [811]:
function box_plots(ar_df, loss, reg)
    app_names = ["waterheater1, n = 19", "air1, n = 19", "drye1, n = 34", "refrigerator1, n = 28", "car1, n = 6"]
    hub_l2_app1_errors = []
    for home in ar_df[1:19]
        error = forward_chain(home, loss, reg, "waterheater1")
        push!(hub_l2_app1_errors, error)
    end

    hub_l2_app2_errors = []
    for home in ar_df[20:38]
        error = forward_chain(home, loss, reg, "air1")
        push!(hub_l2_app2_errors, error)
    end

    hub_l2_app3_errors = []
    for home in ar_df[39:72]
        error = forward_chain(home, loss, reg, "drye1")
        push!(hub_l2_app3_errors, error)
    end

    hub_l2_app4_errors = []
    for home in ar_df[73:100]
        error = forward_chain(home, loss, reg, "refrigerator1")
        push!(hub_l2_app4_errors, error)
    end

    hub_l2_app5_errors = []
    for home in ar_df[101:106]
        error = forward_chain(home, loss, reg, "car1")
        push!(hub_l2_app5_errors, error)
    end

    boxplot([app_names[1]], hub_l2_app1_errors, label = false, ylabel = "RMSE (kW)")
    boxplot!([app_names[2]], hub_l2_app2_errors, label = false)
    boxplot!([app_names[3]], hub_l2_app3_errors, label = false)
    boxplot!([app_names[4]], hub_l2_app4_errors, label = false)
    boxplot!([app_names[5]], hub_l2_app5_errors, label = false)
end

box_plots (generic function with 1 method)

In [906]:
box_plots(ar_df, QuadLoss(), ZeroReg())

In [817]:
hub_l2_app5_errors = []
lambda = [0:0.01:1;];
for home in ar_df[101:106]
    container = []
    for value in lambda
        error = forward_chain(home, HuberLoss(), value*QuadReg(), "car1")
        push!(container, error)
    end
    push!(hub_l1_app5_errors, container)
end

In [919]:
plot(lambda, mean(hub_l1_app5_errors[7:12]), legend = false, ylabel = "RMSE (kW)", xlabel = "Lambda")

In [859]:
ar_df_2 = []
for int_appliance in ar_df
    push!(ar_df_2, autoregress(int_appliance))
end

In [862]:
for i = 1:19
    ar_df_2[i] = select(ar_df_2[i], Not([Symbol("waterheater1 (t-2)")]));
end

In [861]:
for i = 20:38
    ar_df_2[i] = select(ar_df_2[i], Not([Symbol("air1 (t-2)")]));
end

In [863]:
for i = 39:72
    ar_df_2[i] = select(ar_df_2[i], Not([Symbol("drye1 (t-2)")]));
end

In [864]:
for i = 73:100
    ar_df_2[i] = select(ar_df_2[i], Not([Symbol("refrigerator1 (t-2)")]));
end

In [865]:
for i = 101:106
    ar_df_2[i] = select(ar_df_2[i], Not([Symbol("car1 (t-2)")]));
end

In [866]:
ar_errors_hwh = errors(ar_df_2[1:19], "waterheater1");

In [867]:
ar_errors_air = errors(ar_df_2[20:38], "air1");

In [868]:
ar_errors_dry = errors(ar_df_2[39:72], "drye1");

In [869]:
ar_errors_ref = errors(ar_df_2[73:100], "refrigerator1");

In [870]:
ar_errors_car = errors(ar_df_2[101:106], "car1");

In [871]:
boxplot([app_names[1]], ar_errors_hwh, label = false, ylabel = "RMSE (kW)")
boxplot!([app_names[2]], ar_errors_air, label = false)
boxplot!([app_names[3]], ar_errors_dry, label = false)
boxplot!([app_names[4]], ar_errors_ref, label = false)
boxplot!([app_names[5]], ar_errors_car, label = false)

In [872]:
mean(ar_errors_hwh)

0.17530334256345892