In [None]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore',category=DeprecationWarning)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, KFold
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import math
import time

In [None]:
# Read only the 'Total Consumers' sheet. Passing a list as sheet_name makes
# read_excel return a dict keyed by sheet name, hence the lookup below.
data = pd.read_excel(
    '~/datasets/Dataset.xlsx',
    sheet_name=['Total Consumers'],
)
df = data['Total Consumers']
df

In [None]:
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
# Skip the first 35136 rows (35136 = 366 * 96; presumably one leap year of
# 15-minute readings - TODO confirm) and reorder the columns to positions 0, 2, 1.
df_location = (
    pd.read_pickle("~/datasets/df_location.pkl")
    .iloc[35136:, [0, 2, 1]]
    .reset_index(drop=True)
)
df_location

In [None]:
# Number of rows belonging to one house. value_counts() is sorted by frequency,
# so .iloc[0] is the largest per-location count. Positional access is explicit
# here because `value_counts()[1]` is a label lookup when locations are integers
# and a deprecated positional lookup otherwise.
# NOTE(review): the cells that use this value assume every house has exactly
# this many rows, stored contiguously - confirm.
num_samples_per_house = df_location.Location.value_counts().iloc[0]
# Number of distinct houses (locations) in the table.
num_houses = df_location.Location.nunique()

In [None]:
def no_ml_predict(X: np.array, y: np.array):
    """Score a naive (no-ML) forecast against the observed values.

    Parameters
    ----------
    X : array-like
        Forecast values (the lagged readings used as the prediction).
    y : array-like
        Observed values, same shape as ``X``.

    Returns
    -------
    tuple
        ``(mse, wape, r2)``, each truncated to four decimals. ``wape`` is
        returned as a fraction; it is only multiplied by 100 for printing.
    """
    # scikit-learn metrics take (y_true, y_pred). MSE is symmetric, but R2 is
    # normalised by the variance of y_true, so the observed values must come first.
    mse = truncate_metric(mean_squared_error(y, X))
    # WAPE: total absolute error relative to the total absolute observed value.
    wape = truncate_metric(float(np.sum(np.abs(X - y)) / np.sum(np.abs(y))))
    r2 = truncate_metric(r2_score(y, X))

    print('MSE: %.4f' % mse)
    print('WAPE: %.2f' % (wape * 100))
    print('R2: %.4f' % r2)
    return mse, wape, r2


def truncate_metric(metric):
    """Cut ``metric`` down to four decimal places (truncation, not rounding)."""
    scaled = 10000 * metric
    return math.trunc(scaled) / 10000

def truncate(df):
    """Return a copy of ``df`` with every cell truncated to four decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells are all numeric.

    Returns
    -------
    pandas.DataFrame
        New frame with the same shape and labels; ``df`` is not modified.
    """
    def _trunc(value):
        # Truncate toward zero instead of rounding.
        return math.trunc(10000 * value) / 10000

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. The check is done on the class so that a column that
    # happens to be called "map" cannot shadow the method.
    if hasattr(pd.DataFrame, "map"):
        return df.map(_trunc)
    return df.applymap(_trunc)

def truncate_location(df):
    """Return a copy of ``df`` whose ``'Energy'`` column is truncated to four decimals.

    All other columns are passed through unchanged and the column order is
    kept. The original implementation dropped whatever column was last, which
    only gave the right result when ``'Energy'`` was the last column; for that
    layout the output here is the same.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric ``'Energy'`` column.

    Returns
    -------
    pandas.DataFrame
        New frame; ``df`` is not modified.
    """
    def _trunc(value):
        # Truncate toward zero instead of rounding.
        return math.trunc(10000 * value) / 10000

    # Series.map replaces the deprecated one-column DataFrame.applymap detour.
    return df.assign(Energy=df['Energy'].map(_trunc))
    
def plot_results(preds: np.array, actuals: np.array, title: str):
    """Scatter the predictions against the observed values with a y=x guide.

    Parameters
    ----------
    preds : array-like
        Predicted values (drawn on the y axis).
    actuals : array-like
        Observed values (drawn on the x axis).
    title : str
        Figure title.
    """
    plt.scatter(actuals, preds, c='b', label='predicted')
    plt.xlabel('actual')
    plt.ylabel('predicted')
    plt.title(title)

    # Anchor both axes at zero while keeping the automatically chosen upper limits.
    x_upper = plt.xlim()[1]
    y_upper = plt.ylim()[1]
    plt.xlim(0, x_upper)
    plt.ylim(0, y_upper)

    # Draw the y=x guide across the whole visible area instead of a fixed
    # [0, 100] segment, which is too short once the data exceed 100. The limits
    # were set explicitly above, so this line does not rescale the axes.
    diagonal_end = max(x_upper, y_upper)
    plt.plot([0, diagonal_end], [0, diagonal_end], '--r', label='y=x')
    plt.legend()
    plt.show()
    
def total_averaged_metrics(metrics_list):
    """Average a list of ``(mse, wape, r2)`` triples, print and return the means.

    Each mean is rounded to three decimals. WAPE is printed multiplied by 100
    but returned as a fraction.
    """
    mse_total = 0
    wape_total = 0
    r2_total = 0
    for mse_item, wape_item, r2_item in metrics_list:
        mse_total += mse_item
        wape_total += wape_item
        r2_total += r2_item

    count = len(metrics_list)
    mse = np.round(mse_total / count, 3)
    wape = np.round(wape_total / count, 3)
    r2 = np.round(r2_total / count, 3)

    print(f"Total Averaged MSE: {mse}")
    print(f"Total Averaged WAPE: {wape * 100}")
    print(f"Total Averaged R2: {r2}")
    return mse, wape, r2

In [None]:
def normalize_training(X_train):
    """Fit a min-max scaler with range (0, 1) on ``X_train`` and apply it.

    Returns the scaled data together with the fitted scaler so the same
    transformation can be reused on other data.
    """
    fitted_scaler = MinMaxScaler(feature_range=(0,1)).fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler

## Preprocessing for the 51 individual houses
### Previous 15 minutes

In [None]:
# Inputs for the "previous 15 minutes" baseline: each reading is used as the
# forecast for the next one, so the last reading of every house is left out.
# Assumes houses are stored as contiguous runs of num_samples_per_house rows.
X15 = df_location.copy()
# Mark the final row of every house except the last one for removal ...
for house in range(1, num_houses):
    last_row_of_previous_house = num_samples_per_house * house - 1
    X15.iloc[last_row_of_previous_house] = np.nan
X15 = X15.dropna()
m = len(X15)
# ... and cut the final row of the last house, then keep only the value column.
X15 = (
    X15.iloc[:m - 1]
    .drop(columns=["Time", "Location"])
    .reset_index(drop=True)
)
X15

In [None]:
# Targets for the "previous 15 minutes" baseline: every reading except the
# first one of each house, so row k here follows row k of X15 in the same house.
# Assumes houses are stored as contiguous runs of num_samples_per_house rows.
y15 = df_location.copy()
# Mark the first row of every house except the first one for removal ...
for house in range(1, num_houses):
    first_row_of_house = num_samples_per_house * house
    y15.iloc[first_row_of_house] = np.nan
y15 = y15.dropna()
# ... and skip the first row of the first house, then keep only the value column.
y15 = (
    y15.iloc[1:]
    .reset_index(drop=True)
    .drop(columns=["Time", "Location"])
    .reset_index(drop=True)
)
y15

### Previous day at same hour

In [None]:
Xday = df_location.copy()
for i in range(1, num_houses):
    Xday.iloc[(num_samples_per_house*i)-1] = np.nan
Xday.dropna(inplace=True)
m = len(Xday)
Xday = Xday.iloc[0:(m-96)]
Xday.drop(["Time", "Location"],inplace=True, axis=1)
Xday.reset_index(drop=True, inplace=True)
Xday

In [None]:
yday = df_location.copy()
for i in range(1, num_houses):
    yday.iloc[(num_samples_per_house*i)] = np.nan
yday.dropna(inplace=True)
yday = yday.iloc[96:].reset_index(drop=True)
yday.drop(["Time", "Location"],inplace=True, axis=1)
yday.reset_index(drop=True, inplace=True)
yday

### Previous week, same day and same hour

In [None]:
Xweek = df_location.copy()
for i in range(1, num_houses):
    Xweek.iloc[(num_samples_per_house*i)-1] = np.nan
Xweek.dropna(inplace=True)
m = len(Xweek)
Xweek = Xweek.iloc[0:(m-672)]
Xweek.drop(["Time", "Location"],inplace=True, axis=1)
Xweek.reset_index(drop=True, inplace=True)
Xweek

In [None]:
yweek = df_location.copy()
for i in range(1, num_houses):
    yweek.iloc[(num_samples_per_house*i)] = np.nan
yweek.dropna(inplace=True)
yweek = yweek.iloc[672:].reset_index(drop=True)
yweek.drop(["Time", "Location"],inplace=True, axis=1)
yweek.reset_index(drop=True, inplace=True)
yweek

## Prediction organized by location
### Previous 15 minutes

In [None]:
# Score the "previous 15 minutes" baseline: X15 is the forecast, y15 the observed values.
mse_15, wape_15, r2_15 = no_ml_predict(X15.values, y15.values)

In [None]:
# Scatter of the "previous 15 minutes" forecast (y axis) against the observed values (x axis).
plot_results(X15, y15, "Last 15 organized by location")

### Previous day at same hour

In [None]:
# Score the "previous day" baseline: Xday is the forecast, yday the observed values.
mse_day, wape_day, r2_day = no_ml_predict(Xday.values, yday.values)

In [None]:
# Scatter of the "previous day" forecast (y axis) against the observed values (x axis).
plot_results(Xday, yday, "Last day organized by location")

### Previous week, same day and same hour

In [None]:
# Score the "previous week" baseline: Xweek is the forecast, yweek the observed values.
mse_week, wape_week, r2_week = no_ml_predict(Xweek.values, yweek.values)

In [None]:
# Scatter of the "previous week" forecast (y axis) against the observed values (x axis).
plot_results(Xweek, yweek, "Last week organized by location")

In [None]:
# Section headers written to the log file, one per baseline (newline included).
names_alg = [
    "No ML - Previous 15\n",
    "No ML - Previous Day\n",
    "No ML - Previous Week\n",
]
# (mse, wape, r2) triples, in the same order as names_alg.
all_metrics = [
    (mse_15, wape_15, r2_15),
    (mse_day, wape_day, r2_day),
    (mse_week, wape_week, r2_week),
]

In [None]:
# Write the baseline scores to the log file, one five-line section per baseline:
# label, model tag, MSE, WAPE, R2. WAPE is written as stored in all_metrics
# (a fraction, not a percentage).
with open('../../gpu_logs/no_ml_.txt', 'w') as file:
    # Walk the labels and metric triples together rather than indexing a fixed
    # range(0, 3), so the loop follows the lists if a baseline is added or removed.
    for label, (mse_value, wape_value, r2_value) in zip(names_alg, all_metrics):
        file.write(label)
        file.write("No_ML\n")
        file.write("MSE: " + str(mse_value) + "\n")
        file.write("WAPE: " + str(wape_value) + "\n")
        file.write("R2: " + str(r2_value) + "\n")