In [1]:
# Import Libraries
import glob
import itertools
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [2]:
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from neupy import algorithms

In [3]:
def rmsle(expected, predicted):
    """Root mean squared logarithmic error (RMSLE) between two arrays.

    Evaluates ``sqrt(mean((log(1 + expected) - log(1 + predicted)) ** 2))``.

    Parameters
    ----------
    expected : array-like
        Reference values. Every value must be greater than -1 for the
        logarithm to be finite.
    predicted : array-like
        Predicted values, broadcastable against ``expected``.

    Returns
    -------
    float
        The RMSLE of the two inputs.
    """
    # np.log1p(x) already evaluates log(1 + x); adding a further 1 to the
    # argument would compute log(2 + x), which is not the RMSLE.
    log_expected = np.log1p(np.asarray(expected, dtype=float))
    log_predicted = np.log1p(np.asarray(predicted, dtype=float))
    squared_log_error = np.square(log_expected - log_predicted)
    return np.sqrt(np.mean(squared_log_error))


def scorer(network, X, y):
    """Scoring callable for scikit-learn model selection.

    Parameters
    ----------
    network : estimator
        Fitted model exposing ``predict(X)``.
    X : array-like
        Inputs passed to ``network.predict``.
    y : array-like
        Ground-truth targets for ``X``.

    Returns
    -------
    float
        The negated RMSLE of the predictions, so that a larger value means
        a better model.
    """
    result = network.predict(X)
    # Flatten both sides so a 1-D ``y`` is compared element by element with
    # an (n, 1) prediction instead of being broadcast to an (n, n) matrix.
    y_true = np.ravel(y)
    y_pred = np.ravel(result)
    # scikit-learn search objects maximise the scorer's return value, so an
    # error metric has to be negated for the lowest error to be selected.
    return -rmsle(y_true, y_pred)

In [4]:
# Every CSV in the processed d2 framework folder (Windows-style relative path).
csv_pattern = r'..\..\data\processed\framework\d2\*.csv'
files = glob.glob(csv_pattern)

In [5]:
# Train and evaluate one GRNN per (input file, target) pair.
# For every CSV in `files`, the 2018 rows are scaled to [0, 1], split into a
# training part and a fixed-size hold-out part, the GRNN `std` is tuned by
# cross-validated random search on the training part, and the hold-out
# predictions plus summary error metrics are written out.
feature_set = ['Temperature', 'Dew Point', 'Clothing', 'SR', 'Occupancy']
targets = ['SPT', 'adjustedSPT', 'SPT_Random']

SEED = 42         # fixed seed so the split and the random search are reproducible
TEST_ROWS = 1100  # absolute number of rows held out for the final evaluation

# Features and target get their own scaler. `scaler` keeps its original name
# and is used only for the target, so `scaler.inverse_transform` below always
# maps predictions back to target units.
feature_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

df_metric = pd.DataFrame(columns=['mean_err', 'med_err', 'mae', 'mse', 'rmse', 'std_err', 'se'])
for f in files:
    # `parse_dates=True` parses the index column; `infer_datetime_format` is a
    # boolean flag (not a format string) and is deprecated, so it is omitted.
    df_org = pd.read_csv(f, index_col=[0], parse_dates=True)
    # Partial-string row selection must go through `.loc`.
    df = df_org.loc['2018'].copy()

    # The features are identical for every target, so scale them once per file.
    # Values are cast to int (fractional parts are dropped) before scaling.
    # NOTE(review): scaling is fitted on all 2018 rows before the split, so the
    # hold-out rows influence the min/max - confirm this is acceptable.
    x_scaled = feature_scaler.fit_transform(df[feature_set].astype(int).values)

    for target in targets:
        y_scaled = scaler.fit_transform(df[target].astype(int).values.reshape(-1, 1))

        # An integer `test_size` holds out exactly TEST_ROWS rows; a float
        # ratio can round up by one row.
        x_train, x_test, y_train, y_test = train_test_split(
            x_scaled,
            y_scaled,
            test_size=TEST_ROWS,
            random_state=SEED,
        )

        random_search = RandomizedSearchCV(
            algorithms.GRNN(std=0.1, verbose=False),
            param_distributions={'std': np.arange(1e-2, 1, 1e-3)},
            n_iter=100,
            cv=3,
            scoring=scorer,
            random_state=SEED,
        )

        # Tune `std` on the training rows only, so the hold-out rows stay
        # unseen until the final evaluation below.
        random_search.fit(x_train, y_train)

        best_std = random_search.best_params_['std']

        nw = algorithms.GRNN(std=best_std, verbose=False)
        nw.train(x_train, y_train)
        y_predicted = nw.predict(x_test)

        # Map scaled predictions and targets back to the original target units.
        df_out = pd.DataFrame(data={
            "Predicted": scaler.inverse_transform(y_predicted.reshape(-1, 1)).ravel(),
            "Actual": scaler.inverse_transform(y_test.reshape(-1, 1)).ravel(),
        })

        # File name without directory and extension.
        fname = f.split('\\')[-1].split('.')[0]
        df_out.to_csv('..\\..\\output\\results\\d2\\' + fname + '_' + target + '.csv')

        # Row label: the part of the file name after 'apt_' plus the target name.
        indx = fname.split('apt_')[1] + '_' + target

        # Signed prediction error, computed once and reused for every metric.
        err = df_out['Predicted'] - df_out['Actual']
        metrics = {
            'mean_err': err.mean(),
            'med_err': err.median(),
            'mae': err.abs().mean(),
            'mse': (err ** 2).mean(),
            'rmse': np.sqrt((err ** 2).mean()),
            'std_err': err.std(),
            'se': err.std() / np.sqrt(df_out.shape[0]),
        }
        for name, value in metrics.items():
            df_metric.loc[indx, name] = value

  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return (dot(self.y_train.T, ratios) / ratios.sum(axis=0)).T
  return

In [6]:
# Write the collected error-metric table to the d2 results folder.
metric_csv = r'..\..\output\results\d2\metric.csv'
df_metric.to_csv(metric_csv)