In [1]:
# Path setup
import sys
import os

# Put the project root on the import path so the `src_rf` package imported
# further down in this cell can be resolved.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs unchanged on a machine where this directory exists.
sys.path.append("/home/dchen/Random_Forest_Weights/")

# Basics:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# Helpful:
from sklearn.model_selection import train_test_split

# Pipeline and ColumnsTransformer:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# models:
import statsmodels.api as sm

# my functions:
from src_rf.methods.calc_mean import *
from src_rf.methods.calc_weights import *
from src_rf.methods.calc_dist import *
from src_rf.datasets.load_weights_energy import * 

### 0. Setup

In [2]:
def quantile_loss(y_true, y_pred, tau):
    """Element-wise quantile (pinball) loss for quantile level ``tau``.

    For each observation the loss is
    ``max(tau * (y_true - y_pred), (1 - tau) * (y_pred - y_true))``.

    Parameters
    ----------
    y_true : scalar or array-like
        Observed value(s).
    y_pred : scalar or array-like
        Predicted quantile value(s); must be broadcastable against ``y_true``.
    tau : float
        Quantile level the prediction targets.

    Returns
    -------
    scalar or array
        The loss for each element; no averaging is applied.
    """
    # np.maximum compares element by element. The builtin max() used before
    # only worked for scalars and raised "truth value ... is ambiguous" as
    # soon as arrays or Series with more than one element were passed in.
    return np.maximum(tau * (y_true - y_pred), (1 - tau) * (y_pred - y_true))

In [3]:
# Quantile levels evaluated in this notebook: the median, both quartiles and
# the 2.5% / 97.5% tails.
quantiles = [
    0.025,
    0.25,
    0.5,
    0.75,
    0.975,
]

### 1. Load Data and Train/Test Split

In [4]:
# Read the hourly energy CSV; the 'datetime' column becomes the index and is
# parsed as dates.
df = pd.read_csv(
    "/home/dchen/Random_Forest_Weights/src_rf/data/energy_data_hourly.csv",
    index_col="datetime",
    parse_dates=True,
)

In [5]:
# Split the frame into the target series and the remaining feature columns.
y = df['total_energy_usage']
X = df.drop(columns='total_energy_usage')

In [6]:
# Hold out 30% of the rows for testing. shuffle=False means the rows are not
# shuffled before the split is made.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=False,
    random_state=42,
)

### 2. Load Random Forest Weights

In [7]:
# Load the random forest weights through the project helper
# `load_weights_energy` (star-imported in the first cell).
# NOTE(review): presumably these are precomputed weights relating each test
# observation to the training samples; confirm against the helper.
rf_weights = load_weights_energy()

### 3. Calculate Quantiles:

In [8]:
# Combine the loaded weights with the training targets via the project helper
# `calc_dist_rf`; the result is the input to the quantile calculation below.
# NOTE(review): presumably a weighted distribution over y_train per test
# observation; confirm against the helper.
rf_dist = calc_dist_rf(rf_weights, y_train)

In [10]:
# One row per test observation, one column per entry in `quantiles`.
# The column count comes from the list itself rather than a hard-coded 5, so
# changing `quantiles` cannot silently break the shape.
quantile_preds = np.zeros((len(y_test), len(quantiles)))
for count, q in enumerate(quantiles):
    # Pass the current level `q`. The previous code passed a hard-coded 0.5 and
    # never used `q`, so every column received the same values.
    # NOTE(review): assumes the second argument of calc_quantile_rf is the
    # quantile level, as the former literal 0.5 suggests; confirm.
    quantile_preds[:, count] = np.array(calc_quantile_rf(rf_dist, q, y_train))