In [None]:
from methods.shap_methods import *
import pickle
import os

In [None]:
# Target series whose saved SHAP object is inspected in the cells below.
VARIABLE = "CPIAUCSL" # Select from CPIAUCSL, GS5, RPI and UNRATE
# Bare last expression so the notebook displays what shap_file_path returns for
# this variable (the same value is passed to open() in the next cell).
shap_file_path(VARIABLE)

In [None]:
# Load the pickled SHAP object for VARIABLE from the path given by shap_file_path.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open files that were produced by this project.
with open(shap_file_path(VARIABLE), "rb") as input_file:
    shap_object = pickle.load(input_file)

In [None]:
# Display the importance plot provided by the loaded SHAP object
# (presumably an NNForecastShap instance -- TODO confirm against methods.shap_methods).
shap_object.plot_importance()

In [1]:
from methods.config import *
from methods.clean_data import Data_Prep
from methods.build_nn_model import build_model
from methods.nn import get_NN_results
from methods.model_results import get_model_details
from methods.data_methods import prepare_model_data
from methods.shap_methods import NNForecastShap, shap_file_path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration read by the cells below.
# END_YEAR bounds the dataset (used as f'{END_YEAR}-01-01') and selects the stored model details.
END_YEAR = 2019
# Despite the plural name this holds a single target name; it is wrapped in a list where a list is needed.
VARIABLES = "UNRATE" # Select from CPIAUCSL, GS5, RPI and UNRATE
# Passed to get_model_details and transform_to_supervised_learning
# (presumably the forecast horizon in steps -- TODO confirm).
OUTPUT_STEPS = 1

In [3]:
# DATA_PATH / TRANSFORM_PATH are not defined in this notebook; presumably they
# come from the `from methods.config import *` star import -- TODO confirm.
data_prep = Data_Prep(DATA_PATH, TRANSFORM_PATH)
# Look up the stored settings for this (end year, target, output steps) combination.
model_details = get_model_details(END_YEAR, VARIABLES, OUTPUT_STEPS)
# 'look_back_years' is multiplied by 12 and truncated to an int
# (presumably years -> monthly steps -- TODO confirm the data frequency).
look_back_steps = int(model_details['look_back_years']*12)
number_of_pca = model_details['number_of_pca']

In [5]:
# Build the supervised-learning dataset for the single target, bounded by
# January 1st of START_YEAR and January 1st of END_YEAR.
# NA_CUTOFF and START_YEAR are not defined in this notebook; presumably they come
# from the `from methods.config import *` star import -- TODO confirm.
data_prep.transform_to_supervised_learning(NA_CUTOFF, [VARIABLES], OUTPUT_STEPS, start = f'{START_YEAR}-01-01', end = f'{END_YEAR}-01-01')
# The call above stores its result on the instance; read it back from there.
dataset = data_prep.supervised_dataset
# 'transformed_data' is later indexed by dataset["X_variables"], so it is expected
# to be a column-addressable table (presumably a pandas DataFrame -- TODO confirm).
full_dataset = dataset['transformed_data']

In [21]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.express as px

class PCAExplain:
    """Fit a PCA on the training part of a feature table and plot what it learned.

    The PCA is fitted once, at construction time, on standardised features taken
    from every row of ``window`` except the trailing ``val_test + test_steps``
    rows, so validation and test observations never influence the components.

    Attributes:
        pca: The fitted ``sklearn.decomposition.PCA`` (all components kept).
        pca_component_df: Loadings table with one row per feature and one column
            per component (integer labels 0..n_components-1). Row labels are the
            descriptive names from ``DETAILED_VARIABLE_MAPPING`` when available,
            otherwise the original column name.
        number_of_pca: Stored as given; not used by any method of this class.
    """

    def __init__(self, window, X_variables, val_test, test_steps, number_of_pca):
        """Store the inputs, fit the PCA and build the loadings table.

        Args:
            window: Table indexable by ``X_variables`` and supporting ``.iloc``
                (presumably a pandas DataFrame -- confirm against the caller).
            X_variables: Column labels of ``window`` to use as PCA inputs.
            val_test: Number of trailing validation rows to exclude from fitting.
            test_steps: Number of trailing test rows to exclude from fitting.
            number_of_pca: Number of components used by the model; kept for reference.

        Raises:
            ValueError: If the hold-out is negative or leaves no rows to fit on.
        """
        self.window = window
        self.X_variables = X_variables
        self.val_test = val_test
        self.test_steps = test_steps
        self.number_of_pca = number_of_pca
        # Mapping keys get a "(t)" suffix so they can match the feature column names.
        self.variable_mapping = {f"{k}(t)": v for k, v in DETAILED_VARIABLE_MAPPING.items()}
        self.pca = self._get_pca_information(window, X_variables, val_test, test_steps)
        self.pca_component_df = pd.DataFrame(self.pca.components_, columns=self.X_variables).T
        # Fall back to the raw column name when no descriptive name exists;
        # mapping with the dict directly would turn such labels into NaN.
        self.pca_component_df.index = self.pca_component_df.index.map(
            lambda name: self.variable_mapping.get(name, name)
        )

    def _get_pca_information(self, window, X_variables, val_test, test_steps):
        """Fit a full-rank PCA on the standardised training rows of ``window``.

        The training rows are all rows except the last ``val_test + test_steps``.
        The arguments are honoured here rather than module-level constants, so the
        split always matches what the instance was constructed with.

        Returns:
            The fitted ``PCA`` instance.

        Raises:
            ValueError: If the hold-out is negative or leaves no rows to fit on.
        """
        X_data = window[X_variables]

        holdout_steps = val_test + test_steps
        if holdout_steps < 0:
            raise ValueError("val_test + test_steps must be non-negative")

        # Use an explicit end position: a negative slice bound of -0 would select
        # no rows at all when there is no hold-out.
        train_end = len(X_data) - holdout_steps
        if train_end <= 0:
            raise ValueError("No training rows left after removing validation and test steps")

        # Fit PCA on Training Only
        fit_pca_data = X_data.iloc[:train_end]
        scaled_fit_pca_data = StandardScaler().fit_transform(fit_pca_data)
        return PCA().fit(scaled_fit_pca_data)

    def plot_explained_variance(self):
        """Return a bar chart of the percentage of variance explained per component."""
        fig = px.bar(self.pca.explained_variance_ratio_ * 100)
        fig.update_traces(marker=dict(color="#0052CC"))
        fig.update_layout(
            title="PCA Explained Variance",
            xaxis_title="PCA",
            yaxis_title=r"% Explained Variance",
            plot_bgcolor="white",
            showlegend=False,
        )
        return fig

    def plot_pca_component(self, component_number, top_n = 10):
        """Return a bar chart of the largest loadings of one component.

        Args:
            component_number: Column label in ``pca_component_df`` (0-based
                component index).
            top_n: How many features to show, ranked by absolute loading.
        """
        # Rank by magnitude but keep the signed values for plotting.
        x = self.pca_component_df[component_number].sort_values(ascending=False, key=abs)[:top_n]
        fig = px.bar(x)
        fig.update_traces(marker=dict(color="#0052CC"))
        fig.update_layout(
            title=f"PCA({component_number}) Top Components",
            xaxis_title="Components",
            yaxis_title=r"Value",
            plot_bgcolor="white",
            showlegend=False,
        )
        return fig

In [22]:
# Fit the PCA explainer on the supervised dataset's feature columns.
# VAL_STEPS / TEST_STEPS are not defined in this notebook; presumably they come
# from the `from methods.config import *` star import -- TODO confirm.
test = PCAExplain(full_dataset, dataset["X_variables"], VAL_STEPS, TEST_STEPS, number_of_pca)

In [23]:
# Bar chart of explained_variance_ratio_ * 100 for every fitted component.
test.plot_explained_variance()

In [26]:
# Component labels are 0-based, so 9 selects the tenth component; with the
# default top_n this shows its 10 largest loadings by absolute value.
test.plot_pca_component(9)