# Dealing with Non-Stationarity (Training & Validation)

### Loading Libraries

In [None]:
# Change the working directory to two levels above the current one.
# NOTE(review): presumably this lands on the repository root so the `src.*` imports resolve — confirm.
%cd ../..

In [None]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
from pandas.api.types import is_list_like

# Data Visualization
import seaborn as sns
import plotly.io as pio
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# Warnings
import joblib
import warnings
import humanize

# IO & Requests
import time
import random
import requests
from io import StringIO

# StatsModels
import statsmodels.api as sm
from statsmodels.tsa.seasonal import MSTL , DecomposeResult

# OS
import os
import sys
import pickleshare
import missingno as msno
from itertools import cycle
from typing import List, Tuple

# PyArrow
import pyarrow as pa

# FuncTools
from functools import partial

# Path & Notebook Optimizer
from pathlib import Path
import missingno as msno
from tqdm.auto import tqdm

# Scikit-Learn
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# IPython
from IPython.display import display, HTML

# NIXTLA
from statsforecast.core import StatsForecast
from utilsforecast.plotting import plot_series
from utilsforecast.evaluation import evaluate

# Forecast
# from datasetsforecast.losses import *
from utilsforecast.evaluation import evaluate

# SRC
from src.utils.general import LogTime
from src.utils.data_utils import _get_32_bit_dtype 
from src.transforms.target_transformations import AutoStationaryTransformer

In [None]:
# Suppress UserWarning and FutureWarning messages for the rest of the session.
# The categories are registered in this order so the resulting filter list
# ends up in the same order as two separate calls would produce.
for _category in (UserWarning, FutureWarning):
    warnings.filterwarnings(action="ignore", category=_category)

In [None]:
# Make sure the `imgs/chapter_07` directory exists (no error if it is already there).
Path("imgs/chapter_07").mkdir(parents=True, exist_ok=True)

# Location of the preprocessed London Smart Meters files under the user's home directory.
preprocessed = Path.home().joinpath("Desktop", "data", "london_smart_meters", "preprocessed")

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(0)

# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

# Make "plotly_white" the default template for Plotly figures.
pio.templates.default = "plotly_white"

# Add the project checkout to the module search path.
# NOTE(review): this is a machine-specific absolute path — adjust it when running elsewhere.
sys.path.append("/Users/joaquinromero/Desktop/MTSF")

In [None]:
from src.window_ops.rolling import (
    seasonal_rolling_max,
    seasonal_rolling_mean,
    seasonal_rolling_min,
    seasonal_rolling_std,
)

### Reading The `Preprocessed and Feature Engineered` Files (Train + Validation)

In [None]:
# Load the missing-value-imputed, feature-engineered train and validation
# splits, tag every row with the split it came from, and stack both splits
# into a single `train_df`.
try:
    # Only the two reads can raise FileNotFoundError, so only they are guarded.
    train_df = pd.read_parquet(
        preprocessed / "selected_blocks_train_missing_imputed_feature_engg.parquet"
    )
    val_df = pd.read_parquet(
        preprocessed / "selected_blocks_val_missing_imputed_feature_engg.parquet"
    )
except FileNotFoundError:
    display(HTML("""
    <div class="alert alert-block alert-warning">
    <b>Warning!</b> File not found. Please make sure you have run 01-Feature Engineering.ipynb in Chapter06
    </div>
    """))
else:
    # Remember which split each row belongs to before the two are merged.
    train_df["type"] = "train"
    val_df["type"] = "val"
    train_df = pd.concat([train_df, val_df])
    # The validation frame now lives inside `train_df`; drop the extra reference.
    del val_df
    display(train_df.head())

In [None]:
# Takes a while. Snack Break!
# For every meter (LCLid): fit an AutoStationaryTransformer on that meter's
# energy consumption, overwrite the column with the transformed values, and
# keep the fitted transformer in `transformer_pipelines`, keyed by LCLid.
transformer_pipelines = {}
for _id in tqdm(train_df["LCLid"].unique()):
    # Row selector for this meter. Building it scans the whole LCLid column,
    # so compute it once and reuse it for both the read and the write-back.
    is_current_id = train_df["LCLid"] == _id
    # Initialize the AutoStationaryTransformer with a seasonality period of
    # 48*7 (one week of 30-minute steps)
    auto_stationary = AutoStationaryTransformer(seasonal_period=48 * 7)
    # Creating the time series indexed by timestamp
    y = train_df.loc[is_current_id, ["energy_consumption", "timestamp"]].set_index("timestamp")
    # Fitting the transformer and transforming the series
    y_stat = auto_stationary.fit_transform(y, freq="30min")
    # Setting the transformed series back to the dataframe
    train_df.loc[is_current_id, "energy_consumption"] = y_stat.values
    # Saving the pipeline
    transformer_pipelines[_id] = auto_stationary

#### Saving `The Transformed File` as well as `The Transformer Pipelines`

In [None]:
# Keep only the identifier, timestamp and transformed target; index the frame
# by (LCLid, timestamp) and rename the target column to mark it as the
# auto-stationary version.
train_df = (
    train_df[["LCLid", "timestamp", "energy_consumption"]]
    .set_index(["LCLid", "timestamp"])
    .rename(columns={"energy_consumption": "energy_consumption_auto_stat"})
)

In [None]:
# Preview the first rows of the re-indexed frame with the renamed target column.
train_df.head()

In [None]:
# Write the transformed target frame to the preprocessed data folder.
train_df.to_parquet(
    path=preprocessed.joinpath("selected_blocks_train_val_auto_stat_target.parquet")
)

# Store the per-LCLid fitted transformers alongside it.
joblib.dump(
    value=transformer_pipelines,
    filename=preprocessed.joinpath("auto_transformer_pipelines_train_val.pkl"),
)