# Extracting trend
The idea is to make the signals stationary by extracting the trend from the data.

An important remark: the trend is visible at the aggregated level (daily, weekly, monthly), but not in the raw 15-minute data.

In [None]:
import altair as alt
# Turn off Altair's max-rows check on chart data for this session.
alt.data_transformers.disable_max_rows()
import numpy as np 
import pandas as pd 
from pathlib import Path
from dtaidistance.preprocessing import differencing
from statsmodels.tsa.seasonal import seasonal_decompose, STL

In [None]:
# Directory holding the preprocessed, combined dataset and the two CSV files read from it.
PRE_PATH = Path('/cw/dtaiproj/ml/2020-FLAIR-VITO/profile-clustering/preprocessed/combined')
info_path = PRE_PATH.joinpath('reindexed_info.csv')
data_path = PRE_PATH.joinpath('reindexed_DST_data.csv')

# Only the first 100 rows of each file are loaded; the first two CSV columns
# become the row MultiIndex.
info_df = pd.read_csv(
    info_path,
    index_col=[0, 1],
    nrows=100,
)
data_df = pd.read_csv(
    data_path,
    index_col=[0, 1],
    nrows=100,
)

# The column labels are parsed into datetimes and the column axis is named
# 'timestamp'.
data_df.columns = pd.to_datetime(data_df.columns).rename('timestamp')


In [None]:
# Positional row index of the profile to inspect.
# Alternative: 10 shows a nice trend on the aggregate level.
TEST_PROFILE_IDX = 11

# Drop the timestamp columns that are empty for every loaded row, then turn
# the selected row into a single-column frame named 'value'.
profile_df = (
    data_df
    .dropna(axis='columns', how='all')
    .iloc[TEST_PROFILE_IDX]
    .to_frame(name='value')
)

In [None]:
def simple_chart(df, title):
    """Return a 1800-wide Altair line chart of ``value`` against ``timestamp``.

    ``df`` is reset to a flat index before plotting, so ``timestamp`` may be
    the frame's index; ``value`` must be one of its columns. ``title`` is
    used as the chart title.
    """
    base = alt.Chart(df.reset_index(), width=1800, title=title)
    line = base.mark_line()
    return line.encode(x='timestamp', y='value')

In [None]:
# Sum the profile per day, per week and per month.
daily_profile, weekly_profile, monthly_profile = (
    profile_df.resample(rule).sum()
    for rule in ('1D', '1W', '1M')
)


# Simply resample the data at different frequencies

In [None]:
# Stack one line chart per granularity vertically, finest resolution first.
alt.vconcat(*[
    simple_chart(frame, label)
    for frame, label in (
        (profile_df, '15 min'),
        (daily_profile, '1 day'),
        (weekly_profile, '1 week'),
        (monthly_profile, '1 month'),
    )
])

# Seasonal decomposition of the daily profile

In [None]:
# Robust STL fit on the daily 'value' series, with period and seasonal
# smoother length both set to 31.
result = STL(
    daily_profile.value,
    period=31,
    seasonal=31,
    robust=True,
).fit()
# Alternative kept for reference:
# result = seasonal_decompose(daily_profile.value.to_numpy(), period = 30, extrapolate_trend = True)

In [None]:
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot the fitted decomposition; the trailing semicolon keeps the notebook
# from echoing the call's return value.
result.plot();

In [None]:
# Store the fitted trend next to the daily values.
daily_profile['trend'] = result.trend
# 'resid' here is the detrended series (value minus trend); it is not the
# decomposition's own residual component.
daily_profile['resid'] = daily_profile['value'].sub(daily_profile['trend'])

In [None]:
# Display the daily frame, which now also carries the 'trend' and 'resid' columns.
daily_profile

In [None]:
# Reshape to long format: one row per (timestamp, variable) pair, so each of
# the daily frame's columns gets its own facet row.
# The melted values go into a dedicated 'measurement' column because the
# default value_name ('value') collides with the existing 'value' column;
# pandas warns about that collision in older releases and rejects it in
# newer ones.
(
    alt.Chart(
        pd.melt(
            daily_profile.reset_index(),
            id_vars='timestamp',
            value_name='measurement',
        ),
        width=800,
    )
    .mark_line()
    .encode(
        x='timestamp',
        # Keep the y-axis title the chart had before the column was renamed.
        y=alt.Y('measurement', title='value'),
    )
    .facet(row='variable:O')
    # Each facet gets its own y scale and its own x axis.
    .resolve_scale(y='independent')
    .resolve_axis(x='independent')
)