In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
import numpy as np
from scipy import stats
from sklearn import preprocessing
import seaborn as sns

In [2]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Each output row holds the `n_in` lagged observations followed by the
    `n_out` current/future observations for one time step.

    Arguments:
        data: Sequence of observations. Anything `pd.DataFrame` accepts:
            a flat list, a list of rows, a 1-D or 2-D NumPy array, a
            pandas Series or a pandas DataFrame. The index of a
            Series/DataFrame is preserved in the result.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning, with
        columns named 'var<j>(t-<i>)', 'var<j>(t)' and 'var<j>(t+<i>)'.
    """
    df = pd.DataFrame(data)
    # Count variables on the constructed frame instead of on `data`:
    # 1-D inputs (Series, 1-D arrays) have no shape[1], and a list of rows
    # holds more than one variable.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Shifting leaves NaN at the edges of the frame; drop those incomplete
    # rows on request (rows with NaN already present in `data` go too).
    if dropnan:
        agg = agg.dropna()
    return agg

In [3]:
# Load only the 'WEP ($/MWh)' column of the 2019 file.
price_col = "WEP ($/MWh)"
df_2019 = pd.read_csv("data/2019_WEP.csv", usecols=[price_col])

# One timestamp per half hour from 2019-01-01 00:00 to 2019-12-31 23:30.
# ISO-8601 strings avoid day-first/month-first ambiguity when parsing.
time_index = pd.date_range(start="2019-01-01 00:00:00", end="2019-12-31 23:30:00", freq="30min")
df_2019.index = time_index

# Statistics are taken on the raw series, before any capping.
std = df_2019[price_col].std()
mean = df_2019[price_col].mean()

# Cap values above mean + 3*std at that threshold. A single .loc[rows, col]
# assignment writes into df_2019 itself; the chained form
# df[col].loc[mask] = ... may write to a temporary copy and leave it unchanged.
upper_cap = mean + 3 * std
df_2019.loc[df_2019[price_col] > upper_cap, price_col] = upper_cap

In [4]:
# Preview the supervised framing: 336 half-hourly lags (7 days x 48 per day)
# as inputs and 48 steps (one day) as outputs; show the first five rows.
series_to_supervised(data=df_2019, n_in=336, n_out=48).head(5)

Unnamed: 0,var1(t-336),var1(t-335),var1(t-334),var1(t-333),var1(t-332),var1(t-331),var1(t-330),var1(t-329),var1(t-328),var1(t-327),...,var1(t+38),var1(t+39),var1(t+40),var1(t+41),var1(t+42),var1(t+43),var1(t+44),var1(t+45),var1(t+46),var1(t+47)
2019-01-08 00:00:00,83.33,83.83,83.19,83.13,83.2,78.81,78.3,73.55,73.65,73.41,...,129.1,126.75,115.42,131.12,114.81,123.36,107.11,115.01,103.04,93.63
2019-01-08 00:30:00,83.83,83.19,83.13,83.2,78.81,78.3,73.55,73.65,73.41,73.46,...,126.75,115.42,131.12,114.81,123.36,107.11,115.01,103.04,93.63,105.15
2019-01-08 01:00:00,83.19,83.13,83.2,78.81,78.3,73.55,73.65,73.41,73.46,73.65,...,115.42,131.12,114.81,123.36,107.11,115.01,103.04,93.63,105.15,100.07
2019-01-08 01:30:00,83.13,83.2,78.81,78.3,73.55,73.65,73.41,73.46,73.65,73.38,...,131.12,114.81,123.36,107.11,115.01,103.04,93.63,105.15,100.07,97.01
2019-01-08 02:00:00,83.2,78.81,78.3,73.55,73.65,73.41,73.46,73.65,73.38,73.29,...,114.81,123.36,107.11,115.01,103.04,93.63,105.15,100.07,97.01,97.54
