# Libraries

In [2]:
# Standard
import pandas as pd

# Machine Learning
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

%store -r Kelmarsh_df Penmanshiel_df

# Training Data Set

In [28]:
class TimeSeriesDataProcessor:
    """Build lagged samples from a DataFrame and wrap them in DataLoaders.

    Pipeline: ``shifted_data`` appends lagged copies of every column,
    ``prepare_datasets`` splits the rows into train / eval / test subsets and
    wraps each in a ``TimeSeriesDataset``, and ``create_dataloaders`` wraps
    those datasets in ``DataLoader`` objects.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source data; every column is lagged.
    forecast : int
        Smallest lag that is generated.
    look_back : int
        Number of consecutive lags generated per column; the lags are
        ``forecast, forecast + 1, ..., forecast + look_back - 1``.
    batch_size : int, default 64
        Batch size used for all three loaders.
    train_size : float or int, default 0.7
        ``train_size`` passed to the first ``train_test_split`` call.
    test_size : float or int, default 0.5
        ``test_size`` passed to the second ``train_test_split`` call, which
        divides the remainder of the first split into eval and test.
    random_state : int, default 42
        Seed passed to both ``train_test_split`` calls.
    shuffle_split : bool, default True
        Passed as ``shuffle`` to both ``train_test_split`` calls. ``True``
        keeps the original shuffled split. ``False`` splits in row order, so
        train, eval and test are contiguous blocks of the input.

    Notes
    -----
    Consecutive rows share most of their lagged values. With a shuffled split
    a held-out row's current value can therefore also appear as a lagged
    feature of a training row; use ``shuffle_split=False`` when that overlap
    matters for the evaluation.
    """

    def __init__(self, dataframe, forecast, look_back, batch_size=64, train_size=0.7, test_size=0.5,
                 random_state=42, shuffle_split=True):
        self.dataframe = dataframe
        self.forecast = forecast
        self.look_back = look_back
        self.batch_size = batch_size
        self.train_size = train_size
        self.test_size = test_size
        self.random_state = random_state
        self.shuffle_split = shuffle_split

    def shifted_data(self):
        """Return the source frame with lagged copies of every column appended.

        For each column ``c`` and each lag ``i`` in
        ``range(forecast, look_back + forecast)`` a column named
        ``"c (lag i)"`` holding ``c`` shifted down by ``i`` rows is added.
        Rows containing any NaN (including those introduced by shifting) are
        dropped. ``self.dataframe`` itself is not modified.

        Returns
        -------
        pandas.DataFrame
            Original columns first, then the lag columns grouped by source
            column in ascending lag order.
        """
        data = self.dataframe
        shifts = range(self.forecast, self.look_back + self.forecast)

        print(f"data columns: \n {data.columns}")
        print(f"data shape: {data.shape}")

        # One single-column frame per (column, lag) pair, renamed so the lag is
        # visible in the column label.
        lagged_columns = [
            data[[column]].shift(lag).rename(columns={column: f"{column} (lag {lag})"})
            for column in data.columns
            for lag in shifts
        ]

        return pd.concat([data] + lagged_columns, axis=1).dropna()

    def prepare_datasets(self):
        """Split the lagged frame and store train / test / eval datasets.

        The un-lagged ``'Wind speed (m/s)'`` column is removed when present.
        Each subset is wrapped in ``TimeSeriesDataset``, which treats the first
        remaining column as the label and all other columns as features.

        Sets ``self.train_dataset``, ``self.test_dataset`` and
        ``self.eval_dataset``.
        """
        # Build the lag matrix exactly once; errors="ignore" makes the drop a
        # no-op when the column is absent, so no fallback recomputation (and no
        # duplicated debug output) is needed.
        s_df = self.shifted_data().drop(columns=['Wind speed (m/s)'], errors='ignore')

        print(f"shifted data frame: {s_df.shape}")
        print(f"shifted df columns: \n {s_df.columns}")

        # First split: train vs. remainder. Second split: remainder -> eval / test.
        df_train, df_rem = train_test_split(
            s_df,
            train_size=self.train_size,
            random_state=self.random_state,
            shuffle=self.shuffle_split,
        )
        df_eval, df_test = train_test_split(
            df_rem,
            test_size=self.test_size,
            random_state=self.random_state,
            shuffle=self.shuffle_split,
        )

        # Wrapping datasets
        self.train_dataset = TimeSeriesDataset(df_train)
        self.test_dataset = TimeSeriesDataset(df_test)
        self.eval_dataset = TimeSeriesDataset(df_eval)

    def create_dataloaders(self):
        """Prepare the datasets and return ``(train, test, eval)`` DataLoaders.

        Only the training loader shuffles its batches; the test and eval
        loaders iterate in dataset order. The loaders are also stored on the
        instance as ``train_loader``, ``test_loader`` and ``eval_loader``.
        """
        self.prepare_datasets()

        self.train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
        self.test_loader = DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
        self.eval_loader = DataLoader(self.eval_dataset, batch_size=self.batch_size, shuffle=False)

        return self.train_loader, self.test_loader, self.eval_loader

class TimeSeriesDataset(Dataset):
    """Map-style dataset over a DataFrame whose first column is the label.

    Column 0 of the frame is stored as ``labels``; every remaining column is
    stored as ``features``. Items are converted to float tensors on access.
    """

    def __init__(self, dataframe):
        label_column = dataframe.iloc[:, 0]
        feature_columns = dataframe.iloc[:, 1:]
        self.labels = label_column.values
        self.features = feature_columns.values

    def __len__(self):
        """Number of samples (one per row of the source frame)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float tensors."""
        feature_row, label_value = self.features[idx], self.labels[idx]
        return (
            torch.tensor(feature_row, dtype=torch.float),
            torch.tensor(label_value, dtype=torch.float),
        )

In [25]:
# Take the last 4096 rows of the frame stored under key '1', index them by
# timestamp (dropping the index name), and remove the long-term wind column.
data = (
    Kelmarsh_df['1'][-4096:]
    .set_index('# Date and time')
    .rename_axis(None)
    .drop(columns=['Long Term Wind (m/s)'])
)

print(f" data columns {data.columns}")
data.head()

 data columns Index(['Wind speed (m/s)', 'Energy Export (kWh)'], dtype='object')


Unnamed: 0,Wind speed (m/s),Energy Export (kWh)
2021-06-02 13:20:00,5.447385,51.0
2021-06-02 13:30:00,5.426735,73.0
2021-06-02 13:40:00,5.1147,41.0
2021-06-02 13:50:00,5.327649,54.0
2021-06-02 14:00:00,6.526953,110.0


In [29]:

# forecast=1 with look_back=72 generates lags 1 through 72 for every column.
processor = TimeSeriesDataProcessor(
    dataframe=data,
    forecast=1,
    look_back=72,
    batch_size=64,
)
# create_dataloaders() returns the loaders in (train, test, eval) order.
(train_loader, test_loader, eval_loader) = processor.create_dataloaders()

data columns: 
 Index(['Wind speed (m/s)', 'Energy Export (kWh)'], dtype='object')
data shape: (4096, 2)
shifted data frame: (3878, 145)
shifted df columns: 
 Index(['Energy Export (kWh)', 'Wind speed (m/s) (lag 1)',
       'Wind speed (m/s) (lag 2)', 'Wind speed (m/s) (lag 3)',
       'Wind speed (m/s) (lag 4)', 'Wind speed (m/s) (lag 5)',
       'Wind speed (m/s) (lag 6)', 'Wind speed (m/s) (lag 7)',
       'Wind speed (m/s) (lag 8)', 'Wind speed (m/s) (lag 9)',
       ...
       'Energy Export (kWh) (lag 63)', 'Energy Export (kWh) (lag 64)',
       'Energy Export (kWh) (lag 65)', 'Energy Export (kWh) (lag 66)',
       'Energy Export (kWh) (lag 67)', 'Energy Export (kWh) (lag 68)',
       'Energy Export (kWh) (lag 69)', 'Energy Export (kWh) (lag 70)',
       'Energy Export (kWh) (lag 71)', 'Energy Export (kWh) (lag 72)'],
      dtype='object', length=145)
