In [5]:
# general
import pandas as pd
import numpy as np

# stats
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf, month_plot, quarter_plot

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['figure.figsize'] = [20, 5]

# settings
color_pal = sns.color_palette()

def join_date_and_time(df):
    df = df.copy()
    df['Date'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H:%M:%S')
    df = df.drop(['Time'], axis=1)
    return df

def clean(df):
    df = df.copy()
    for col in df.columns:
        #df = df[~df[col].str.contains('?', regex=False)]
        df[col] = df[col].str.replace('?', 'nan', regex=False).astype(float)
        df = df.fillna(method='ffill')
    return df

def create_time_series_features(df):
    df['hour'] = df.index.hour
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    return df

FILE_PATH = 'data/household_power_consumption.txt'

df = pd.read_csv(FILE_PATH, delimiter=';', dtype=str)
df = join_date_and_time(df)
df = df.set_index('Date')
df = clean(df)
#df['Sub_metering_4'] = (df['Global_active_power'] * 1000 / 60) - (df['Sub_metering_1'] + df['Sub_metering_2'] + df['Sub_metering_3'])
df.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [6]:
from sklearn.preprocessing import RobustScaler
import torch

transformer = RobustScaler().fit(df)
processed_features = transformer.transform(df)
processed_features = pd.DataFrame(processed_features, columns=df.columns)
df = processed_features.set_index(df.index)

hourly_resampled_df = create_time_series_features(df.resample('H').mean())
daily_resampled_df = create_time_series_features(df.resample('D').mean())

column = 'Global_active_power'

hourly_shifted_df = hourly_resampled_df.copy()
daily_shifted_df = daily_resampled_df.copy()

hourly_shifted_df[column] = hourly_shifted_df[column].shift(-1)
daily_shifted_df[column] = daily_shifted_df[column].shift(-1)

hourly_shifted_df.loc[hourly_shifted_df.index[-1], 'Global_active_power'] = 0.0
daily_shifted_df.loc[daily_shifted_df.index[-1], 'Global_active_power'] = 0.0

In [8]:
FEATURES = ["Global_reactive_power","Voltage","Global_intensity","Sub_metering_1","Sub_metering_2","Sub_metering_3","hour","dayofweek","quarter","month","year","dayofyear"]

TARGET = ["Global_active_power"]

train_df = hourly_shifted_df[:'2009-12-31'].fillna(0.0)
test_df = hourly_resampled_df['2010-01-01':].fillna(0.0)

X_train = train_df[FEATURES]
y_train = train_df[TARGET] 

X_test = test_df[FEATURES]
y_test = test_df[TARGET] 

X_train = torch.tensor(X_train.values.astype(np.float32))
y_train = torch.tensor(y_train.values.astype(np.float32))

X_test = torch.tensor(X_test.values.astype(np.float32))
y_test = torch.tensor(y_test.values.astype(np.float32))

In [30]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
#loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 400

for epoch in range(epochs):
    model.train()
    #for X_batch, y_batch in loader:
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50, epochs: 400, batch: -")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50, epochs: 400, batch: -
RMSE: 0.7195882201194763, MAE: 0.42574596405029297, MAPE: 4.156902313232422 


In [27]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_lstm = nn.LSTM(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
#loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 400

for epoch in range(epochs):
    model.train()
    #for X_batch, y_batch in loader:
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50, epochs: 400, batch: -")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50x50, epochs: 400, batch: -
RMSE: 0.6189409494400024, MAE: 0.28782814741134644, MAPE: 2.642587184906006 


In [28]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_lstm = nn.LSTM(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x, _ = self.deep_lstm(x)
        x, _ = self.deep_lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
#loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 400

for epoch in range(epochs):
    model.train()
    #for X_batch, y_batch in loader:
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50x50x50, epochs: 400, batch: -")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50x50x50x50, epochs: 400, batch: -
RMSE: 0.7640368938446045, MAE: 0.4481506645679474, MAPE: 3.9662024974823 


In [29]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_lstm = nn.LSTM(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_linear = nn.Linear(50,50)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.deep_linear(x)
        x, _ = self.deep_lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
     
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
#loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 400

for epoch in range(epochs):
    model.train()
    #for X_batch, y_batch in loader:
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50x1x50x50, epochs: 400, batch: -")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50x50x1x50x50, epochs: 400, batch: -
RMSE: 0.7542456388473511, MAE: 0.47223737835884094, MAPE: 6.151860237121582 


In [31]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_lstm = nn.LSTM(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 50

for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50, epochs: 50, batch: 1000")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50x50, epochs: 50, batch: 1000
RMSE: 0.5498433113098145, MAE: 0.21881520748138428, MAPE: 2.246748924255371 


In [32]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_linear = nn.Linear(50,25)
        self.deep_lstm = nn.LSTM(input_size=25, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.deep_linear(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 50

for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50, epochs: 50, batch: 1000")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")

hidden_size: 50x50, epochs: 50, batch: 1000
RMSE: 0.5535768270492554, MAE: 0.22418361902236938, MAPE: 2.096618175506592 


In [None]:
import torch
from torch import nn

class EnergyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.GRU(input_size=12, hidden_size=50, num_layers=1, batch_first=True)
        self.deep_lstm = nn.GRU(input_size=50, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50,1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x, _ = self.deep_lstm(x)
        x = self.linear(x)
        return x
    
torch.manual_seed(42)
import torch.optim as optim
import torch.utils.data as data

model = EnergyModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=1000)

# RNN is not yet implemented on Mac silicon processor
#device = "mps" if torch.backends.mps.is_available() else "cpu"
#model.to(device)

epoch_count = []
train_loss_values = []
test_loss_values = []

epochs = 50

for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if epoch % 10 != 0:
        continue
    
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        test_loss = loss_fn(y_pred, y_test)
        
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error

y_pred = model(X_test)

mse = mean_squared_error(y_test.detach().numpy(), y_pred.detach().numpy(), squared=False)
mae = mean_absolute_error(y_test.detach().numpy(), y_pred.detach().numpy())
mape = mean_absolute_percentage_error(y_test.detach().numpy(), y_pred.detach().numpy())

print("hidden_size: 50x50, epochs: 50, batch: 1000")
print(f"RMSE: {np.sqrt(mse)}, MAE: {mae}, MAPE: {mape} ")