In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch import nn
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
# Dataset from https://www.kaggle.com/datasets/saurabhshahane/electricity-load-forecasting
# Read the training CSV and re-index its rows by the 'datetime' column in a single chained expression.
df = pd.read_csv('kaggle_electricity_load_forecasting_train.csv').set_index('datetime')

In [3]:
# Regression target: the 'nat_demand' column copied out as an (n_rows, 1) column vector.
data_output = df['nat_demand'].to_numpy(copy=True).reshape(-1, 1)
# Model inputs: the four '*_toc' columns copied out as an (n_rows, 4) array.
data_input = df[['T2M_toc', 'QV2M_toc', 'TQL_toc', 'W2M_toc']].to_numpy(copy=True)

In [4]:
# Use the GPU when one is available and fall back to the CPU otherwise, so this cell
# no longer fails on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# torch.tensor(..., dtype=torch.float32) is the explicit equivalent of the legacy
# torch.Tensor(ndarray) constructor: it copies the data and yields float32 values.
data_output = torch.tensor(data_output, dtype=torch.float32, device=device)
data_input = torch.tensor(data_input, dtype=torch.float32, device=device)

# A fixed random_state makes the 80/20 shuffle split (and therefore every metric
# printed further down) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    data_input, data_output, test_size=0.2, random_state=42
)

In [5]:
class AttentionNet(nn.Module):
    """Regressor that mixes two ReLU projections through an attention-like matrix product.

    The input is projected to ``linear_size`` features, two separate ReLU-activated
    linear projections of that hidden state are computed, and the first is multiplied
    by the Gram matrix (``x2ᵀ · x2``) of the second before the output projection.

    Args:
        inp_size: Number of input features (size of the last input dimension).
        out_size: Number of output features.
        linear_size: Width of the hidden projections.

    NOTE(review): the Gram matrix contracts over the second-to-last input dimension,
    so for a 2-D ``(rows, features)`` input every output row depends on all rows
    passed in the same call. Confirm this is intended.
    """

    def __init__(self, inp_size, out_size, linear_size=36):
        super().__init__()

        self.inp_size = inp_size
        self.linear_size = linear_size
        self.out_size = out_size

        # Layers are built in a fixed order so seeded initialisation stays stable.
        self.inp_lin = nn.Linear(inp_size, linear_size)
        self.lin1 = nn.Linear(linear_size, linear_size)
        self.lin2 = nn.Linear(linear_size, linear_size)
        self.out_lin = nn.Linear(linear_size, out_size)

    def forward(self, x):
        """Return the ``out_size``-wide prediction for ``x``."""
        hidden = self.inp_lin(x)
        left = F.relu(self.lin1(hidden))
        right = F.relu(self.lin2(hidden))
        # (linear_size, linear_size) Gram matrix of the second projection.
        gram = torch.matmul(right.transpose(-2, -1), right)
        mixed = torch.matmul(left, gram)
        return self.out_lin(mixed)


class DLFTNet(nn.Module):
    """Regressor built on an outer product of a Fourier branch and a ReLU branch.

    The input is projected to ``linear_size`` features. One branch takes the real
    part of the FFT of that hidden vector (along the last dimension); the other
    applies a linear layer followed by ReLU. Their outer product, a
    ``linear_size x linear_size`` matrix per sample, is flattened and mapped to
    ``out_size`` features.

    Args:
        inp_size: Number of input features (size of the last input dimension).
        out_size: Number of output features.
        linear_size: Width of the hidden projection; the output layer consumes
            ``linear_size ** 2`` features.
    """

    def __init__(self, inp_size, out_size, linear_size=36):
        super(DLFTNet, self).__init__()

        self.inp_size = inp_size
        self.linear_size = linear_size
        self.out_size = out_size

        self.inp_lin = nn.Linear(self.inp_size, self.linear_size)
        self.lin1 = nn.Linear(self.linear_size, self.linear_size)
        self.out_lin = nn.Linear(self.linear_size ** 2, self.out_size)

    def forward(self, x):
        """Return predictions of shape ``(*x.shape[:-1], out_size)``.

        Works for any number of leading batch dimensions (including none). The
        previous implementation indexed ``x.shape[1]`` and used ``view(-1, ...)``,
        which only worked for 2-D input; results for 2-D input are unchanged.
        """
        x = self.inp_lin(x)
        # Column vector per sample: (..., linear_size, 1).
        spectrum = torch.fft.fft(x).real.unsqueeze(-1)
        # Row vector per sample: (..., 1, linear_size).
        gate = F.relu(self.lin1(x)).unsqueeze(-2)
        # Per-sample outer product, then flatten the two trailing dimensions.
        outer = torch.matmul(spectrum, gate)
        return self.out_lin(outer.flatten(start_dim=-2))

class MLPNet(nn.Module):
    """Plain multilayer-perceptron baseline: linear -> linear + ReLU -> linear.

    Args:
        inp_size: Number of input features (size of the last input dimension).
        out_size: Number of output features.
        freq_domain: Stored on the instance as ``self.freq_domain``; ``forward``
            does not read it.
        linear_size: Width of the hidden layers.
    """

    def __init__(self, inp_size, out_size, freq_domain=None, linear_size=36):
        super().__init__()

        self.inp_size = inp_size
        self.linear_size = linear_size
        self.out_size = out_size
        self.freq_domain = freq_domain

        # Layers are built in a fixed order so seeded initialisation stays stable.
        self.inp_lin = nn.Linear(inp_size, linear_size)
        self.lin1 = nn.Linear(linear_size, linear_size)
        self.out_lin = nn.Linear(linear_size, out_size)

    def forward(self, x):
        """Return the ``out_size``-wide prediction for ``x``."""
        hidden = self.inp_lin(x)
        activated = F.relu(self.lin1(hidden))
        return self.out_lin(activated)

In [6]:
# Seed weight initialisation so re-running this cell starts from the same model.
torch.manual_seed(0)

# Put the model on whatever device the training data already lives on, instead of
# assuming CUDA is available.
net = MLPNet(4, 1).to(X_train.device)
opti = torch.optim.Adam(net.parameters(), lr=0.0001)
# L1Loss holds no parameters or buffers, so it needs no device placement.
loss = torch.nn.L1Loss()

# Full-batch training: every step uses the whole training split.
for i in range(10000):
    opti.zero_grad()
    out = net(X_train)
    train_loss = loss(out, y_train)
    train_loss.backward()
    opti.step()
    if i % 100 == 0:
        print(i, train_loss.item())

# Evaluate on the held-out split without recording an autograd graph.
with torch.no_grad():
    out = net(X_test).cpu().numpy()
cpu_y_test = y_test.detach().cpu().numpy()
print(root_mean_squared_error(cpu_y_test, out))

  from .autonotebook import tqdm as notebook_tqdm


0 1182.615478515625
100 1177.202880859375
200 1169.762451171875
300 1160.4730224609375
400 1148.1759033203125
500 1131.9595947265625
600 1111.125244140625
700 1085.0704345703125
800 1053.261474609375
900 1015.2235107421875
1000 970.5525512695312
1100 918.9031982421875
1200 859.9591064453125
1300 793.4448852539062
1400 719.1085815429688
1500 636.7168579101562
1600 546.1304321289062
1700 448.59893798828125
1800 351.4959411621094
1900 271.2974853515625
2000 221.19187927246094
2100 196.92221069335938
2200 186.4965362548828
2300 181.93414306640625
2400 179.2973175048828
2500 177.26112365722656
2600 175.37591552734375
2700 173.523193359375
2800 171.66604614257812
2900 169.80076599121094
3000 167.9314422607422
3100 166.061279296875
3200 164.20379638671875
3300 162.35951232910156
3400 160.54598999023438
3500 158.76885986328125
3600 157.00802612304688
3700 155.28018188476562
3800 153.59251403808594
3900 151.96241760253906
4000 150.39871215820312
4100 148.89822387695312
4200 147.45233154296875
4

In [7]:
# Seed weight initialisation so re-running this cell starts from the same model.
torch.manual_seed(0)

# Put the model on whatever device the training data already lives on, instead of
# assuming CUDA is available.
net = AttentionNet(4, 1).to(X_train.device)
opti = torch.optim.Adam(net.parameters(), lr=0.0001)
# L1Loss holds no parameters or buffers, so it needs no device placement.
loss = torch.nn.L1Loss()

# Full-batch training: every step uses the whole training split.
for i in range(10000):
    opti.zero_grad()
    out = net(X_train)
    train_loss = loss(out, y_train)
    train_loss.backward()
    opti.step()
    if i % 100 == 0:
        print(i, train_loss.item())

# Evaluate on the held-out split without recording an autograd graph.
# NOTE(review): AttentionNet.forward multiplies by x2^T @ x2, which sums over all
# rows passed in one call, so this prediction depends on how many rows X_test has
# compared with X_train. Confirm that is intended before comparing the RMSE.
with torch.no_grad():
    out = net(X_test).cpu().numpy()
cpu_y_test = y_test.detach().cpu().numpy()
print(root_mean_squared_error(cpu_y_test, out))

0 7282236.5
100 37487.6015625
200 48025.59375
300 47266.23828125
400 35357.7265625
500 39922.109375
600 39596.59765625
700 25588.37890625
800 30273.6640625
900 29120.55078125
1000 20498.322265625
1100 76905.4921875
1200 24109.064453125
1300 10171.361328125
1400 10208.13671875
1500 1711.6390380859375
1600 3421.7548828125
1700 6860.49365234375
1800 14253.9873046875
1900 4317.369140625
2000 3267.496826171875
2100 8079.59130859375
2200 9878.986328125
2300 1971.7557373046875
2400 6650.9404296875
2500 4765.849609375
2600 939.4674682617188
2700 3707.892578125
2800 5218.16357421875
2900 3862.19482421875
3000 2993.172119140625
3100 5562.0322265625
3200 1964.3350830078125
3300 3016.974853515625
3400 4873.9609375
3500 1670.475341796875
3600 931.0582275390625
3700 3241.307373046875
3800 2295.7392578125
3900 1980.336181640625
4000 1736.53466796875
4100 1684.1612548828125
4200 1713.9290771484375
4300 1749.7264404296875
4400 1745.22412109375
4500 1716.3428955078125
4600 1672.03564453125
4700 1592.165

In [8]:
# Seed weight initialisation so re-running this cell starts from the same model.
torch.manual_seed(0)

# Put the model on whatever device the training data already lives on, instead of
# assuming CUDA is available.
net = DLFTNet(4, 1).to(X_train.device)
opti = torch.optim.Adam(net.parameters(), lr=0.0001)
# L1Loss holds no parameters or buffers, so it needs no device placement.
loss = torch.nn.L1Loss()

# Full-batch training: every step uses the whole training split.
for i in range(10000):
    opti.zero_grad()
    out = net(X_train)
    train_loss = loss(out, y_train)
    train_loss.backward()
    opti.step()
    if i % 100 == 0:
        print(i, train_loss.item())

# Evaluate on the held-out split without recording an autograd graph.
with torch.no_grad():
    out = net(X_test).cpu().numpy()
cpu_y_test = y_test.detach().cpu().numpy()
print(root_mean_squared_error(cpu_y_test, out))

0 1247.81884765625
100 196.32151794433594
200 115.03426361083984
300 114.99043273925781
400 114.9762954711914
500 114.96565246582031
600 114.9582748413086
700 114.95230865478516
800 114.94759368896484
900 114.94313049316406
1000 114.93885803222656
1100 114.93463134765625
1200 114.92959594726562
1300 114.92379760742188
1400 114.91921997070312
1500 114.91461944580078
1600 114.91012573242188
1700 114.90570831298828
1800 114.90118408203125
1900 114.8965835571289
2000 114.89196014404297
2100 114.88724517822266
2200 114.8824462890625
2300 114.8775405883789
2400 114.87251281738281
2500 114.86746978759766
2600 114.86224365234375
2700 114.85684204101562
2800 114.8512954711914
2900 114.84574127197266
3000 114.84011840820312
3100 114.83438873291016
3200 114.82870483398438
3300 114.8231201171875
3400 114.8171615600586
3500 114.81134033203125
3600 114.80547332763672
3700 114.79936218261719
3800 114.79340362548828
3900 114.78734588623047
4000 114.78141021728516
4100 114.77549743652344
4200 114.76959