In [1]:
import os
import platform
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch

In [2]:
# Display the current working directory of the notebook kernel.
os.getcwd()

'/Users/youngseoklee/Desktop/workplace/dacon_energy'

In [3]:
# Set the data directory for the current operating system
_platform_name = platform.platform()
if 'macOS' in _platform_name:
    path = '/Users/youngseoklee/Desktop/workplace/datas/dacon_energy/'
elif 'Linux' in _platform_name:
    path = '/workplace/datas/dacon_energy/'
else:
    # Fail fast: printing and continuing would leave `path` undefined, and the
    # data-loading cell would then die with an unrelated NameError.
    raise RuntimeError('어느 os에도 속해있지 않습니다')
print(path)

/Users/youngseoklee/Desktop/workplace/datas/dacon_energy/


In [4]:
# Select the compute device
# Probe the backends directly instead of branching on the OS name: CUDA first,
# then Apple MPS, then CPU. This yields the same result as before on macOS and
# Linux, and `device` is now always defined on any other platform as well.
_mps_backend = getattr(torch.backends, 'mps', None)  # attribute may be missing in older torch builds
if torch.cuda.is_available():
    device = 'cuda'
elif _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(device)

mps


In [5]:
# Load the training table, the test table and the sample submission.
# Each CSV uses its first column as the DataFrame index.
train_df, test_df, sub = (
    pd.read_csv(path + file_name, index_col=0)
    for file_name in ('train_df.csv', 'test_df.csv', 'sample_submission.csv')
)

In [6]:
# Display the (rows, columns) shape of the train and test frames.
train_df.shape , test_df.shape

((204000, 29), (16800, 28))

In [7]:
# List the column labels of the training frame.
train_df.keys()

Index(['건물번호', '기온(C)', '강수량(mm)', '풍속(m/s)', '습도(%)', '전력소비량(kWh)', 'Year',
       'Month', 'Day', 'Hour', '연면적(m2)', '냉방면적(m2)', '태양광용량(kW)',
       'ESS저장용량(kWh)', 'PCS용량(kW)', '건물유형_건물기타', '건물유형_공공', '건물유형_대학교',
       '건물유형_데이터센터', '건물유형_백화점및아울렛', '건물유형_병원', '건물유형_상용', '건물유형_아파트',
       '건물유형_연구소', '건물유형_지식산업센터', '건물유형_할인마트', '건물유형_호텔및리조트', '요일', '주말'],
      dtype='object')

# Preprocessing

In [13]:
import torch
from torch import nn
from torch.nn import Transformer
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np

In [14]:
# Number of consecutive rows in each input window fed to the model.
SEQUENCE_LENGTH = 24
# Fraction of the windows assigned to the training split (the rest is validation).
TRAIN_SIZE = 0.8
# Number of samples per DataLoader batch.
BATCH_SIZE = 64

In [15]:


# Split the frame into a feature matrix (every column except the target)
# and the target vector, both as NumPy arrays.
feature_matrix = train_df.drop('전력소비량(kWh)', axis=1).values
target_values = train_df['전력소비량(kWh)'].values

# Build sliding windows: sample i holds rows i .. i+SEQUENCE_LENGTH-1 of the
# features, and its label is the target of the row right after the window.
# NOTE(review): windows are cut over consecutive rows without looking at the
# '건물번호' column, so presumably some windows span two buildings — confirm
# this is intended.
window_starts = range(feature_matrix.shape[0] - SEQUENCE_LENGTH)
X = np.array([feature_matrix[start:start + SEQUENCE_LENGTH, :] for start in window_starts])
y = np.array([target_values[start + SEQUENCE_LENGTH] for start in window_starts]).reshape(-1, 1)


In [16]:
X.shape, y.shape

# X is (batch, seq_len, features); y is (batch, 1)

((203976, 24, 26), (203976, 1))

In [17]:
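# Window indices run from 0 to 203975 (203976 windows in total).
# Window i covers 24 consecutive rows of the feature matrix:
#   X[0]      <- rows 0:24
#   X[1]      <- rows 1:25
#   ...
#   X[203975] <- rows 203975:203999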


In [18]:
# Split into training and validation datasets.
# shuffle=False keeps the windows in their original order, so the first
# TRAIN_SIZE fraction becomes the training set and the remainder the validation set.
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=TRAIN_SIZE, shuffle=False)

In [19]:

# Wrap the NumPy arrays as float tensors and build one DataLoader per split
def _as_float_tensor(array):
    """Convert a NumPy array into a torch tensor cast with ``.float()``."""
    return torch.from_numpy(array).float()


train_data = TensorDataset(_as_float_tensor(X_train), _as_float_tensor(y_train))
val_data = TensorDataset(_as_float_tensor(X_val), _as_float_tensor(y_val))
# NOTE(review): test_df is wrapped row by row here, not cut into
# SEQUENCE_LENGTH windows like X — confirm this matches the model input.
test_data = TensorDataset(_as_float_tensor(test_df.values))

# Every loader iterates in the stored order (no shuffling) with the same batch size
train_loader, val_loader, test_loader = (
    DataLoader(dataset, shuffle=False, batch_size=BATCH_SIZE)
    for dataset in (train_data, val_data, test_data)
)



In [20]:
# Display the split shapes: X_* is (B, Seq_len, C) and y_* is (B, 1)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((163180, 24, 26), (40796, 24, 26), (163180, 1), (40796, 1))

# Define Model

In [21]:
# Time-series decomposition building blocks
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series.

    The series is edge-padded along the time axis by repeating its first and
    last time steps, then averaged with ``nn.AvgPool1d``. A total of
    ``kernel_size - 1`` padding steps is added, so with ``stride=1`` the output
    keeps the input sequence length for both odd and even ``kernel_size``.

    Args:
        kernel_size: Width of the averaging window, in time steps.
        stride: Stride of the pooling window.
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """
        Smooth ``x`` along its time axis.

        Args:
            x: Tensor laid out as [Batch, Input length, Channel].

        Returns:
            The moving average of ``x`` in the same [Batch, Length, Channel] layout.
        """
        # Split the kernel_size - 1 padding steps between both ends. For an odd
        # kernel this is the usual symmetric (kernel_size - 1) // 2 per side; for
        # an even kernel the extra step goes to the end so the length is preserved
        # (symmetric padding would come out one step short).
        total_pad = self.kernel_size - 1
        front_len = total_pad // 2
        end_len = total_pad - front_len
        front = x[:, 0:1, :].repeat(1, front_len, 1)
        end = x[:, -1:, :].repeat(1, end_len, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last axis, so move time there and back again.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block.

    Splits a series into a smooth component (its moving average) and the
    remainder left after subtracting that component.
    """
    def __init__(self, kernel_size):
        super().__init__()
        # Stride 1 so the smoothed series lines up step by step with the input.
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` where ``residual = x - moving_mean``."""
        trend = self.moving_avg(x)
        return x - trend, trend

In [22]:
# 1-layer linear network implementation
class Model(nn.Module):
    """
    DLinear

    Decomposes the input with ``series_decomp`` into a seasonal (residual) part
    and a trend (moving-average) part, maps each part from ``seq_len`` to
    ``pred_len`` time steps with a linear layer, and sums the two results.

    ``configs`` must provide ``seq_len``, ``pred_len``, ``individual`` and
    ``enc_in``. When ``individual`` is truthy, each of the ``enc_in`` channels
    gets its own linear layers; otherwise one pair of layers is shared by all
    channels. ``Linear_Decoder`` is constructed but not used in ``forward``.
    """
    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

        # Decomposition kernel size
        kernel_size = 25
        self.decompsition = series_decomp(kernel_size)
        self.individual = configs.individual
        self.channels = configs.enc_in

        if self.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            self.Linear_Decoder = nn.ModuleList()
            for _ in range(self.channels):
                self.Linear_Seasonal.append(self._averaging_linear())
                self.Linear_Trend.append(self._averaging_linear())
                self.Linear_Decoder.append(nn.Linear(self.seq_len, self.pred_len))
        else:
            self.Linear_Seasonal = self._averaging_linear()
            self.Linear_Trend = self._averaging_linear()
            self.Linear_Decoder = nn.Linear(self.seq_len, self.pred_len)

    def _averaging_linear(self):
        """Build a seq_len -> pred_len linear layer whose weights all equal 1/seq_len."""
        layer = nn.Linear(self.seq_len, self.pred_len)
        layer.weight = nn.Parameter(
            torch.full((self.pred_len, self.seq_len), 1 / self.seq_len)
        )
        return layer

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        seasonal_part, trend_part = self.decompsition(x)
        # The linear layers act on the last axis, so move time there:
        # [Batch, Channel, Input length]
        seasonal_part = seasonal_part.permute(0, 2, 1)
        trend_part = trend_part.permute(0, 2, 1)

        if not self.individual:
            seasonal_output = self.Linear_Seasonal(seasonal_part)
            trend_output = self.Linear_Trend(trend_part)
        else:
            # Zero buffers with the inputs' dtype and device, filled channel by channel.
            seasonal_output = seasonal_part.new_zeros(
                (seasonal_part.size(0), seasonal_part.size(1), self.pred_len)
            )
            trend_output = trend_part.new_zeros(
                (trend_part.size(0), trend_part.size(1), self.pred_len)
            )
            for ch in range(self.channels):
                seasonal_output[:, ch, :] = self.Linear_Seasonal[ch](seasonal_part[:, ch, :])
                trend_output[:, ch, :] = self.Linear_Trend[ch](trend_part[:, ch, :])

        forecast = seasonal_output + trend_output
        return forecast.permute(0, 2, 1)  # to [Batch, Output length, Channel]