In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

# Load the dataset CSV from the mounted Google Drive (the path is a plain string literal, not a raw string).
df = pd.read_csv("/content/drive/MyDrive/풍력발전공모전/df_new_2.csv")

In [3]:
df.head()

Unnamed: 0,date,cvprecipr,dswrf,fvmax_50m,fvmin_50m,hcc,lcc,lhnf,lsprecip,lsprecipr,...,wind_dir_sin,wind_dir_cos,air_density,wind_speed_dir_interaction,hour,month,sin_hour,cos_hour,sin_month,cos_month
0,2020-01-02 0:00,0.003905,111.88281,8.548861,8.431797,0.152649,0.0,3.463112,0.0,0.0,...,-0.87953,0.475844,1.260925,3.286746,0,1,0.0,1.0,0.5,0.866025
1,2020-01-02 1:00,0.011452,258.71042,9.016828,8.387072,0.0,0.0,7.625702,0.0,0.0,...,-0.897914,0.440171,1.254309,3.211706,1,1,0.258819,0.965926,0.5,0.866025
2,2020-01-02 2:00,0.021348,385.6125,9.082644,8.784274,0.0,0.0,14.090698,0.0,0.0,...,-0.890101,0.455764,1.24825,3.319019,2,1,0.5,0.866025,0.5,0.866025
3,2020-01-02 3:00,0.031884,468.6591,8.749963,7.899988,0.0,0.090363,21.724625,0.0,0.0,...,-0.894413,0.447241,1.241211,2.916209,3,1,0.707107,0.707107,0.5,0.866025
4,2020-01-02 4:00,0.043603,492.2545,8.347463,7.631493,0.0,0.078217,29.783371,0.0,0.0,...,-0.922975,0.384859,1.237008,2.494523,4,1,0.866025,0.5,0.5,0.866025


# 0. class 임베딩

## 01. 1️⃣ 임베딩 모듈 (Embed.py 참조)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import time


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# ----------------------------
# 위치 임베딩
# ----------------------------
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal position table.

    A ``(1, max_len, d_model)`` table is built once and registered as the
    non-trainable buffer ``pe``. Even columns hold sines and odd columns hold
    cosines of ``position * div_term``.

    Args:
        d_model: Width of the table. Both even and odd values are supported.
        max_len: Number of positions that are pre-computed.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model).float()
        pe.requires_grad = False
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer cosine column than sine
        # columns, so only the first d_model // 2 frequencies are used here.
        # For an even d_model this slice is the whole of div_term.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return the first ``x.size(1)`` rows of the table, shape ``(1, x.size(1), d_model)``.

        Only the size of ``x`` along dimension 1 is read; its values are ignored.
        """
        return self.pe[:, :x.size(1), :]

# ----------------------------
# 값 임베딩
# ----------------------------
class TokenEmbedding(nn.Module):
    """Project the channel dimension to ``d_model`` with a bias-free, circularly padded 1-D convolution.

    The convolution uses kernel size 3 and padding 1, so the length of the
    sequence dimension is unchanged.
    """

    def __init__(self, c_in, d_model):
        super().__init__()
        self.tokenConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=d_model,
            kernel_size=3,
            padding=1,
            padding_mode='circular',
            bias=False,
        )
        nn.init.kaiming_normal_(self.tokenConv.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        """Swap dims 1 and 2 for Conv1d, convolve, then swap them back."""
        channels_first = x.permute(0, 2, 1)
        convolved = self.tokenConv(channels_first)
        return convolved.transpose(1, 2)

# ----------------------------
# 시간 임베딩 (Temporal + TimeFeature)
# ----------------------------
class TemporalEmbedding(nn.Module):
    """Sum of four learnable lookup tables indexed by calendar fields.

    ``embed_type`` and ``freq`` are accepted but do not change what is built:
    every table is an ``nn.Embedding`` regardless of their values.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super().__init__()
        # Creation order (hour, weekday, day, month) is kept so that seeded
        # initialisation matches.
        self.hour_embed = nn.Embedding(24, d_model)
        self.weekday_embed = nn.Embedding(7, d_model)
        self.day_embed = nn.Embedding(32, d_model)
        self.month_embed = nn.Embedding(13, d_model)

    def forward(self, x):
        """Look up and add the embeddings of the last-dim columns 0..3 of ``x``.

        Column 0 indexes the month table, 1 the day table, 2 the weekday table
        and 3 the hour table. ``x`` is cast to integer indices first.
        """
        idx = x.long()
        month_idx, day_idx, weekday_idx, hour_idx = (idx[:, :, col] for col in range(4))
        total = self.hour_embed(hour_idx) + self.weekday_embed(weekday_idx)
        total = total + self.day_embed(day_idx)
        total = total + self.month_embed(month_idx)
        return total

class TimeFeatureEmbedding(nn.Module):
    """Bias-free linear projection of per-step time features to ``d_model``.

    The expected number of input features is looked up from ``freq``; an
    unknown ``freq`` raises ``KeyError``. ``embed_type`` is accepted but unused.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super().__init__()
        # Frequency code -> number of time features the linear layer expects.
        features_per_freq = {
            'h': 4,
            't': 5,
            's': 6,
            'm': 1,
            'a': 1,
            'w': 2,
            'd': 3,
            'b': 3,
        }
        self.embed = nn.Linear(features_per_freq[freq], d_model, bias=False)

    def forward(self, x):
        """Apply the linear projection over the last dimension of ``x``."""
        projected = self.embed(x)
        return projected

# ----------------------------
# 최종 데이터 임베딩
# ----------------------------
class DataEmbedding(nn.Module):
    """Combine value, temporal and positional embeddings, then apply dropout.

    Args:
        c_in: Number of input channels given to ``TokenEmbedding``.
        d_model: Embedding width shared by all three components.
        embed_type: ``'timeF'`` selects ``TimeFeatureEmbedding``; any other
            value selects ``TemporalEmbedding``.
        freq: Frequency code forwarded to the temporal embedding. For
            ``TimeFeatureEmbedding`` it decides how many time features are
            expected.
        dropout: Dropout probability applied to the summed embedding.
    """

    def __init__(self, c_in, d_model, embed_type='timeF', freq='h', dropout=0.1):
        super().__init__()
        self.value_embedding = TokenEmbedding(c_in, d_model)
        self.position_embedding = PositionalEmbedding(d_model)
        # Forward embed_type / freq instead of silently using the sub-module
        # defaults, so a non-hourly freq builds a correctly sized layer.
        if embed_type != 'timeF':
            self.temporal_embedding = TemporalEmbedding(d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TimeFeatureEmbedding(d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, x_mark):
        """Embed ``x``; ``x_mark`` may be ``None`` to skip the temporal term."""
        if x_mark is None:
            x = self.value_embedding(x) + self.position_embedding(x)
        else:
            x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
        return self.dropout(x)


## 02. 2️⃣ Inception 블록 (Conv2D 기반)

In [6]:
class Inception_Block_V1(nn.Module):
    """Average of parallel 2-D convolutions with kernel sizes 1, 3, 5, ...

    Branch ``i`` uses ``kernel_size=2*i+1`` with ``padding=i``, so every branch
    keeps the spatial size of its input. Weights are Kaiming-normal
    initialised (fan_out, relu) and biases are set to zero.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6):
        super().__init__()
        branches = []
        for half_width in range(num_kernels):
            branches.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2 * half_width + 1, padding=half_width)
            )
        self.kernels = nn.ModuleList(branches)
        # Initialise only after every branch exists, in branch order.
        for branch in self.kernels:
            nn.init.kaiming_normal_(branch.weight, mode='fan_out', nonlinearity='relu')
            if branch.bias is not None:
                nn.init.constant_(branch.bias, 0)

    def forward(self, x):
        """Run every branch on ``x`` and return the element-wise mean of their outputs."""
        branch_outputs = []
        for branch in self.kernels:
            branch_outputs.append(branch(x))
        stacked = torch.stack(branch_outputs, dim=-1)
        return stacked.mean(-1)


## 03. 3️⃣ TimesBlock (FFT 기반 주기 추출 + Inception 적용)

In [7]:
def FFT_for_Period(x, k=2):
    """Pick the ``k`` strongest frequencies along dim 1 and convert them to periods.

    The amplitude spectrum of the real FFT over dim 1 is averaged over dim 0
    and the last dim. Bin 0 is zeroed before the top-``k`` selection.

    Returns:
        A pair ``(periods, weights)``. ``periods`` is a NumPy integer array
        equal to ``x.shape[1] // top_indices``. ``weights`` is a tensor holding
        the amplitude, averaged over the last dim only, at the selected bins
        for every item along dim 0.
    """
    spectrum = torch.fft.rfft(x, dim=1)
    amplitude = abs(spectrum)
    mean_amplitude = amplitude.mean(0).mean(-1)
    mean_amplitude[0] = 0
    top_bins = torch.topk(mean_amplitude, k)[1]
    top_bins = top_bins.detach().cpu().numpy()
    periods = x.shape[1] // top_bins
    per_item_weight = amplitude.mean(-1)[:, top_bins]
    return periods, per_item_weight

class TimesBlock(nn.Module):
    """Fold the sequence by its dominant periods, convolve in 2-D, and merge.

    For each of the ``top_k`` periods returned by ``FFT_for_Period`` the
    sequence is zero-padded to a multiple of the period, reshaped to a
    ``(length // period, period)`` grid per channel, passed through two
    Inception blocks with a GELU in between, and flattened back. The branches
    are combined with softmax weights taken from the spectral amplitudes and
    added to the input (residual connection).
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k
        # Construction order (expand, GELU, contract) is kept as-is.
        expand = Inception_Block_V1(configs.d_model, configs.d_ff, num_kernels=configs.num_kernels)
        activation = nn.GELU()
        contract = Inception_Block_V1(configs.d_ff, configs.d_model, num_kernels=configs.num_kernels)
        self.conv = nn.Sequential(expand, activation, contract)

    def forward(self, x):
        """Process ``x`` of shape ``(B, T, N)`` and return a tensor of the same shape."""
        batch, steps, channels = x.size()
        total_len = self.seq_len + self.pred_len
        periods, amplitudes = FFT_for_Period(x, self.k)

        branches = []
        for branch_idx in range(self.k):
            period = periods[branch_idx]
            if total_len % period == 0:
                padded_len = total_len
                folded = x
            else:
                # Round the length up to the next multiple of the period.
                padded_len = ((total_len // period) + 1) * period
                zeros = torch.zeros([batch, padded_len - total_len, channels], device=x.device)
                folded = torch.cat([x, zeros], dim=1)
            # (B, L, N) -> (B, N, L // period, period) for the 2-D convolutions.
            folded = folded.reshape(batch, padded_len // period, period, channels)
            folded = folded.permute(0, 3, 1, 2).contiguous()
            folded = self.conv(folded)
            # Back to (B, L, N), then drop the padded tail.
            unfolded = folded.permute(0, 2, 3, 1).reshape(batch, -1, channels)
            branches.append(unfolded[:, :total_len, :])

        stacked = torch.stack(branches, dim=-1)
        weights = F.softmax(amplitudes, dim=1)
        weights = weights.unsqueeze(1).unsqueeze(1).repeat(1, steps, channels, 1)
        merged = torch.sum(stacked * weights, -1)
        return merged + x


## 04. 4️⃣ TimesNet 전체 모델

In [8]:
class TimesNet(nn.Module):
    """Forecasting model: embed, extend along time, apply TimesBlocks, project.

    ``forward`` normalises each series over dim 1 and embeds it. A linear
    layer over the time axis stretches ``seq_len`` steps to
    ``seq_len + pred_len``. Then ``e_layers`` TimesBlocks run, each followed
    by a shared LayerNorm, and the result is projected to ``c_out`` channels.
    The normalisation is undone and the last ``pred_len`` steps are returned.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.layer = configs.e_layers
        # Sub-modules are created in the same order as before so that seeded
        # initialisation is unchanged.
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout
        )
        blocks = [TimesBlock(configs) for _ in range(configs.e_layers)]
        self.model = nn.ModuleList(blocks)
        self.layer_norm = nn.LayerNorm(configs.d_model)
        self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        self.predict_linear = nn.Linear(self.seq_len, self.pred_len + self.seq_len)

    def forward(self, x_enc, x_mark_enc, x_dec=None, x_mark_dec=None):
        """Return the forecast for the last ``pred_len`` steps.

        ``x_dec`` and ``x_mark_dec`` are accepted but not used.
        """
        # Per-series normalisation over dim 1; the mean is detached.
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        normalised = centered / stdev

        hidden = self.enc_embedding(normalised, x_mark_enc)
        # The linear layer acts on the last dim, so move time there and back.
        hidden = self.predict_linear(hidden.permute(0, 2, 1)).permute(0, 2, 1)

        for layer_idx in range(self.layer):
            hidden = self.layer_norm(self.model[layer_idx](hidden))

        projected = self.projection(hidden)

        # Undo the normalisation with the statistics of the input window.
        total_len = self.pred_len + self.seq_len
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, total_len, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, total_len, 1)
        restored = projected.mul(scale).add(shift)
        return restored[:, -self.pred_len:, :]


In [11]:
class Config:
    """Run settings for the notebook, read through attribute access."""

    # ---- Task / model identity ----
    model = 'TimesNet'
    task_name = 'long_term_forecast'
    target = 'energy_kwh'   # column used as the evaluation target
    features = 'S'
    freq = 'h'              # frequency code handed to the data embedding
    embed = 'timeF'         # selects the time-feature (linear) temporal embedding

    # ---- Window lengths (shortened for training) ----
    seq_len = 168           # input window length
    label_len = 84          # overlap between the input window and the target slice
    pred_len = 24           # forecast horizon

    # ---- Channel counts ----
    enc_in = 46
    dec_in = enc_in
    c_out = enc_in

    # ---- Network size ----
    d_model = 128
    d_ff = 512
    n_heads = 4
    e_layers = 1            # number of TimesBlocks
    d_layers = 1
    dropout = 0.2
    activation = 'gelu'

    # ---- TimesNet-specific parameters ----
    top_k = 2               # number of dominant periods per block
    num_kernels = 6         # parallel convolutions per Inception block

    # ---- Other model options ----
    factor = 3
    output_attention = False
    use_norm = True
    class_strategy = 'projection'

    # ---- Optimisation / data loading ----
    batch_size = 64
    num_workers = 0
    train_epochs = 3
    learning_rate = 0.0001


In [13]:
df.head()

Unnamed: 0,date,cvprecipr,dswrf,fvmax_50m,fvmin_50m,hcc,lcc,lhnf,lsprecip,lsprecipr,...,wind_dir_sin,wind_dir_cos,air_density,wind_speed_dir_interaction,hour,month,sin_hour,cos_hour,sin_month,cos_month
0,2020-01-02 0:00,0.003905,111.88281,8.548861,8.431797,0.152649,0.0,3.463112,0.0,0.0,...,-0.87953,0.475844,1.260925,3.286746,0,1,0.0,1.0,0.5,0.866025
1,2020-01-02 1:00,0.011452,258.71042,9.016828,8.387072,0.0,0.0,7.625702,0.0,0.0,...,-0.897914,0.440171,1.254309,3.211706,1,1,0.258819,0.965926,0.5,0.866025
2,2020-01-02 2:00,0.021348,385.6125,9.082644,8.784274,0.0,0.0,14.090698,0.0,0.0,...,-0.890101,0.455764,1.24825,3.319019,2,1,0.5,0.866025,0.5,0.866025
3,2020-01-02 3:00,0.031884,468.6591,8.749963,7.899988,0.0,0.090363,21.724625,0.0,0.0,...,-0.894413,0.447241,1.241211,2.916209,3,1,0.707107,0.707107,0.5,0.866025
4,2020-01-02 4:00,0.043603,492.2545,8.347463,7.631493,0.0,0.078217,29.783371,0.0,0.0,...,-0.922975,0.384859,1.237008,2.494523,4,1,0.866025,0.5,0.5,0.866025


In [14]:
# StandardScaler is used below but is not imported by any other visible cell,
# so import it here to keep the cell runnable on a fresh kernel.
from sklearn.preprocessing import StandardScaler

# 1️⃣ Parse the date column (values that cannot be parsed become NaT)
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# 2️⃣ Model columns: every column except 'date', with the target moved to the end
# NOTE(review): this keeps 'Unnamed: 0' (the CSV index column) as an input feature;
# confirm that is intended.
df_cols = [col for col in df.columns if col != 'date']
df_cols.remove('energy_kwh')  # raises ValueError if the target column is missing
data_cols_ordered = df_cols + ['energy_kwh']

# 3️⃣ Chronological split by calendar year (rows with NaT dates match no split)
years = df['date'].dt.year
df_train_rows = df[years <= 2021]
df_val_rows = df[years == 2022]
df_test_rows = df[years == 2023]

# 4️⃣ Fit the scaler on the training rows only, then apply it to every split
scaler = StandardScaler()
train_scaled = scaler.fit_transform(df_train_rows[data_cols_ordered].values).astype(np.float32)
val_scaled = scaler.transform(df_val_rows[data_cols_ordered].values).astype(np.float32)
test_scaled = scaler.transform(df_test_rows[data_cols_ordered].values).astype(np.float32)

# 5️⃣ Time stamps for the temporal embedding
def create_stamp(df):
    """Return a float32 array with columns (month, day, weekday, hour) taken from ``df['date']``."""
    return pd.DataFrame({
        'month': df['date'].dt.month,
        'day': df['date'].dt.day,
        'weekday': df['date'].dt.weekday,
        'hour': df['date'].dt.hour
    }).values.astype(np.float32)

train_stamp = create_stamp(df_train_rows)
val_stamp = create_stamp(df_val_rows)
test_stamp = create_stamp(df_test_rows)


In [None]:
# ======================================================================
# TimesNet 학습용 DataLoader 및 학습 루프
# ======================================================================

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time

# --- 1. Dataset 정의 ---
class Dataset_Custom(Dataset):
    """Sliding-window dataset over pre-aligned arrays.

    Item ``i`` is the tuple ``(x, y, x_stamp, y_stamp)``. ``x`` covers
    ``[i, i + seq_len)`` of ``data_x``. ``y`` covers ``label_len`` steps
    before the end of that window plus the following ``pred_len`` steps of
    ``data_y``. The stamp slices use the same ranges on ``data_stamp``.
    ``size`` is ``[seq_len, label_len, pred_len]`` and defaults to
    ``(384, 96, 96)``.
    """

    def __init__(self, data_x, data_y, data_stamp, size=None):
        self.data_x = data_x
        self.data_y = data_y
        self.data_stamp = data_stamp
        window = (384, 96, 96) if size is None else (size[0], size[1], size[2])
        self.seq_len, self.label_len, self.pred_len = window

    def __getitem__(self, index):
        enc_start = index
        enc_stop = enc_start + self.seq_len
        dec_start = enc_stop - self.label_len
        dec_stop = dec_start + self.label_len + self.pred_len
        seq_x = self.data_x[enc_start:enc_stop]
        seq_y = self.data_y[dec_start:dec_stop]
        seq_x_mark = self.data_stamp[enc_start:enc_stop]
        seq_y_mark = self.data_stamp[dec_start:dec_stop]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Number of start positions with a full input window and forecast horizon.
        return len(self.data_x) - (self.seq_len + self.pred_len) + 1

def data_provider(args, flag, data_x, data_y, data_stamp):
    """Build a ``Dataset_Custom`` and its ``DataLoader`` for one split.

    Args:
        args: Object exposing ``batch_size``, ``num_workers``, ``seq_len``,
            ``label_len`` and ``pred_len``.
        flag: Split name. ``'train'`` is shuffled and drops the last partial
            batch. ``'val'`` keeps ``args.batch_size`` but is iterated in
            order and in full. Any other value uses batch size 1, in order.
        data_x, data_y, data_stamp: Arrays passed straight to ``Dataset_Custom``.

    Returns:
        ``(dataset, loader)``.
    """
    # Only training benefits from shuffling / dropping the ragged last batch.
    # Evaluation splits must be visited in order and completely, so metrics
    # cover every window and sample 0 is the first window of the split.
    is_train = flag == 'train'
    batch_size = args.batch_size if flag in ['train', 'val'] else 1
    dataset = Dataset_Custom(data_x, data_y, data_stamp, [args.seq_len, args.label_len, args.pred_len])
    print(f"'{flag}' data loaded with {len(dataset)} samples.")
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=args.num_workers,
        drop_last=is_train,
    )
    return dataset, loader

# --- 2. Instantiate the configuration ---
args = Config()

# --- 3. Build the datasets and loaders (each scaled array serves as both input and target) ---
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_data, train_loader = data_provider(
    args, 'train', data_x=train_scaled, data_y=train_scaled, data_stamp=train_stamp
)
val_data, val_loader = data_provider(
    args, 'val', data_x=val_scaled, data_y=val_scaled, data_stamp=val_stamp
)
test_data, test_loader = data_provider(
    args, 'test', data_x=test_scaled, data_y=test_scaled, data_stamp=test_stamp
)

# --- 4. Create the model on the selected device ---
model = TimesNet(args)
model = model.to(device)
print(f"\n🎉 TimesNet 모델 로딩 완료! (사용 장치: {device})")

'train' data loaded with 15769 samples.
'val' data loaded with 8401 samples.
'test' data loaded with 8569 samples.

🎉 TimesNet 모델 로딩 완료! (사용 장치: cpu)

>>>>>>> 학습 시작 >>>>>>>


In [None]:
# --- 5. Training loop ---
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = nn.SmoothL1Loss()

print("\n>>>>>>> 학습 시작 >>>>>>>")
start_time = time.time()

for epoch in range(args.train_epochs):
    model.train()
    epoch_loss = 0.0
    for batch in train_loader:
        # Move all four tensors of the batch to the target device.
        batch_x, batch_y, batch_x_mark, batch_y_mark = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()

        # The model ignores its decoder arguments; the encoder inputs are passed for both.
        outputs = model(batch_x, batch_x_mark, batch_x, batch_x_mark)
        outputs = outputs[:, -args.pred_len:, :]
        targets = batch_y[:, -args.pred_len:, :]

        loss = criterion(outputs, targets)
        loss.backward()
        # Clip the global gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{args.train_epochs} | Loss: {avg_loss:.6f}")

total_time = time.time() - start_time
print(f"\n학습 완료! 총 소요 시간: {total_time:.2f}초")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

# ============================
# 1️⃣ NMAE 계산 함수
# ============================
def compute_nmae(y_true, y_pred, capacity):
    """Normalized Mean Absolute Error in percent.

    Only entries whose true value is at least 10% of ``capacity`` are scored.
    If fewer than 10 entries qualify, all entries are used instead. The mean
    absolute error is divided by ``capacity`` and multiplied by 100.
    """
    actual = y_true.flatten()
    predicted = y_pred.flatten()
    selected = actual >= 0.1 * capacity
    if selected.sum() < 10:  # too few qualifying points: fall back to everything
        selected = slice(None)
    abs_error = np.abs(actual[selected] - predicted[selected])
    return np.mean(abs_error) / capacity * 100

# Capacity = maximum of the target column over the whole dataframe
capacity = df[Config.target].max()
print(f"Capacity ({Config.target} 최대값): {capacity:.4f}")

# ============================
# 2️⃣ Predict over the whole validation split
# ============================
# Iterate the validation windows in their natural order and without dropping
# the last partial batch. Every window is then scored, and preds[0] is the
# first window of the split regardless of how val_loader was configured.
eval_loader = DataLoader(
    val_data,
    batch_size=Config.batch_size,
    shuffle=False,
    num_workers=Config.num_workers,
    drop_last=False,
)

model.eval()
preds, trues = [], []

with torch.no_grad():
    for batch_x, batch_y, batch_x_mark, batch_y_mark in eval_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        batch_x_mark, batch_y_mark = batch_x_mark.to(device), batch_y_mark.to(device)

        # TimesNet forward
        outputs = model(batch_x, batch_x_mark)
        outputs = outputs[:, -Config.pred_len:, :]
        batch_y = batch_y[:, -Config.pred_len:, :]

        preds.append(outputs.cpu())
        trues.append(batch_y.cpu())

# Concatenate all batches: (num_windows, pred_len, enc_in)
preds = torch.cat(preds, dim=0).numpy()
trues = torch.cat(trues, dim=0).numpy()

# Undo the StandardScaler transform
inverse_preds_all = scaler.inverse_transform(preds.reshape(-1, Config.enc_in))
inverse_trues_all = scaler.inverse_transform(trues.reshape(-1, Config.enc_in))

# The scaled arrays follow data_cols_ordered ('date' removed, target moved to
# the end), not df.columns, so the target index must come from that list.
target_idx = data_cols_ordered.index(Config.target)

inverse_preds_target = inverse_preds_all[:, target_idx]
inverse_trues_target = inverse_trues_all[:, target_idx]

# NMAE
nmae = compute_nmae(inverse_trues_target, inverse_preds_target, capacity)
print(f"\nValidation NMAE (%): {nmae:.4f}")

# ============================
# 3️⃣ Inspect the first validation window
# ============================
first_sample_pred = preds[0]
first_sample_true = trues[0]

# Back to original units
inverse_first_pred = scaler.inverse_transform(first_sample_pred)
inverse_first_true = scaler.inverse_transform(first_sample_true)

first_pred_target = inverse_first_pred[:, target_idx]
first_true_target = inverse_first_true[:, target_idx]

# Dates of the forecast horizon of window 0: the validation rows (same year
# filter that produced val_scaled) right after the first seq_len input steps.
df_val = df[df['date'].dt.year == 2022]
start_idx = Config.seq_len
comparison_dates = df_val.iloc[start_idx:start_idx + Config.pred_len]['date'].reset_index(drop=True)

# Result table
results_df = pd.DataFrame({
    'date': comparison_dates,
    'Actual': first_true_target,
    'Predicted': first_pred_target
})
results_df['Difference'] = results_df['Actual'] - results_df['Predicted']

print("\n--- 첫 샘플 예측 결과 상세 비교 ---")
print(results_df.head(30))

# ============================
# 4️⃣ Plot
# ============================
plt.figure(figsize=(15,6))
plt.plot(results_df['date'], results_df['Actual'], label='Actual', marker='o')
plt.plot(results_df['date'], results_df['Predicted'], label='Predicted', marker='x', linestyle='--')
plt.title('TimesNet Prediction vs Actual (First Validation Sample)')
plt.xlabel('Date')
plt.ylabel('Energy (kWh)')
plt.legend()
plt.grid(True)
plt.show()
