In [6]:
import torch
from torch.utils.data import Dataset
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import pandas as pd
from tqdm import tqdm
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [7]:
import sys
import sktime
import tqdm as tq
import matplotlib
import seaborn as sns
import sklearn as skl
import pandas as pd
import numpy as np
import sys
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting import plot_series

print("-------------------------- Python & library version --------------------------")
print("Python version: {}".format(sys.version))
print("pandas version: {}".format(pd.__version__))
print("numpy version: {}".format(np.__version__))
print("matplotlib version: {}".format(matplotlib.__version__))
print("tqdm version: {}".format(tq.__version__))
print("sktime version: {}".format(sktime.__version__))
print("seaborn version: {}".format(sns.__version__))
print("scikit-learn version: {}".format(skl.__version__))
print("------------------------------------------------------------------------------")

-------------------------- Python & library version --------------------------
Python version: 3.9.17 (main, Jul  5 2023, 20:47:11) [MSC v.1916 64 bit (AMD64)]
pandas version: 2.0.3
numpy version: 1.25.2
matplotlib version: 3.7.2
tqdm version: 4.65.0
sktime version: 0.21.0
seaborn version: 0.12.2
scikit-learn version: 1.3.0
------------------------------------------------------------------------------


In [8]:
current_path = os.getcwd()
parent_path = os.path.abspath(os.path.join(current_path, '..','..'))
sys.path.append(parent_path)
pd.set_option('display.max_columns', 30)
train = pd.read_csv('../../../train.csv')
train.drop(['num_date_time'],axis=1,inplace=True)
test = pd.read_csv('../../../test.csv')
test.drop(['num_date_time'],axis=1,inplace=True)
building = pd.read_csv('../../../building_info.csv')
test = pd.read_csv('../../../merge_test_encoding.csv', encoding = "CP949")
train_loc = pd.read_csv('../../../train_location.csv')
train = pd.concat([train,train_loc['location']],axis=1)
def SMAPE(true, pred):
    v = 2 * abs(pred - true) / (abs(pred) + abs(true))
    output = np.mean(v) * 100
    return output

In [9]:
from preprocessing import fillnan

train, test = fillnan(train, test)

In [10]:
# Class Definitions

class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)

        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual

class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forecast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decomposition(x)
        trend_init, seasonal_init = trend_init.permute(0, 2, 1), seasonal_init.permute(0, 2, 1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forecast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forecast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output
        ################
        #x = torch.relu(x) # 음수 값 제거
        ################
        return x.permute(0, 2, 1)

class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

In [11]:
train_df = train[['건물번호', '일시', '전력소비량(kWh)']]
train_df

Unnamed: 0,건물번호,일시,전력소비량(kWh)
0,1,20220601 00,1085.28
1,1,20220601 01,1047.36
2,1,20220601 02,974.88
3,1,20220601 03,953.76
4,1,20220601 04,986.40
...,...,...,...
203995,100,20220824 19,881.04
203996,100,20220824 20,798.96
203997,100,20220824 21,825.12
203998,100,20220824 22,640.08


In [76]:
# Function to reshape the data into a time series format for each ID
'''
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row[6:].values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)
'''
'''
def reshape_data(df):
    time_series_data = []
    # Grouping by building number and iterating through each group
    for building_num, group_data in df.groupby('건물번호'):
        # Extracting the power consumption column and converting it to a numpy array
        power_consumption = group_data['전력소비량(kWh)'].values.astype(float)
        time_series_data.append(power_consumption)
    return np.array(time_series_data)
'''
def reshape_data(df):
    time_series_data = []
    building_numbers = []  # Add this line to collect building numbers
    # Grouping by building number and iterating through each group
    for building_num, group_data in df.groupby('건물번호'):
        # Extracting the power consumption column and converting it to a numpy array
        power_consumption = group_data['전력소비량(kWh)'].values.astype(float)
        time_series_data.append(power_consumption)
        building_numbers.append(building_num)  # Add this line to store building numbers
    return np.array(time_series_data), building_numbers  # Return building numbers as well

# Modified time_slide_df function to work with the current data format
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, 1)
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')

# Function to create DataLoader for each ID
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)

# Reshape the data
time_series_data = reshape_data(train_df)
time_series_data

(array([[1085.28, 1047.36,  974.88, ..., 1806.24, 1387.2 , 1925.28],
        [1170.36, 1146.96, 1115.28, ..., 1564.92,  948.96, 1348.92],
        [ 926.28,  884.52,  882.36, ..., 2135.16, 1158.84, 1025.28],
        ...,
        [ 533.52,  516.78,  509.4 , ...,  971.82,  815.94,  632.34],
        [ 642.96,  603.36,  581.22, ..., 1062.  ,  865.62,  752.58],
        [ 372.  ,  333.36,  320.64, ...,  825.12,  640.08,  540.24]]),
 [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97

In [35]:
def SMAPE(true, pred):
    v = 2 * torch.abs(pred - true) / (torch.abs(pred) + torch.abs(true))
    output = torch.mean(v) * 100
    return output


In [79]:
def reshape_data(df):
    time_series_data = []
    building_numbers = []  # Add this line to collect building numbers
    # Grouping by building number and iterating through each group
    for building_num, group_data in df.groupby('건물번호'):
        # Extracting the power consumption column and converting it to a numpy array
        power_consumption = group_data['전력소비량(kWh)'].values.astype(float)
        time_series_data.append(power_consumption)
        building_numbers.append(building_num)  # Add this line to store building numbers
    return np.array(time_series_data), building_numbers  # Return building numbers as well
time_series_data, building_numbers = reshape_data(train_df)  # Get building numbers here


In [94]:
# Define the window size, forecast size, and batch size
window_size = 168   # Considering the last 30 days for prediction
forecast_size = 168 # Predicting the next 21 days
batch_size = 2040
epoch_count = 1000
lr = 0.001
min_delta = 0.001
patience = 10

future_predictions_by_id = {}
loss_history = {}

for idx, (id_val, data) in tqdm(enumerate(zip(building_numbers, time_series_data)), total=100):
    # Standardizing the data
    mean_ = np.mean(data)
    std_ = np.std(data)
    standardized_data = (data - mean_) / std_
    individual_loss_history = []
    # Create DataLoader
    train_dl = create_dataloader(standardized_data, window_size, forecast_size, batch_size)
    best_loss = float('inf')
    no_improvement_count = 0

    # Training the model
    DLinear_model = LTSF_DLinear(window_size=window_size, forecast_size=forecast_size, kernel_size=3, individual=False, feature_size=1)
    DLinear_model.to(device)
    #criterion = nn.MSELoss()
    #criterion = SMAPE()
    optimizer = torch.optim.AdamW(DLinear_model.parameters(), lr=lr)
    for epoch in range(1, epoch_count + 1):
        loss_list = []
        DLinear_model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.to(device), target.to(device)  # Move the data to the GPU if available
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = SMAPE(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        if((epoch % 10) == 0):
            avg_loss = np.mean(loss_list)
            #print(f"Id {idx}, Epoch {epoch}: Loss = {avg_loss}")
            individual_loss_history.append(avg_loss)
            if avg_loss + min_delta < best_loss:
                best_loss = avg_loss
                no_improvement_count = 0
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    #print(f"Early stopping at epoch {epoch} for ID {id_val}")
                    break


    loss_history[id_val] = individual_loss_history


    last_window_data = torch.tensor(standardized_data[-window_size:]).unsqueeze(0).unsqueeze(-1).float().to(device)
    future_prediction = DLinear_model(last_window_data)


    future_prediction = future_prediction.squeeze().detach().cpu().numpy() * std_ + mean_
    #print(id_val)
    # Store the prediction
    future_predictions_by_id[id_val] = future_prediction


future_predictions_by_id

100%|██████████| 100/100 [22:58<00:00, 13.78s/it]


{1: array([2038.9614, 1893.7472, 1817.9104, 1801.22  , 1845.583 , 2043.8711,
        2330.1958, 2594.1704, 3095.1545, 3504.2996, 4003.3901, 4214.533 ,
        4527.198 , 4564.668 , 4450.8066, 4317.4204, 4157.64  , 3752.747 ,
        2952.5205, 2844.2844, 1852.9685, 2212.23  , 1587.4865, 2165.4377,
        2174.1833, 2113.7144, 2040.8823, 1887.157 , 1908.9774, 2136.5635,
        2223.5562, 2492.016 , 3099.3257, 3166.0835, 4012.4194, 4301.5156,
        4350.848 , 4487.2764, 4586.9595, 4340.393 , 4065.7378, 3855.4849,
        3038.6497, 2900.6912, 2241.0205, 2048.709 , 1611.9603, 1919.9934,
        1882.9607, 1776.4857, 1916.8176, 1759.9016, 1787.8872, 1790.561 ,
        1958.9045, 2276.4785, 2883.94  , 3070.5317, 3796.3535, 3915.729 ,
        4122.837 , 4339.8525, 4232.084 , 4234.4585, 3969.3032, 3884.247 ,
        3133.3127, 2803.5674, 2356.667 , 2074.6023, 1519.5842, 1692.4878,
        1877.9388, 1580.6155, 1750.4753, 1420.9103, 1504.5974, 1554.363 ,
        1728.196 , 1676.3184, 2122.

In [95]:
future_predictions_by_id

{1: array([2038.9614, 1893.7472, 1817.9104, 1801.22  , 1845.583 , 2043.8711,
        2330.1958, 2594.1704, 3095.1545, 3504.2996, 4003.3901, 4214.533 ,
        4527.198 , 4564.668 , 4450.8066, 4317.4204, 4157.64  , 3752.747 ,
        2952.5205, 2844.2844, 1852.9685, 2212.23  , 1587.4865, 2165.4377,
        2174.1833, 2113.7144, 2040.8823, 1887.157 , 1908.9774, 2136.5635,
        2223.5562, 2492.016 , 3099.3257, 3166.0835, 4012.4194, 4301.5156,
        4350.848 , 4487.2764, 4586.9595, 4340.393 , 4065.7378, 3855.4849,
        3038.6497, 2900.6912, 2241.0205, 2048.709 , 1611.9603, 1919.9934,
        1882.9607, 1776.4857, 1916.8176, 1759.9016, 1787.8872, 1790.561 ,
        1958.9045, 2276.4785, 2883.94  , 3070.5317, 3796.3535, 3915.729 ,
        4122.837 , 4339.8525, 4232.084 , 4234.4585, 3969.3032, 3884.247 ,
        3133.3127, 2803.5674, 2356.667 , 2074.6023, 1519.5842, 1692.4878,
        1877.9388, 1580.6155, 1750.4753, 1420.9103, 1504.5974, 1554.363 ,
        1728.196 , 1676.3184, 2122.

In [96]:
# 예측값을 형식에 맞게 정리합니다.

# future_predictions_by_id는 이전 코드에서 생성된 예측값이며, 이를 실제 코드 실행 후에 사용해야 합니다.
# 이 예시에서는 더미 데이터를 사용하겠습니다. 실제로는 아래 주석을 해제하고 진행해야 합니다.
# future_predictions_by_id = {1: np.zeros(504), 2: np.zeros(504), ...} # 예시 더미 데이터
sample_submission = pd.read_csv('../../../sample_submission.csv')

# 예측값을 제출 형식에 맞게 삽입합니다.
for building_num, predictions in future_predictions_by_id.items():
    # 각 빌딩 번호에 해당하는 행을 찾습니다.
    building_rows = sample_submission['num_date_time'].str.startswith(str(building_num) + '_')
    # 해당 행에 예측값을 삽입합니다.
    sample_submission.loc[building_rows, 'answer'] = predictions

sample_submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,2038.961426
1,1_20220825 01,1893.747192
2,1_20220825 02,1817.910400
3,1_20220825 03,1801.219971
4,1_20220825 04,1845.583008
...,...,...
16795,100_20220831 19,1094.417969
16796,100_20220831 20,978.481445
16797,100_20220831 21,875.020752
16798,100_20220831 22,690.448914


In [97]:
sample_submission.to_csv('submitd_ws168_W.csv',index=False)
sample_submission

Unnamed: 0,num_date_time,answer
0,1_20220825 00,2038.961426
1,1_20220825 01,1893.747192
2,1_20220825 02,1817.910400
3,1_20220825 03,1801.219971
4,1_20220825 04,1845.583008
...,...,...
16795,100_20220831 19,1094.417969
16796,100_20220831 20,978.481445
16797,100_20220831 21,875.020752
16798,100_20220831 22,690.448914


In [16]:
import json

# File path
file_path = "loss_history.txt"

# Open the file for writing
with open(file_path, 'w') as file:
    # Iterate through the loss_history dictionary
    for id_val, loss_list in loss_history.items():
        # Write the ID and corresponding loss values
        file.write(f"ID: {id_val}, Losses: {', '.join(map(str, loss_list))}\n")
        # Add 5 spaces as a separator
        file.write('     \n')

print(f"Loss history saved to {file_path}")


Loss history saved to loss_history.txt


In [9]:
# Load the loss_history dictionary from a JSON file
with open(file_path, 'r') as file:
    loaded_loss_history = json.load(file)


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# PSFA

In [49]:
import pandas as pd
train_df = pd.read_csv('train.csv')
submit = pd.read_csv('sample_submission.csv')

# Class Definitions

class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual

class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forecast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decomposition(x)
        trend_init, seasonal_init = trend_init.permute(0, 2, 1), seasonal_init.permute(0, 2, 1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forecast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forecast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output

        ################
        #x = torch.relu(x) # 음수 값 제거
        ################
        return x.permute(0, 2, 1)

class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
# Function to reshape the data into a time series format for each ID
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row[6:].values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)

# Modified time_slide_df function to work with the current data format
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, 1)
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')

# Function to create DataLoader for each ID
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)

# Reshape the data
time_series_data = reshape_data(train_df)

# Define the window size, forecast size, and batch size
window_size = 105   # Considering the last 30 days for prediction
forecast_size = 21 # Predicting the next 21 days
batch_size = 128
epoch_count = 777
lr = 0.001
min_delta = 0.001
patience = 10

future_predictions_by_id = {}
loss_history = {}

# Iterate through the data by ID
for idx, (id_val, data) in tqdm(enumerate(zip(train_df["ID"], time_series_data)), total=len(train_df["ID"])):
    # Standardizing the data
    mean_ = np.mean(data)
    std_ = np.std(data)
    standardized_data = (data - mean_) / std_
    individual_loss_history = []
    # Create DataLoader
    train_dl = create_dataloader(standardized_data, window_size, forecast_size, batch_size)
    best_loss = float('inf')
    no_improvement_count = 0

    # Training the model
    DLinear_model = LTSF_DLinear(window_size=window_size, forecast_size=forecast_size, kernel_size=15, individual=False, feature_size=1)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    for epoch in range(1, epoch_count + 1):
        loss_list = []
        DLinear_model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        if((epoch % 10) == 0):
            avg_loss = np.mean(loss_list)
            print(f"Id {idx}, Epoch {epoch}: Loss = {avg_loss}")
            individual_loss_history.append(avg_loss)
            if avg_loss + min_delta < best_loss:
                best_loss = avg_loss
                no_improvement_count = 0
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    print(f"Early stopping at epoch {epoch} for ID {id_val}")
                    break


    loss_history[id_val] = individual_loss_history

    # Predicting the future 15 days using the last window of data
    last_window_data = torch.tensor(standardized_data[-window_size:]).unsqueeze(0).unsqueeze(-1).float()
    future_prediction = DLinear_model(last_window_data)

    # Converting the prediction back to the original scale
    future_prediction = future_prediction.squeeze().detach().numpy() * std_ + mean_

    # Store the prediction
    future_predictions_by_id[id_val] = future_prediction

# Future predictions for each ID from 2023-04-05 to 2023-04-25
# Filling the submission DataFrame with the predicted values for 21 days (converted to float64)
for id_val, predictions in future_predictions_by_id.items():
    rounded_predictions = np.round(predictions).astype(np.float64)
    submit.loc[submit['ID'] == id_val, '2023-04-05':'2023-04-25'] = rounded_predictions.astype(np.float64)

submit.iloc[:, 1:] = submit.iloc[:, 1:].applymap(lambda x: 0 if x < 0 else x)
submit.to_csv('submit_v1_ws105_ks15.csv',index=False)
submit

FileNotFoundError: [Errno 2] No such file or directory: 'train.csv'

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
best_window_sizes = {}

# Iterate through the data by ID
for idx, (id_val, data) in enumerate(zip(train_df["ID"], time_series_data)):
    # Analyze the autocorrelation function
    plot_acf(data)
    plt.show()

    # Set the window size based on the analysis
    best_window_size = int(input(f"Enter the best window size for ID {id_val}: "))
    best_window_sizes[id_val] = best_window_size
