In [1]:
import torch
from torch.utils.data import Dataset
import numpy as np
from torch.utils.data import DataLoader
import torch.nn as nn
import pandas as pd
from tqdm import tqdm
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda')

In [2]:
import pandas as pd
train_df = pd.read_csv('train.csv')
submit = pd.read_csv('sample_submission.csv')


In [3]:
# Class Definitions

class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)

        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual

class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forecast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decomposition(x)
        trend_init, seasonal_init = trend_init.permute(0, 2, 1), seasonal_init.permute(0, 2, 1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forecast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forecast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output
        ################
        #x = torch.relu(x) # 음수 값 제거
        ################
        return x.permute(0, 2, 1)

class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

In [3]:
train_df

Unnamed: 0,ID,제품,대분류,중분류,소분류,브랜드,2022-01-01,2022-01-02,2022-01-03,2022-01-04,...,2023-03-26,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-01,2023-04-02,2023-04-03,2023-04-04
0,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,B002-00002-00001,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,...,0,0,0,1,3,2,0,0,2,0
2,2,B002-00002-00002,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,B002-00002-00003,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,B002-00003-00001,B002-C001-0001,B002-C002-0001,B002-C003-0003,B002-00003,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15885,15885,B002-03799-00002,B002-C001-0003,B002-C002-0008,B002-C003-0042,B002-03799,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15886,15886,B002-03799-00003,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03799,0,0,0,0,...,0,0,0,3,0,2,4,1,1,3
15887,15887,B002-03799-00004,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03799,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15888,15888,B002-03799-00005,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03799,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2


In [4]:
# Function to reshape the data into a time series format for each ID
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row[6:].values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)

# Modified time_slide_df function to work with the current data format
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, 1)
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')

# Function to create DataLoader for each ID
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)

# Reshape the data
time_series_data = reshape_data(train_df)

In [12]:
# Function to reshape the data into a time series format for each ID
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row[6:].values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)

# Modified time_slide_df function to work with the current data format
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, 1)
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')

# Function to create DataLoader for each ID
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)

# Reshape the data
time_series_data = reshape_data(train_df)

# Define the window size, forecast size, and batch size
window_size = 7   # Considering the last 30 days for prediction
forecast_size = 21 # Predicting the next 21 days
batch_size = 128
epoch_count = 777
lr = 0.001
min_delta = 0.001
patience = 10

future_predictions_by_id = {}
loss_history = {}

# Iterate through the data by ID
for idx, (id_val, data) in tqdm(enumerate(zip(train_df["ID"], time_series_data)), total=len(train_df["ID"])):
    # Standardizing the data
    mean_ = np.mean(data)
    std_ = np.std(data)
    standardized_data = (data - mean_) / std_
    individual_loss_history = []
    # Create DataLoader
    train_dl = create_dataloader(standardized_data, window_size, forecast_size, batch_size)
    best_loss = float('inf')
    no_improvement_count = 0

    # Training the model
    DLinear_model = LTSF_DLinear(window_size=window_size, forecast_size=forecast_size, kernel_size=3, individual=False, feature_size=1)
    DLinear_model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    for epoch in range(1, epoch_count + 1):
        loss_list = []
        DLinear_model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.to(device), target.to(device)  # Move the data to the GPU if available
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        if((epoch % 10) == 0):
            avg_loss = np.mean(loss_list)
            #print(f"Id {idx}, Epoch {epoch}: Loss = {avg_loss}")
            individual_loss_history.append(avg_loss)
            if avg_loss + min_delta < best_loss:
                best_loss = avg_loss
                no_improvement_count = 0
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    #print(f"Early stopping at epoch {epoch} for ID {id_val}")
                    break


    loss_history[id_val] = individual_loss_history

    # Predicting the future 15 days using the last window of data
    #last_window_data = torch.tensor(standardized_data[-window_size:]).unsqueeze(0).unsqueeze(-1).float()
    last_window_data = torch.tensor(standardized_data[-window_size:]).unsqueeze(0).unsqueeze(-1).float().to(device)
    future_prediction = DLinear_model(last_window_data)

    # Converting the prediction back to the original scale
    #future_prediction = future_prediction.squeeze().detach().numpy() * std_ + mean_
    future_prediction = future_prediction.squeeze().detach().cpu().numpy() * std_ + mean_

    # Store the prediction
    future_predictions_by_id[id_val] = future_prediction

# Future predictions for each ID from 2023-04-05 to 2023-04-25
future_predictions_by_id

  standardized_data = (data - mean_) / std_
  standardized_data = (data - mean_) / std_
100%|██████████| 15890/15890 [12:13:32<00:00,  2.77s/it]  


{0: array([0.19279969, 0.24472684, 0.31764698, 0.3258279 , 0.35596067,
        0.39280728, 0.39859745, 0.42677778, 0.4282786 , 0.4200973 ,
        0.44979104, 0.4467415 , 0.43059322, 0.446095  , 0.4417674 ,
        0.46744087, 0.47369474, 0.48543096, 0.47861254, 0.47727484,
        0.47012877], dtype=float32),
 1: array([1.0203805 , 0.8247552 , 0.7808642 , 0.67040443, 0.89065564,
        0.6197645 , 0.8468385 , 0.84435236, 0.7773961 , 1.0209701 ,
        1.1761489 , 0.9669947 , 0.83653426, 1.1181977 , 1.0924066 ,
        1.2029642 , 1.2532195 , 1.3382348 , 1.132129  , 1.1232442 ,
        1.0284002 ], dtype=float32),
 2: array([0.4212786, 0.8682232, 1.1935382, 1.4428651, 1.4981425, 1.5081166,
        1.4941697, 1.4942257, 1.5112038, 1.5343068, 1.5469182, 1.5512872,
        1.5372529, 1.5234026, 1.504009 , 1.4709414, 1.4460843, 1.4293529,
        1.4233384, 1.410097 , 1.4069474], dtype=float32),
 3: array([1.1712463, 1.9775838, 2.5015974, 2.898819 , 2.974639 , 3.0334003,
        3.081381

In [19]:
future_predictions_by_id

{0: array([0.21332073, 0.12340939, 0.20749056, 0.2702606 , 0.24365175,
        0.28887573, 0.20540333, 0.3280228 , 0.35516036, 0.30956155,
        0.41412935, 0.36331022, 0.35734424, 0.35320085, 0.37792277,
        0.4274581 , 0.40100732, 0.4611228 , 0.4885825 , 0.5402522 ,
        0.5643487 ], dtype=float32),
 1: array([1.039295  , 0.87748766, 0.88476104, 0.7042916 , 0.7000917 ,
        0.52954316, 0.63303256, 0.4686969 , 0.41959673, 0.60071135,
        0.70014846, 0.5271128 , 0.48363966, 0.70143735, 0.77308875,
        0.87690175, 0.9548194 , 1.0957456 , 0.9660872 , 0.9085319 ,
        0.9497354 ], dtype=float32),
 2: array([0.22093344, 0.3963951 , 0.5567422 , 0.7317163 , 0.79051447,
        0.8294132 , 0.79702616, 0.7869375 , 0.76207864, 0.78718865,
        0.87680125, 0.9199629 , 0.99168885, 1.0344228 , 1.0642807 ,
        1.0785923 , 1.1180254 , 1.135438  , 1.130678  , 1.1287845 ,
        1.140805  ], dtype=float32),
 3: array([0.5189915, 0.8967273, 1.3034382, 1.5406865, 1.6212593

In [13]:
# Filling the submission DataFrame with the predicted values for 21 days (converted to float64)
for id_val, predictions in future_predictions_by_id.items():
    rounded_predictions = np.round(predictions).astype(np.float64)
    submit.loc[submit['ID'] == id_val, '2023-04-05':'2023-04-25'] = rounded_predictions.astype(np.float64)

# Displaying the first few rows of the filled submission file
submit.head(20)

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,...,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
3,3,1.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
4,4,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
5,5,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,7,1.0,2.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,...,5.0,5.0,5.0,6.0,6.0,6.0,7.0,7.0,7.0,8.0
8,8,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,1.0,1.0,1.0,2.0,2.0,2.0,2.0,3.0,3.0,...,3.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0


In [14]:
submit.iloc[:, 1:] = submit.iloc[:, 1:].applymap(lambda x: 0 if x < 0 else x)
submit

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,...,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
3,3,1.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
4,4,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15885,15885,4.0,8.0,11.0,12.0,12.0,12.0,12.0,12.0,13.0,...,13.0,13.0,12.0,12.0,12.0,12.0,12.0,12.0,11.0,11.0
15886,15886,3.0,4.0,4.0,4.0,5.0,5.0,4.0,4.0,4.0,...,4.0,4.0,4.0,4.0,4.0,5.0,5.0,4.0,5.0,5.0
15887,15887,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
15888,15888,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0


In [15]:
submit = submit.fillna(0)

In [16]:
submit.to_csv('submit_v1_ws7_ks3_fillna0.csv',index=False)
submit

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,...,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0
3,3,1.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
4,4,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15885,15885,4.0,8.0,11.0,12.0,12.0,12.0,12.0,12.0,13.0,...,13.0,13.0,12.0,12.0,12.0,12.0,12.0,12.0,11.0,11.0
15886,15886,3.0,4.0,4.0,4.0,5.0,5.0,4.0,4.0,4.0,...,4.0,4.0,4.0,4.0,4.0,5.0,5.0,4.0,5.0,5.0
15887,15887,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
15888,15888,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0


In [16]:
import json

# File path
file_path = "loss_history.txt"

# Open the file for writing
with open(file_path, 'w') as file:
    # Iterate through the loss_history dictionary
    for id_val, loss_list in loss_history.items():
        # Write the ID and corresponding loss values
        file.write(f"ID: {id_val}, Losses: {', '.join(map(str, loss_list))}\n")
        # Add 5 spaces as a separator
        file.write('     \n')

print(f"Loss history saved to {file_path}")


Loss history saved to loss_history.txt


In [9]:
# Load the loss_history dictionary from a JSON file
with open(file_path, 'r') as file:
    loaded_loss_history = json.load(file)


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# PSFA

In [None]:
import pandas as pd
train_df = pd.read_csv('train.csv')
submit = pd.read_csv('sample_submission.csv')

# Class Definitions

class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual

class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forecast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forecast_size = forecast_size
        self.decomposition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forecast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size, self.forecast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1 / self.window_size) * torch.ones([self.forecast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decomposition(x)
        trend_init, seasonal_init = trend_init.permute(0, 2, 1), seasonal_init.permute(0, 2, 1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forecast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forecast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output

        ################
        #x = torch.relu(x) # 음수 값 제거
        ################
        return x.permute(0, 2, 1)

class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
# Function to reshape the data into a time series format for each ID
def reshape_data(df):
    time_series_data = []
    for idx, row in df.iterrows():
        sales_data = row[6:].values.astype(float)
        time_series_data.append(sales_data)
    return np.array(time_series_data)

# Modified time_slide_df function to work with the current data format
def time_slide_df(data, window_size, forecast_size):
    data_list = []
    dap_list = []
    for idx in range(0, len(data) - window_size - forecast_size + 1):
        x = data[idx:idx + window_size].reshape(window_size, 1)
        y = data[idx + window_size:idx + window_size + forecast_size]
        data_list.append(x)
        dap_list.append(y)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')

# Function to create DataLoader for each ID
def create_dataloader(data, window_size, forecast_size, batch_size):
    X, Y = time_slide_df(data, window_size, forecast_size)
    ds = Data(X, Y)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)

# Reshape the data
time_series_data = reshape_data(train_df)

# Define the window size, forecast size, and batch size
window_size = 105   # Considering the last 30 days for prediction
forecast_size = 21 # Predicting the next 21 days
batch_size = 128
epoch_count = 777
lr = 0.001
min_delta = 0.001
patience = 10

future_predictions_by_id = {}
loss_history = {}

# Iterate through the data by ID
for idx, (id_val, data) in tqdm(enumerate(zip(train_df["ID"], time_series_data)), total=len(train_df["ID"])):
    # Standardizing the data
    mean_ = np.mean(data)
    std_ = np.std(data)
    standardized_data = (data - mean_) / std_
    individual_loss_history = []
    # Create DataLoader
    train_dl = create_dataloader(standardized_data, window_size, forecast_size, batch_size)
    best_loss = float('inf')
    no_improvement_count = 0

    # Training the model
    DLinear_model = LTSF_DLinear(window_size=window_size, forecast_size=forecast_size, kernel_size=15, individual=False, feature_size=1)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    for epoch in range(1, epoch_count + 1):
        loss_list = []
        DLinear_model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            optimizer.zero_grad()
            output = DLinear_model(data)
            loss = criterion(output, target.unsqueeze(-1))
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        if((epoch % 10) == 0):
            avg_loss = np.mean(loss_list)
            print(f"Id {idx}, Epoch {epoch}: Loss = {avg_loss}")
            individual_loss_history.append(avg_loss)
            if avg_loss + min_delta < best_loss:
                best_loss = avg_loss
                no_improvement_count = 0
            else:
                no_improvement_count += 1
                if no_improvement_count >= patience:
                    print(f"Early stopping at epoch {epoch} for ID {id_val}")
                    break


    loss_history[id_val] = individual_loss_history

    # Predicting the future 15 days using the last window of data
    last_window_data = torch.tensor(standardized_data[-window_size:]).unsqueeze(0).unsqueeze(-1).float()
    future_prediction = DLinear_model(last_window_data)

    # Converting the prediction back to the original scale
    future_prediction = future_prediction.squeeze().detach().numpy() * std_ + mean_

    # Store the prediction
    future_predictions_by_id[id_val] = future_prediction

# Future predictions for each ID from 2023-04-05 to 2023-04-25
# Filling the submission DataFrame with the predicted values for 21 days (converted to float64)
for id_val, predictions in future_predictions_by_id.items():
    rounded_predictions = np.round(predictions).astype(np.float64)
    submit.loc[submit['ID'] == id_val, '2023-04-05':'2023-04-25'] = rounded_predictions.astype(np.float64)

submit.iloc[:, 1:] = submit.iloc[:, 1:].applymap(lambda x: 0 if x < 0 else x)
submit.to_csv('submit_v1_ws105_ks15.csv',index=False)
submit

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
best_window_sizes = {}

# Iterate through the data by ID
for idx, (id_val, data) in enumerate(zip(train_df["ID"], time_series_data)):
    # Analyze the autocorrelation function
    plot_acf(data)
    plt.show()

    # Set the window size based on the analysis
    best_window_size = int(input(f"Enter the best window size for ID {id_val}: "))
    best_window_sizes[id_val] = best_window_size
