- kmeans 등으로 y의 max랑 구분지어보기

# Import

In [1]:
# Mac에서 torch 다운로드
# pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

In [2]:
import gc
# Run a full garbage-collection pass before the heavy imports and data loading.
gc.collect()

5

In [3]:
import random
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
import numpy as np
import os
import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, TensorDataset

from tqdm.auto import tqdm

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
fp = fm.FontProperties(fname='/home/studio-lab-user/Dacon/tools/NanumFont/NanumGothic.ttf', size=10)
import seaborn as sns

import warnings
warnings.filterwarnings(action='ignore')

In [4]:
# # cuda (not Mac)
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# # mps (Mac)
# device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

# The device is pinned to the CPU; the CUDA / MPS variants above are kept
# commented out for reference.
device = torch.device('cpu')
print('device :',device)

device : cpu


<br></br>

# Setting

<br>

## Hyperparameter Setting

In [5]:
# Notebook-wide hyperparameters, looked up by upper-case key.
CFG = dict(
    EPOCHS=128,  # alternative value left in the original comment: 1024
    PATIENCE=30,
    LEARNING_RATE=0.05,
    BATCH_SIZE=16,
    SEED=42,
)

<br>

## Fixed RandomSeed

In [6]:
def seed_everything(seed):
    """Seed every random number generator used here so runs are repeatable.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and switches cuDNN to its
    deterministic configuration.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running process is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN auto-tune and pick kernels per run, which can
    # change results between runs; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

<br></br>

# Data Pre-processing

In [7]:
def moving_average(x,size):
    """Return the trailing moving average of ``x`` as a list.

    Element ``i`` is the mean of the last ``size`` values ending at position
    ``i``; near the start, where fewer than ``size`` values exist, the mean of
    everything seen so far is used.

    Args:
        x: Object exposing ``.values`` (e.g. a pandas Series) and ``len()``.
        size: Window length.

    Returns:
        A list with one mean per element of ``x``.
    """
    data = x.values
    # `end` is the exclusive right edge of the window; the left edge is
    # clamped at 0 for the warm-up positions.
    return [data[max(0, end - size):end].mean() for end in range(1, len(x) + 1)]

In [8]:
import os
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split

class Preprocess:
    """Load the train/test CSV files and build the modelling features.

    The constructor reads every file and stores four concatenated frames
    (``input``, ``label``, ``test_input``, ``test_label``). The remaining
    methods transform those frames in place:

    * ``_preprocess``      - lag / trend / moving-average / cumulative features,
      then aggregation to one row per (case_num, DAT).
    * ``_scale_dataset``   - min-max scaling.
    * ``_interaction_term``- pairwise products of the base features.
    * ``_target_log``      - log-transform of the training target.
    * ``_data_return``     - hand the four frames back to the caller.
    """
    def __init__(self, input_paths, label_paths, test_input_paths, test_label_paths):
        """Read all CSV files and concatenate them per split.

        Args:
            input_paths: Paths of the training input CSVs.
            label_paths: Paths of the training label CSVs, paired with
                ``input_paths`` by position (``zip``).
            test_input_paths: Paths of the test input CSVs.
            test_label_paths: Paths of the test label CSVs, paired with
                ``test_input_paths`` by position.
        """
        
        self.input, self.label, self.test_input, self.test_label = None, None, None, None
        
        # NOTE(review): these six attributes are initialised here but not
        # assigned anywhere else in this class.
        self.X_train, self.X_valid = None, None
        self.y_train, self.y_valid = None, None
        self.X, self.y = None, None

        input_fn = []
        label_fn = []
        for input_path, label_path in zip(input_paths, label_paths):
            # The case id is whatever remains of the path after removing the
            # fixed prefix and the extension; paths that do not start with
            # this exact prefix keep their directory part in case_num.
            case_num = input_path.replace('./data/train_input/CASE_','').replace('.csv','')
            
            input_df = pd.read_csv(input_path)
            label_df = pd.read_csv(label_path)

            # Missing values are zero-filled for the training inputs only;
            # the test inputs below are not filled.
            input_df = input_df.fillna(0)

            input_df['case_num'] = case_num
            label_df['case_num'] = case_num
            
            input_fn.append(input_df)
            label_fn.append(label_df)
        
        test_input_fn = []
        test_label_fn = []
        for test_input_path, test_label_path in zip(test_input_paths, test_label_paths):
            case_num = test_input_path.replace('./data/test_input/TEST_','').replace('.csv','')
            
            test_input_df = pd.read_csv(test_input_path)
            test_label_df = pd.read_csv(test_label_path)
            
            test_input_df['case_num'] = case_num
            test_label_df['case_num'] = case_num
            
            test_input_fn.append(test_input_df)
            test_label_fn.append(test_label_df)
            
        # Only the training input is explicitly sorted; the other three frames
        # keep the order in which the files were read.
        self.input = pd.concat(input_fn,axis=0).sort_values(['case_num','DAT','obs_time'])
        self.label = pd.concat(label_fn,axis=0)
        self.test_input  = pd.concat(test_input_fn ,axis=0)
        self.test_label  = pd.concat(test_label_fn ,axis=0)
        
        # Overwrite obs_time with a repeating 0..23 pattern.
        # assumes the row count is a multiple of 24 and rows are in hourly
        # order within each day - TODO confirm
        self.input     .obs_time = list(np.arange(0,24))*int(self.input     .shape[0]/24)
        self.test_input.obs_time = list(np.arange(0,24))*int(self.test_input.shape[0]/24)
        
    def _data_return(self):
        """Return ``(input, label, test_input, test_label)``.

        The frames are returned as-is (no copy), so in-place changes made by
        the caller are visible through this object as well.
        """
        return self.input,self.label,self.test_input,self.test_label
            
    def _target_log(self):
        """Replace the training target ``predicted_weight_g`` by its natural log."""
        self.label['predicted_weight_g'] = np.log(self.label['predicted_weight_g'])
    
    def _preprocess(self):
        """Create derived features and aggregate to one row per case and day.

        Works in place on ``self.input`` and ``self.test_input``.
        """
        # 1. add `time`: 1~672 (24 hours x 28 days)
        # assumes every case has exactly 672 rows stored contiguously - TODO confirm
        self.input     ['time'] = [i+1 for i in range(28*24)]*self.input     .case_num.nunique()
        self.test_input['time'] = [i+1 for i in range(28*24)]*self.test_input.case_num.nunique()

        # NOTE(review): `features` is not referenced again in this method.
        features = [
            'DAT', 'obs_time', '내부온도관측치', '내부습도관측치', 'co2관측치', 'ec관측치', 
            '시간당분무량', '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', 
            '시간당청색광량', '일간누적청색광량', '시간당총광량', '일간누적총광량', 'case_num', 'time'
        ]
        # del_features = [
        #     '시간당분무량','시간당백색광량','시간당적색광량','시간당청색광량','시간당총광량',
        #     # '일간누적분무량','일간누적백색광량','일간누적적색광량','일간누적청색광량','일간누적총광량'
        # ]
        # self.input     .drop(columns=del_features,inplace=True)
        # self.test_input.drop(columns=del_features,inplace=True)
        
        # 2. derived variables for each column
        input_df       = []
        test_input_df  = []
        # NOTE(review): the loop runs over the *training* case ids and filters
        # the test frame with the same id; test rows whose case_num does not
        # appear among the training ids never reach test_input_df - confirm.
        for case_num in self.input.case_num.unique():
            # i_df / t_df are boolean-mask selections; the column assignments
            # below rely on them being independent copies.
            i_df = self.input     [self.input     .case_num==case_num]
            t_df = self.test_input[self.test_input.case_num==case_num]
            
            # Iterating a set: the order of the derived columns is not fixed
            # by the code.
            for col in list(set(self.input.columns)-set(['case_num','DAT','obs_time','time'])):
                for i in range(4):
                    # (1) value from (i+1) rows earlier
                    i_df[f'{col}_bf{i+1}'] = i_df[col].shift(i+1).fillna(0)
                    t_df[f'{col}_bf{i+1}'] = t_df[col].shift(i+1).fillna(0)
                
                    # (2) flag: did the value rise compared with (i+1) rows earlier
                    i_df[f'{col}_higher_than_{i+1}d'] = np.where(i_df[col]>i_df[col].shift(i+1),1,0)
                    t_df[f'{col}_higher_than_{i+1}d'] = np.where(t_df[col]>t_df[col].shift(i+1),1,0)

                    # (3) rate of rise versus (i+1) rows earlier -> produces NaN when added
                    # NOTE(review): the offset is the column minimum (<= 0) and
                    # is *added*, so a negative minimum shifts values further
                    # down; with a minimum of exactly 0 it is a no-op. The
                    # shift also runs on each of the 4 passes of the `i` loop
                    # and modifies `col` itself - confirm the intent.
                    if i_df[col].min()<=0:
                        offset = i_df[col].min()
                        i_df[col] = i_df[col] + offset
                        t_df[col] = t_df[col] + offset
                    i_df[f'{col}_{i+1}d_rise_rate'] = (i_df[col] - i_df[col].shift(i+1)) / i_df[col]
                    t_df[f'{col}_{i+1}d_rise_rate'] = (t_df[col] - t_df[col].shift(i+1)) / t_df[col]

                    # -inf -> min, inf -> max
                    # The finite min/max come from the training slice and are
                    # used for the test slice as well.
                    tmp = i_df[f'{col}_{i+1}d_rise_rate'].copy()
                    tmp = tmp[(tmp!=-np.inf) & (tmp!=np.inf)]
                    min_info, max_info = tmp.min(), tmp.max()

                    # Chained indexing assignment (column first, then mask).
                    i_df[f'{col}_{i+1}d_rise_rate'][i_df[f'{col}_{i+1}d_rise_rate']==-np.inf] = min_info
                    i_df[f'{col}_{i+1}d_rise_rate'][i_df[f'{col}_{i+1}d_rise_rate']== np.inf] = max_info
                    t_df[f'{col}_{i+1}d_rise_rate'][t_df[f'{col}_{i+1}d_rise_rate']==-np.inf] = min_info
                    t_df[f'{col}_{i+1}d_rise_rate'][t_df[f'{col}_{i+1}d_rise_rate']== np.inf] = max_info
                    
                    # fill nan to zero
                    i_df[f'{col}_{i+1}d_rise_rate'].fillna(0,inplace=True)
                    t_df[f'{col}_{i+1}d_rise_rate'].fillna(0,inplace=True)
                    
                    # (4) moving average
                    # The column names do not depend on `i`, so these columns
                    # are recomputed and overwritten on every pass.
                    for size in [2,4,7]:
                        i_df[f'{col}_ma{size}'] = moving_average(i_df[col],size=size)
                        t_df[f'{col}_ma{size}'] = moving_average(t_df[col],size=size)
                
                # (5) cumulative sum
                i_df[f'{col}_cumsum'] = i_df[col].cumsum()
                t_df[f'{col}_cumsum'] = t_df[col].cumsum()

            input_df     .append(i_df)
            test_input_df.append(t_df)
        
        # concat
        self.input       = pd.concat(input_df     ,axis=0)
        self.test_input  = pd.concat(test_input_df,axis=0)
        
        # After creating the derived variables, drop columns whose values are
        # all identical (decided on the training frame, applied to both).
        unique_info = self.input.apply(lambda x: x.nunique())
        unique_cols = unique_info[unique_info==1].index.tolist()
        
        # final dataset
        self.input      = self.input     .drop(unique_cols,axis=1)
        self.test_input = self.test_input.drop(unique_cols,axis=1)
        
        #---------------------------------------------------------------------------
        # agg
        #---------------------------------------------------------------------------
        self.input      = self.input     .drop(['obs_time'],axis=1)
        self.test_input = self.test_input.drop(['obs_time'],axis=1)
        
        # Collapse the rows of each (case_num, DAT) pair to their mean.
        self.input      = self.input     .groupby(['case_num','DAT']).mean().reset_index()
        self.test_input = self.test_input.groupby(['case_num','DAT']).mean().reset_index()
        
        # presumably DAT is 0-based in the raw files and becomes 1..28 here - verify
        self.input.DAT      = self.input     .DAT + 1
        self.test_input.DAT = self.test_input.DAT + 1
        
        # Same constant-column removal again, now on the aggregated frames.
        unique_info = self.input.apply(lambda x: x.nunique())
        unique_cols = unique_info[unique_info==1].index.tolist()
        
        # final dataset
        self.input      = self.input     .drop(unique_cols,axis=1)
        self.test_input = self.test_input.drop(unique_cols,axis=1)
        
    # https://dacon.io/competitions/official/236033/talkboard/407304?page=1&dtype=recent
    def _scale_dataset(self,outlier):
        """Min-max scale ``self.input`` and ``self.test_input`` in place.

        Columns listed in ``minmax_info`` are scaled with the fixed ranges
        given there; every other non-object column is scaled with the min/max
        observed in the training frame.

        Args:
            outlier: ``'keep'`` clips scaled values of the fixed-range columns
                to [0, 1]; ``'drop'`` replaces out-of-range values with NaN.
                Any other value leaves out-of-range values untouched.
        """
        
        # Fixed [min, max] per base column.
        minmax_info = {
            #'None':[0,0],
            'DAT':[1,28],
            'obs_time':[0,23],
            'time':[0,28*24],
            '내부온도관측치':[4,40],
            '내부습도관측치':[0,100],
            'co2관측치':[0,1200],
            'ec관측치':[0,8],
            '시간당분무량':[0,3000],
            '일간누적분무량':[0,72000],
            '시간당백색광량':[0,120000],
            '일간누적백색광량':[0,2880000],
            '시간당적색광량':[0,120000],
            '일간누적적색광량':[0,2880000],
            '시간당청색광량':[0,120000],
            '일간누적청색광량':[0,2880000],
            '시간당총광량':[0,120000],
            '일간누적총광량':[0,2880000],
        }
            
        # Only the entries whose column is present in the training frame.
        scale_feature = [feature for feature,(min_info,max_info) in minmax_info.items() if feature in self.input.columns]
        
        # for train dataset
        for col in scale_feature:
            min_info,max_info = minmax_info[col]
            self.input[col] = (self.input[col]-min_info) / (max_info-min_info)
            
            if outlier=='keep':
                # Values outside 0~1 (i.e. outside the minmax_info range) are set to 0 / 1
                # -> deleting them would break the 24-hour term
                self.input[col][self.input[col]<0] = 0
                self.input[col][self.input[col]>1] = 1
            elif outlier=='drop':
                self.input[col][(self.input[col]<0) | (self.input[col]>1)] = np.nan
            
        # for test dataset
        for col in scale_feature:
            min_info,max_info = minmax_info[col]
            self.test_input[col] = (self.test_input[col]-min_info) / (max_info-min_info)
            
            if outlier=='keep':
                # Values outside 0~1 (i.e. outside the minmax_info range) are set to 0 / 1
                # -> deleting them would break the 24-hour term
                self.test_input[col][self.test_input[col]<0] = 0
                self.test_input[col][self.test_input[col]>1] = 1
            elif outlier=='drop':
                self.test_input[col][(self.test_input[col]<0) | (self.test_input[col]>1)] = np.nan
        
        # another features
        # All remaining non-object columns, scaled with training min/max.
        another_features = list(set(self.input.select_dtypes(exclude=[object]).columns)-set(scale_feature))
        for col in another_features:
            # NOTE(review): the offset is added to both frames before the
            # min is taken, so it cancels in the numerator below - confirm
            # whether it is still needed.
            if self.input[col].min()==0:
                offset=1e-4
                self.input[col]      = self.input     [col]+offset
                self.test_input[col] = self.test_input[col]+offset
            
            # NOTE(review): a column with max == min would divide by zero here.
            min_info,max_info = self.input[col].min(),self.input[col].max()    
            self.input[col]      = (self.input[col]     -min_info) / (max_info-min_info)
            self.test_input[col] = (self.test_input[col]-min_info) / (max_info-min_info)
        
    def _interaction_term(self):
        """Add the product of every unordered pair of base features.

        For each pair ``(i, j)`` with ``i > j`` a column named
        ``'<feature_i>*<feature_j>'`` is added to both frames.
        """
        # num_features = self.input.select_dtypes(exclude=[object]).columns
        num_features = [
            'DAT','time',
            '내부온도관측치', '내부습도관측치', 'co2관측치', 'ec관측치', 
            '시간당분무량', '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', 
            '시간당청색광량', '일간누적청색광량', '시간당총광량', '일간누적총광량',
        ]
        for i in range(len(num_features)):
            for j in range(len(num_features)):
                # i>j visits each unordered pair once and skips squares.
                if i>j:
                    self.input     [f'{num_features[i]}*{num_features[j]}'] = self.input     [num_features[i]]*self.input     [num_features[j]]
                    self.test_input[f'{num_features[i]}*{num_features[j]}'] = self.test_input[num_features[i]]*self.test_input[num_features[j]]

In [9]:
def abline(slope, intercept, color):
    """Draw the dashed line ``y = intercept + slope * x`` on the current axes.

    The line spans the x-limits the current axes have at call time.

    Args:
        slope: Slope of the line.
        intercept: Value of the line at ``x = 0``.
        color: Matplotlib colour for the line.
    """
    x_span = np.array(plt.gca().get_xlim())
    plt.plot(x_span, intercept + slope * x_span, '--', color=color)

In [10]:
# from scipy.stats import pearsonr

# val_rate = 0.05

# dataset = Preprocess(
#     input_paths = all_input_list,
#     label_paths = all_target_list,
#     test_paths = all_test_list,
# )

# dataset._preprocess()
# dataset._scale_dataset()
# input_df, label_df = dataset._data_return()

# for case_num in tqdm(sorted(input_df.case_num.unique())):

#     input = input_df[input_df.case_num==case_num].drop('case_num',axis=1)
#     label = label_df[label_df.case_num==case_num].drop('case_num',axis=1)

#     fig = plt.figure(figsize=(20,15))
#     nrow = 3
#     ncol = 5

#     iter = 0
#     total = len(input.columns)-3
#     for col in input.columns:
#         if col not in ['time','DAT','obs_time']:
#             iter+=1

#             y1 = input[col]
#             #y1 = (y1-y1.min())/(y1.max()-y1.min())

#             y2 = label['predicted_weight_g']
#             y2 = (y2-y2.min())/(y2.max()-y2.min())

#             y3 = input.groupby('DAT')[col].mean().values

#             corr, pvalue = pearsonr(y2,y3)

#             fig.add_subplot(ncol,nrow,iter)
#             sns.scatterplot(x=input.time  ,y=y1)
#             sns.scatterplot(x=label.DAT*24,y=y2,color='red')
#             sns.lineplot   (x=label.DAT*24,y=y3,color='blue',linestyle='--',alpha=0.7)
#             plt.ylabel('')

#             plt.title(f'{col}(corr={corr:.3f}(pvalue={pvalue:.3f}))',fontproperties=fp)


#     plt.tight_layout()
#     plt.savefig(f'./fig/{case_num}.png',dpi=100)
#     plt.close()

<br></br>

# Data Load

In [11]:
# Sorted file lists for the four data folders, in the order
# train input / train target / test input / test target.
csv_patterns = (
    './data/train_input/*.csv',
    './data/train_target/*.csv',
    './data/test_input/*.csv',
    './data/test_target/*.csv',
)
all_input_list, all_label_list, all_test_input_list, all_test_label_list = (
    sorted(glob.glob(pattern)) for pattern in csv_patterns
)

In [12]:
%%time

from sklearn.model_selection import train_test_split

# Preprocess Class
# Reads every train / test CSV listed above into the four frames held by `dataset`.
dataset = Preprocess(
    input_paths = all_input_list,
    label_paths = all_label_list,
    test_input_paths = all_test_input_list,
    test_label_paths = all_test_label_list,
)

# (1) preprocessing + scaling + interaction term
# Order matters: scaling runs on the columns that _preprocess creates.
# The target log-transform and the interaction terms are currently disabled.
dataset._preprocess()
# dataset._target_log()
dataset._scale_dataset(outlier='keep')
# dataset._interaction_term()

# (2) Data Return for check
# These are the same DataFrame objects that `dataset` holds (no copy), so
# later in-place edits of input_df / test_input_df also change `dataset`.
input_df, label_df, test_input_df, test_label_df = dataset._data_return()

# # (3) Delete Std zero features
# std_zero_features = []
# for case_num in input_df.case_num.unique():
#     tmp = input_df[input_df.case_num==case_num]
#     std_zero_feature = tmp.std().index[tmp.std()==0].tolist()
#     std_zero_features += std_zero_feature
    
# std_zero_features = pd.unique(std_zero_features)

# input_df = input_df.drop(std_zero_features,axis=1)

# # (4) Select Columns
# input_df = input_df.drop(columns=['obs_time'])
# label_df = label_df['predicted_weight_g']

CPU times: user 40.2 s, sys: 138 ms, total: 40.3 s
Wall time: 40.3 s


In [13]:
# Count missing values per column and display only the columns that have any.
null_info = input_df.isnull().sum()
null_info[null_info!=0]

Series([], dtype: int64)

In [14]:
# d = input_df.copy()

# cols = [col for col in d.columns if col.find('_higher_than_')>=0]
# std_zero_cols = d[cols].std()==0
# del_cols = std_zero_cols[std_zero_cols].index.tolist()
# print(del_cols)

# # for i in range(len(cols)):
# #     print(f'({i}/{len(cols)}) {cols[i]}')
# #     plt.figure(figsize=(8,7))
# #     sns.lineplot(x=d.time,y=d[cols[i]])
# #     plt.show()

In [15]:
# input_df.isnull().sum()
input_df.shape

(784, 241)

In [16]:
# Print the column names that do not contain '*'.
print([col for col in input_df.columns if col.find('*')<0])
# print(input_df.columns)

['case_num', 'DAT', '내부온도관측치', '내부습도관측치', 'co2관측치', 'ec관측치', '시간당분무량', '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', '시간당청색광량', '일간누적청색광량', '시간당총광량', '일간누적총광량', 'time', 'co2관측치_bf1', 'co2관측치_higher_than_1d', 'co2관측치_1d_rise_rate', 'co2관측치_ma2', 'co2관측치_ma4', 'co2관측치_ma7', 'co2관측치_bf2', 'co2관측치_higher_than_2d', 'co2관측치_2d_rise_rate', 'co2관측치_bf3', 'co2관측치_higher_than_3d', 'co2관측치_3d_rise_rate', 'co2관측치_bf4', 'co2관측치_higher_than_4d', 'co2관측치_4d_rise_rate', 'co2관측치_cumsum', '시간당적색광량_bf1', '시간당적색광량_higher_than_1d', '시간당적색광량_1d_rise_rate', '시간당적색광량_ma2', '시간당적색광량_ma4', '시간당적색광량_ma7', '시간당적색광량_bf2', '시간당적색광량_higher_than_2d', '시간당적색광량_2d_rise_rate', '시간당적색광량_bf3', '시간당적색광량_higher_than_3d', '시간당적색광량_3d_rise_rate', '시간당적색광량_bf4', '시간당적색광량_higher_than_4d', '시간당적색광량_4d_rise_rate', '시간당적색광량_cumsum', '시간당총광량_bf1', '시간당총광량_higher_than_1d', '시간당총광량_1d_rise_rate', '시간당총광량_ma2', '시간당총광량_ma4', '시간당총광량_ma7', '시간당총광량_bf2', '시간당총광량_higher_than_2d', '시간당총광량_2d_rise_rate', '시간당총광량_bf3', '시간당총광량_hi

In [17]:
from scipy.stats import pearsonr

# Pearson correlation (r and p-value) between every feature column and the
# training target; one row per feature.
target_values = label_df['predicted_weight_g']
corr_rows = []
for col in input_df.drop(columns=['case_num']).columns:
    r_value, p_value = pearsonr(input_df[col],target_values)
    corr_rows.append([col,r_value,p_value])

corr_df = pd.DataFrame(corr_rows,columns=['feature','r_value','p_value'])
# corr_df.sort_values('p_value')

In [18]:
# Features whose correlation with the target is not significant at level
# `alpha` are collected for removal.
alpha = 0.05
not_significant = corr_df.p_value>alpha
del_feature = corr_df.loc[not_significant,'feature'].tolist()
print(np.array(del_feature))

['co2관측치_higher_than_1d' 'co2관측치_higher_than_2d' 'co2관측치_higher_than_3d'
 'co2관측치_higher_than_4d' 'co2관측치_4d_rise_rate' '시간당적색광량_higher_than_1d'
 '시간당적색광량_higher_than_2d' '시간당적색광량_higher_than_3d'
 '시간당적색광량_higher_than_4d' '시간당총광량_bf2' '시간당총광량_cumsum'
 '시간당청색광량_1d_rise_rate' '시간당청색광량_cumsum' '일간누적총광량_1d_rise_rate'
 '일간누적총광량_2d_rise_rate' '일간누적총광량_3d_rise_rate' '일간누적총광량_4d_rise_rate'
 '내부습도관측치_1d_rise_rate' '내부습도관측치_higher_than_2d' '내부습도관측치_2d_rise_rate'
 '내부습도관측치_higher_than_3d' '내부습도관측치_3d_rise_rate' '내부습도관측치_higher_than_4d'
 '내부습도관측치_4d_rise_rate' '내부온도관측치_higher_than_1d' '내부온도관측치_1d_rise_rate'
 '내부온도관측치_higher_than_2d' '내부온도관측치_2d_rise_rate' '내부온도관측치_higher_than_3d'
 '내부온도관측치_3d_rise_rate' '내부온도관측치_higher_than_4d' '내부온도관측치_4d_rise_rate'
 '시간당백색광량_bf2' '시간당백색광량_bf3' '시간당백색광량_cumsum' '일간누적백색광량_1d_rise_rate'
 '일간누적백색광량_2d_rise_rate' '일간누적백색광량_3d_rise_rate' '일간누적백색광량_4d_rise_rate'
 '시간당분무량_1d_rise_rate' '시간당분무량_2d_rise_rate' '시간당분무량_3d_rise_rate'
 '시간당분무량_4d_rise_rate' '일간누적분무량_1d_rise_r

In [19]:
# Remove the rejected features from both frames, in place so that every
# other reference to these DataFrame objects sees the change as well.
for frame in (input_df, test_input_df):
    frame.drop(columns=del_feature,inplace=True)

In [20]:
# Column count before -> after dropping del_feature (the "before" value is
# reconstructed by adding the number of dropped columns back).
print(f'asis({input_df.shape[1]+len(del_feature)}) -> tobe({input_df.shape[1]})')

asis(241) -> tobe(186)


<br></br>

# Pre-Fit Catboost
- 아래 모델링과 비슷하게 case_num으로 split

In [21]:
import datetime

# Tag that distinguishes the output folders / archive of this run.
save_mark = '3'

# Output folders: one for the prediction dumps, one (suffix `_fn`) for the
# per-case submission files.
paths = [f'./out/kf_cat_{save_mark}',f'./out/kf_cat_{save_mark}_fn']
for path in paths:
    # makedirs also creates a missing parent ('./out'), which os.mkdir does
    # not; exist_ok keeps re-running the cell harmless.
    os.makedirs(path, exist_ok=True)

In [24]:
input_df.shape, label_df.shape

((784, 186), (784, 3))

In [25]:
# # 1시간 (cpu)

# from catboost import CatBoostRegressor
# from sklearn.model_selection import KFold

# n_splits = 10

# case_num = input_df.case_num.unique()
# kf = KFold(n_splits=n_splits,shuffle=True,random_state=42)

# kf_iter = 0
# for tr_idx,va_idx in tqdm(kf.split(case_num),total=n_splits):
#     kf_iter+=1
#     print(f'-'*100)
#     print(f'({kf_iter}/{n_splits})')
#     print(f'-'*100)
    
#     #------------------------------------------------------------------------------------
#     # (1) train validation split
#     #------------------------------------------------------------------------------------
#     tr_case_num = case_num[tr_idx]
#     va_case_num = case_num[va_idx]
    
#     X_train = input_df[input_df.case_num.isin(tr_case_num)].drop(columns=['case_num'])
#     X_valid = input_df[input_df.case_num.isin(va_case_num)].drop(columns=['case_num'])

#     y_train = label_df[label_df.case_num.isin(tr_case_num)]['predicted_weight_g']
#     y_valid = label_df[label_df.case_num.isin(va_case_num)]['predicted_weight_g']
#     # print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)
    
#     model = CatBoostRegressor(iterations=5000,metric_period=1000,random_state=42)
#     model.fit(X_train,y_train,eval_set=[(X_valid,y_valid)])

#     tr_pred = model.predict(X_train)
#     va_pred = model.predict(X_valid)
    
#     pred_input_df[f'pred_{kf_iter}'] = model.predict(input_df.drop(columns=['case_num']))
#     pred_input_df.to_csv(f'./out/kf_cat_{save_mark}/pred_input_df_{kf_iter}.csv',index=False)
    
#     pred_test_df[f'pred_{kf_iter}'] = model.predict(test_input_df.drop(columns=['case_num']))
#     pred_test_df.to_csv(f'./out/kf_cat_{save_mark}/pred_test_df_{kf_iter}.csv',index=False)

<br>

## kfold-CV

In [None]:
# Feature frames with the true target appended as an extra column; the index
# of each target is reset so it lines up positionally with the features.
train_target_col = label_df['predicted_weight_g'].reset_index(drop=True)
test_target_col = test_label_df['predicted_weight_g'].reset_index(drop=True)
pred_input_df = pd.concat([input_df,train_target_col],axis=1)
pred_test_df  = pd.concat([test_input_df,test_target_col],axis=1)

In [39]:
# 1시간 (cpu)

from catboost import CatBoostRegressor, Pool
# mean_squared_error is used for the plot titles below; importing it here
# keeps the cell runnable on a fresh kernel.
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

n_splits = 10
kf = KFold(n_splits=n_splits,shuffle=True,random_state=42)

# NOTE(review): the folds are drawn over individual rows of input_df, so rows
# of the same case_num can land in both the train and the validation part.
kf_iter = 0
for tr_idx,va_idx in tqdm(kf.split(input_df),total=n_splits):
    kf_iter+=1
    print(f'')
    print(f'-'*100)
    print(f'({kf_iter}/{n_splits})')
    print(f'-'*100)
    
    #------------------------------------------------------------------------------------
    # (1) train validation split
    #------------------------------------------------------------------------------------
    X_train = input_df.iloc[tr_idx,:].drop(columns=['case_num'])
    X_valid = input_df.iloc[va_idx,:].drop(columns=['case_num'])

    y_train = label_df.iloc[tr_idx]['predicted_weight_g']
    y_valid = label_df.iloc[va_idx]['predicted_weight_g']
    # print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)
    
    #------------------------------------------------------------------------------------
    # (2) fit one model per fold, early-stopped on the validation part
    #------------------------------------------------------------------------------------
    model = CatBoostRegressor(iterations=40000,metric_period=2000,random_state=42,early_stopping_rounds=4000)
    model.fit(X_train,y_train,eval_set=[(X_valid,y_valid)])

    tr_pred = model.predict(X_train)
    va_pred = model.predict(X_valid)
    
    #------------------------------------------------------------------------------------
    # (3) predicted vs. true scatter plots with the y = x reference line
    #------------------------------------------------------------------------------------
    fig = plt.figure(figsize=(15,7))
    fig.add_subplot(121)
    sns.scatterplot(x=tr_pred,y=y_train)
    abline(slope=1,intercept=0,color='red')
    plt.title(f'Train : RMSE={np.sqrt(mean_squared_error(tr_pred,y_train)):.4f}')
    fig.add_subplot(122)
    sns.scatterplot(x=va_pred,y=y_valid)
    abline(slope=1,intercept=0,color='red')
    plt.title(f'Validation : RMSE={np.sqrt(mean_squared_error(va_pred,y_valid)):.4f}')
    plt.tight_layout()
    plt.show()
    
    #------------------------------------------------------------------------------------
    # (4) store this fold's predictions for all train rows and all test rows;
    #     each CSV holds the columns of every fold finished so far
    #------------------------------------------------------------------------------------
    pred_input_df[f'pred_{kf_iter}'] = model.predict(input_df.drop(columns=['case_num']))
    pred_input_df.to_csv(f'./out/kf_cat_{save_mark}/pred_input_df_{kf_iter}.csv',index=False)
    
    pred_test_df[f'pred_{kf_iter}'] = model.predict(test_input_df.drop(columns=['case_num']))
    pred_test_df.to_csv(f'./out/kf_cat_{save_mark}/pred_test_df_{kf_iter}.csv',index=False)

<br>

## optuna

In [118]:
# Rebuild the prediction frames from scratch: the features plus the true
# target, whose index is reset so both parts line up positionally.
true_train_target = label_df['predicted_weight_g'].reset_index(drop=True)
true_test_target = test_label_df['predicted_weight_g'].reset_index(drop=True)
pred_input_df = pd.concat([input_df,true_train_target],axis=1)
pred_test_df  = pd.concat([test_input_df,true_test_target],axis=1)

In [131]:
def objective(trial,X_train,X_valid,y_train,y_valid,verbose):
    """Optuna objective: train one CatBoost regressor and return its validation RMSE.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X_train: Training features.
        X_valid: Validation features (also used as the early-stopping eval set).
        y_train: Training target.
        y_valid: Validation target.
        verbose: When truthy, print the sampled parameters and let CatBoost log.

    Returns:
        The validation RMSE, or ``99999`` when training or scoring raised an
        exception (so that a failing configuration is simply ranked last).

    Side effects:
        If a module-level progress bar named ``pbar_cat`` exists, its
        description is updated and it is advanced by one step.
    """

    # suggest_float(..., log=True) is the non-deprecated spelling of
    # suggest_loguniform, and plain suggest_float of suggest_uniform; the
    # search ranges are unchanged.
    params = {
        'learning_rate' : trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),

        'subsample': trial.suggest_float('subsample',0,1),
        'od_wait': trial.suggest_int('od_wait', 500, 2300),

        'reg_lambda': trial.suggest_float('reg_lambda',1e-5,100),
        'random_strength': trial.suggest_float('random_strength',0,100),
        'depth': trial.suggest_int('depth',1, 6),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf',1,100),
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations',1,15),

        'bagging_temperature' :trial.suggest_float('bagging_temperature', 0.01, 100.00, log=True),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.0, 1.0),
        'max_bin': trial.suggest_int('max_bin', 10, 300),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
    }
    if verbose:
        print('> Hyper-Parameters')
        for key,value in params.items():
            print(f'  - {key}: {value}')
        print('')

    # Log every 5000 iterations when verbose, otherwise leave CatBoost's default.
    metric_period = 5000 if verbose else None

    train_dataset = Pool(data=X_train,label=y_train)
    valid_dataset = Pool(data=X_valid,label=y_valid)

    try:
        model = CatBoostRegressor(
            random_state=0,
            loss_function='RMSE',
            metric_period=metric_period,
            iterations=50000,
            **params,
        )

        model.fit(
            train_dataset,
            eval_set=valid_dataset,
            use_best_model=True,
            early_stopping_rounds=1000,
            verbose=verbose,
        )

        va_pred = model.predict(valid_dataset)
        # RMSE computed with NumPy so scoring does not depend on a helper
        # that has to be imported elsewhere in the notebook.
        residual = np.asarray(y_valid, dtype=float) - np.asarray(va_pred, dtype=float)
        score = float(np.sqrt(np.mean(residual ** 2)))
    except Exception as exc:
        # Best effort: a failing configuration gets a very bad score instead
        # of aborting the study, but the reason is reported. Catching
        # Exception (not a bare except) lets Ctrl-C still stop the run.
        print(f'Trial {trial.number} failed: {exc!r}')
        score = 99999

    # The progress bar is optional so the objective also works without it.
    progress_bar = globals().get('pbar_cat')
    if progress_bar is not None:
        progress_bar.set_description(f'Cat : score={score:.3f}')
        progress_bar.update(1)

    return score

In [132]:
# 1시간 (cpu)

import optuna
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import KFold, train_test_split

# Random 80/20 row split of features and labels for the hyperparameter search.
X_train, X_valid, y_train, y_valid = train_test_split(input_df,label_df,test_size=0.2,random_state=42)

# Keep only model inputs on the X side and only the target column on the y side.
X_train, X_valid = (part.drop('case_num',axis=1) for part in (X_train, X_valid))
y_train, y_valid = (part['predicted_weight_g'] for part in (y_train, y_valid))

study_cat = optuna.create_study(direction='minimize', study_name='CatBoost', sampler=optuna.samplers.TPESampler(seed=0))


def func(trial):
    """Bind the split data to `objective` so Optuna only has to pass the trial."""
    return objective(trial,X_train,X_valid,y_train,y_valid,verbose=False)


n_trials = 50

# `objective` reports progress through the module-level name pbar_cat, which
# the `with` statement binds here.
with tqdm(total=n_trials) as pbar_cat:
    study_cat.optimize(func, n_trials=n_trials)

[32m[I 2022-12-14 05:18:02,094][0m A new study created in memory with name: CatBoost[0m


  0%|          | 0/50 [00:00<?, ?it/s]

[32m[I 2022-12-14 05:19:55,152][0m Trial 0 finished with value: 7.836777683932372 and parameters: {'learning_rate': 0.0012520653814999472, 'subsample': 0.7151893663724195, 'od_wait': 1585, 'reg_lambda': 54.488322850857855, 'random_strength': 42.36547993389047, 'depth': 4, 'min_data_in_leaf': 44, 'leaf_estimation_iterations': 14, 'bagging_temperature': 71.55682161754872, 'colsample_bylevel': 0.3834415188257777, 'max_bin': 240, 'od_type': 'Iter'}. Best is trial 0 with value: 7.836777683932372.[0m
[32m[I 2022-12-14 05:24:51,879][0m Trial 1 finished with value: 8.661178289677688 and parameters: {'learning_rate': 0.007098936257405904, 'subsample': 0.07103605819788694, 'od_wait': 656, 'reg_lambda': 2.0218495418485976, 'random_strength': 83.2619845547938, 'depth': 5, 'min_data_in_leaf': 88, 'leaf_estimation_iterations': 15, 'bagging_temperature': 15.726578854179516, 'colsample_bylevel': 0.46147936225293185, 'max_bin': 237, 'od_type': 'Iter'}. Best is trial 0 with value: 7.836777683932372

In [137]:
study_cat.best_params

{'learning_rate': 0.003832622042155371,
 'subsample': 0.36292672548257277,
 'od_wait': 2267,
 'reg_lambda': 69.51018764739234,
 'random_strength': 1.028534640055302,
 'depth': 3,
 'min_data_in_leaf': 2,
 'leaf_estimation_iterations': 9,
 'bagging_temperature': 0.21893053909412485,
 'colsample_bylevel': 0.8856926987679451,
 'max_bin': 23,
 'od_type': 'IncToDec'}

In [133]:
# Refit a single model with the best hyperparameters found by the study, on
# the same train / validation split that was used for the search.
train_dataset = Pool(data=X_train,label=y_train)
valid_dataset = Pool(data=X_valid,label=y_valid)

model = CatBoostRegressor(
    random_state=0,
    loss_function='RMSE',
    metric_period=10000,
    iterations=500000,
    **study_cat.best_params,
)

# NOTE(review): the training log of this cell reports a 1000-iteration wait
# although best_params carries its own od_wait / od_type, so
# early_stopping_rounds appears to take precedence over them - confirm.
model.fit(
    train_dataset,
    eval_set=valid_dataset,
    use_best_model=True,
    early_stopping_rounds=1000,
    verbose=True,
)

# The file names used to embed `kf_iter`, a loop counter left over from a
# different cell: depending on execution order that either overwrote one of
# the k-fold dumps or raised NameError. A fixed suffix avoids both.
pred_input_df[f'pred'] = model.predict(input_df.drop(columns=['case_num']))
pred_input_df.to_csv(f'./out/kf_cat_{save_mark}/pred_input_df_optuna.csv',index=False)

pred_test_df[f'pred'] = model.predict(test_input_df.drop(columns=['case_num']))
pred_test_df.to_csv(f'./out/kf_cat_{save_mark}/pred_test_df_optuna.csv',index=False)



0:	learn: 41.2221293	test: 42.5178837	best: 42.5178837 (0)	total: 1.12ms	remaining: 9m 18s
10000:	learn: 2.6505602	test: 7.5892120	best: 7.5892120 (10000)	total: 10.4s	remaining: 8m 30s
20000:	learn: 1.5003878	test: 7.0859806	best: 7.0859806 (20000)	total: 20.7s	remaining: 8m 17s
30000:	learn: 1.0099165	test: 6.9365059	best: 6.9365011 (29998)	total: 31s	remaining: 8m 5s
40000:	learn: 0.7377927	test: 6.8643549	best: 6.8643549 (40000)	total: 41.2s	remaining: 7m 54s
50000:	learn: 0.5630418	test: 6.8233631	best: 6.8233491 (49993)	total: 51.5s	remaining: 7m 43s
60000:	learn: 0.4419458	test: 6.8022462	best: 6.8022028 (59965)	total: 1m 1s	remaining: 7m 32s
70000:	learn: 0.3529503	test: 6.7855347	best: 6.7854422 (69887)	total: 1m 12s	remaining: 7m 22s
80000:	learn: 0.2866395	test: 6.7737127	best: 6.7737001 (79999)	total: 1m 22s	remaining: 7m 12s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 6.768572792
bestIteration = 87747

Shrink model to first 87748 iterations.


In [134]:
# Use the single-model prediction column as the submitted value; this
# overwrites the true target column that pred_test_df carried until now.
pred_cols = 'pred'
pred_test_df['predicted_weight_g'] = pred_test_df[pred_cols]

In [135]:
# Write one submission CSV per test case holding the day index and the
# predicted weight, ordered by day.
sub = pred_test_df.sort_values(['case_num','DAT'])
submission_columns = ['DAT','predicted_weight_g']

for case_num in sub.case_num.unique():
    s = sub.loc[sub.case_num==case_num, submission_columns]
    # DAT is replaced by a plain 1..28 day index.
    s.DAT = list(range(1, 29))
    s.to_csv(f'./out/kf_cat_{save_mark}_fn/TEST_{case_num}.csv',index=False)

In [136]:
import zipfile

# Bundle the per-case submission files into ./out/kf_cat_<save_mark>.zip.
# Paths are resolved relative to the current working directory instead of
# chdir-ing to a hard-coded, machine-specific folder, and the `with` block
# closes the archive even if adding a file fails.
submission_dir = f"./out/kf_cat_{save_mark}_fn"
with zipfile.ZipFile(f"./out/kf_cat_{save_mark}.zip", 'w') as submission:
    for path in all_test_label_list:
        # The archive contains one entry per test target file, under the
        # same bare file name (no directory component).
        file_name = path.split('/')[-1]
        submission.write(os.path.join(submission_dir, file_name), arcname=file_name)

<br>

## kfold + optuna

In [141]:
# Takes about 1 hour on CPU.

import optuna
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import KFold, train_test_split

n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# NOTE(review): the split is over individual rows of input_df, so rows that
# share a case_num can end up on both sides of a fold - confirm this is intended.
kf_iter = 0
for kf_iter, (tr_idx, va_idx) in enumerate(tqdm(kf.split(input_df), total=n_splits), start=1):
    banner = '-' * 100
    print('')
    print(banner)
    print(f'({kf_iter}/{n_splits})')
    print(banner)

    # ------------------------------------------------------------------
    # (1) train / validation split (case_num is an identifier, not a feature)
    # ------------------------------------------------------------------
    X_train = input_df.iloc[tr_idx].drop(columns=['case_num'])
    X_valid = input_df.iloc[va_idx].drop(columns=['case_num'])

    y_train = label_df['predicted_weight_g'].iloc[tr_idx]
    y_valid = label_df['predicted_weight_g'].iloc[va_idx]

    train_dataset = Pool(data=X_train, label=y_train)
    valid_dataset = Pool(data=X_valid, label=y_valid)

    # ------------------------------------------------------------------
    # (2) fit with the tuned hyperparameters; early stopping picks the
    #     effective number of iterations well below the 500000 cap
    # ------------------------------------------------------------------
    fixed_params = dict(
        random_state=0,
        loss_function='RMSE',
        metric_period=10000,
        iterations=500000,
    )
    model = CatBoostRegressor(**fixed_params, **study_cat.best_params)
    model.fit(
        train_dataset,
        eval_set=valid_dataset,
        use_best_model=True,
        early_stopping_rounds=1000,
        verbose=True,
    )

    # ------------------------------------------------------------------
    # (3) store this fold's predictions and checkpoint both frames to disk
    # ------------------------------------------------------------------
    fold_col = f'pred_{kf_iter}'

    pred_input_df[fold_col] = model.predict(input_df.drop(columns=['case_num']))
    pred_input_df.to_csv(f'./out/kf_cat_{save_mark}/pred_input_df_{kf_iter}.csv', index=False)

    pred_test_df[fold_col] = model.predict(test_input_df.drop(columns=['case_num']))
    pred_test_df.to_csv(f'./out/kf_cat_{save_mark}/pred_test_df_{kf_iter}.csv', index=False)

  0%|          | 0/10 [00:00<?, ?it/s]


----------------------------------------------------------------------------------------------------
(1/10)
----------------------------------------------------------------------------------------------------
0:	learn: 41.2164619	test: 43.6867119	best: 43.6867119 (0)	total: 3.31ms	remaining: 27m 35s




10000:	learn: 2.5875074	test: 5.1533543	best: 5.1533543 (10000)	total: 31s	remaining: 25m 18s
20000:	learn: 1.5036043	test: 4.8713857	best: 4.8710127 (19992)	total: 46s	remaining: 18m 23s
30000:	learn: 1.0439258	test: 4.7801514	best: 4.7800976 (29998)	total: 56.2s	remaining: 14m 41s
40000:	learn: 0.7818381	test: 4.7394446	best: 4.7394095 (39990)	total: 1m 6s	remaining: 12m 40s
50000:	learn: 0.6118600	test: 4.7080512	best: 4.7077694 (49822)	total: 1m 16s	remaining: 11m 25s
60000:	learn: 0.4911956	test: 4.6920059	best: 4.6920059 (60000)	total: 1m 26s	remaining: 10m 33s
70000:	learn: 0.4029936	test: 4.6798266	best: 4.6797971 (69974)	total: 1m 36s	remaining: 9m 53s
80000:	learn: 0.3344967	test: 4.6717233	best: 4.6717059 (79999)	total: 1m 46s	remaining: 9m 20s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 4.667409441
bestIteration = 85488

Shrink model to first 85489 iterations.

------------------------------------------------------------------------------------------



10000:	learn: 2.5712226	test: 7.3453684	best: 7.3451275 (9993)	total: 9.88s	remaining: 8m 4s
20000:	learn: 1.4966131	test: 6.6181518	best: 6.6181518 (20000)	total: 19.8s	remaining: 7m 54s
30000:	learn: 1.0486684	test: 6.3875065	best: 6.3873047 (29977)	total: 29.7s	remaining: 7m 45s
40000:	learn: 0.7943446	test: 6.2883671	best: 6.2883257 (39994)	total: 39.6s	remaining: 7m 35s
50000:	learn: 0.6257826	test: 6.2315926	best: 6.2315090 (49987)	total: 49.6s	remaining: 7m 26s
60000:	learn: 0.5036846	test: 6.1903042	best: 6.1902832 (59998)	total: 59.8s	remaining: 7m 18s
70000:	learn: 0.4127863	test: 6.1619082	best: 6.1618112 (69969)	total: 1m 9s	remaining: 7m 7s
80000:	learn: 0.3419879	test: 6.1429482	best: 6.1429482 (80000)	total: 1m 19s	remaining: 6m 59s
90000:	learn: 0.2859060	test: 6.1290157	best: 6.1290157 (90000)	total: 1m 30s	remaining: 6m 51s
100000:	learn: 0.2401963	test: 6.1163103	best: 6.1163024 (99993)	total: 1m 40s	remaining: 6m 41s
110000:	learn: 0.2032799	test: 6.1059925	best: 6.



0:	learn: 40.2587683	test: 51.2532865	best: 51.2532865 (0)	total: 1.69ms	remaining: 14m 2s
10000:	learn: 2.4984770	test: 6.8837520	best: 6.8837520 (10000)	total: 10.8s	remaining: 8m 49s
20000:	learn: 1.4631240	test: 6.5522907	best: 6.5522907 (20000)	total: 20.6s	remaining: 8m 14s
30000:	learn: 1.0331246	test: 6.4414863	best: 6.4413914 (29997)	total: 30.4s	remaining: 7m 56s
40000:	learn: 0.7842977	test: 6.3950553	best: 6.3950301 (39997)	total: 40.6s	remaining: 7m 46s
50000:	learn: 0.6173272	test: 6.3708319	best: 6.3707569 (49991)	total: 50.7s	remaining: 7m 35s
60000:	learn: 0.4956285	test: 6.3578577	best: 6.3577800 (59847)	total: 1m	remaining: 7m 23s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 6.351364381
bestIteration = 67812

Shrink model to first 67813 iterations.

----------------------------------------------------------------------------------------------------
(4/10)
------------------------------------------------------------------------------------------



10000:	learn: 2.6422744	test: 5.2948949	best: 5.2948949 (10000)	total: 9.9s	remaining: 8m 4s
20000:	learn: 1.5493270	test: 4.7481073	best: 4.7479296 (19993)	total: 19.6s	remaining: 7m 50s
30000:	learn: 1.0851399	test: 4.5935045	best: 4.5934733 (29995)	total: 29.4s	remaining: 7m 40s
40000:	learn: 0.8204299	test: 4.5259125	best: 4.5259125 (40000)	total: 39.2s	remaining: 7m 30s
50000:	learn: 0.6458728	test: 4.4933778	best: 4.4933679 (49994)	total: 49s	remaining: 7m 21s
60000:	learn: 0.5190575	test: 4.4741378	best: 4.4740556 (59997)	total: 58.8s	remaining: 7m 11s
70000:	learn: 0.4241660	test: 4.4603333	best: 4.4601747 (69870)	total: 1m 8s	remaining: 7m 1s
80000:	learn: 0.3515361	test: 4.4503530	best: 4.4502612 (79948)	total: 1m 18s	remaining: 6m 52s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 4.448356222
bestIteration = 81796

Shrink model to first 81797 iterations.

----------------------------------------------------------------------------------------------------



10000:	learn: 2.6519451	test: 4.7898901	best: 4.7898901 (10000)	total: 9.79s	remaining: 7m 59s
20000:	learn: 1.5739934	test: 4.5094809	best: 4.5092695 (19996)	total: 19.6s	remaining: 7m 50s
30000:	learn: 1.1029676	test: 4.4135800	best: 4.4131263 (29982)	total: 29.3s	remaining: 7m 39s
40000:	learn: 0.8309635	test: 4.3644985	best: 4.3644554 (39991)	total: 39.1s	remaining: 7m 29s
50000:	learn: 0.6523188	test: 4.3369571	best: 4.3367299 (49937)	total: 48.8s	remaining: 7m 19s
60000:	learn: 0.5235455	test: 4.3182792	best: 4.3181509 (59979)	total: 58.7s	remaining: 7m 10s
70000:	learn: 0.4283815	test: 4.3057327	best: 4.3053446 (69756)	total: 1m 8s	remaining: 6m 59s
80000:	learn: 0.3543039	test: 4.2984437	best: 4.2981804 (79670)	total: 1m 18s	remaining: 6m 49s
90000:	learn: 0.2958489	test: 4.2943606	best: 4.2941740 (89524)	total: 1m 27s	remaining: 6m 40s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 4.290488236
bestIteration = 96110

Shrink model to first 96111 iterations.




10000:	learn: 2.5834676	test: 6.2629392	best: 6.2629392 (10000)	total: 9.81s	remaining: 8m
20000:	learn: 1.5059532	test: 5.7416566	best: 5.7412497 (19996)	total: 19.7s	remaining: 7m 51s
30000:	learn: 1.0505181	test: 5.5540251	best: 5.5540251 (30000)	total: 29.5s	remaining: 7m 41s
40000:	learn: 0.7929423	test: 5.4694465	best: 5.4693894 (39990)	total: 39.4s	remaining: 7m 32s
50000:	learn: 0.6248293	test: 5.4321089	best: 5.4318738 (49958)	total: 49s	remaining: 7m 21s
60000:	learn: 0.5044771	test: 5.4075242	best: 5.4074287 (59810)	total: 58.8s	remaining: 7m 11s
70000:	learn: 0.4152271	test: 5.3902570	best: 5.3902570 (70000)	total: 1m 8s	remaining: 7m 1s
80000:	learn: 0.3472666	test: 5.3775351	best: 5.3775184 (79999)	total: 1m 18s	remaining: 6m 51s
Stopped by overfitting detector  (1000 iterations wait)

bestTest = 5.368883416
bestIteration = 88912

Shrink model to first 88913 iterations.

----------------------------------------------------------------------------------------------------
(



10000:	learn: 2.7353462	test: 5.1015922	best: 5.1015922 (10000)	total: 9.85s	remaining: 8m 2s
20000:	learn: 1.5695779	test: 4.4050074	best: 4.4050074 (20000)	total: 19.9s	remaining: 7m 57s
30000:	learn: 1.0886208	test: 4.1865049	best: 4.1864491 (29998)	total: 29.8s	remaining: 7m 46s
40000:	learn: 0.8088875	test: 4.0686618	best: 4.0686541 (39999)	total: 39.7s	remaining: 7m 36s
50000:	learn: 0.6269449	test: 4.0002828	best: 4.0001890 (49997)	total: 49.5s	remaining: 7m 25s
60000:	learn: 0.4970474	test: 3.9577668	best: 3.9577668 (60000)	total: 59.2s	remaining: 7m 14s
70000:	learn: 0.4004642	test: 3.9304600	best: 3.9304597 (69992)	total: 1m 9s	remaining: 7m 4s
80000:	learn: 0.3285902	test: 3.9100031	best: 3.9099759 (79995)	total: 1m 18s	remaining: 6m 54s
90000:	learn: 0.2713618	test: 3.8954742	best: 3.8954742 (90000)	total: 1m 29s	remaining: 6m 46s
100000:	learn: 0.2264365	test: 3.8847538	best: 3.8847538 (100000)	total: 1m 40s	remaining: 6m 42s
110000:	learn: 0.1901118	test: 3.8769521	best: 



10000:	learn: 2.6450791	test: 5.9365785	best: 5.9365785 (10000)	total: 10.6s	remaining: 8m 40s
20000:	learn: 1.5518096	test: 5.5180403	best: 5.5178600 (19996)	total: 21.2s	remaining: 8m 27s
30000:	learn: 1.0862213	test: 5.3794928	best: 5.3793862 (29976)	total: 32s	remaining: 8m 21s
40000:	learn: 0.8189869	test: 5.3092600	best: 5.3092600 (40000)	total: 42s	remaining: 8m 2s
50000:	learn: 0.6415231	test: 5.2665275	best: 5.2664943 (49991)	total: 52s	remaining: 7m 48s
60000:	learn: 0.5151881	test: 5.2371557	best: 5.2371501 (59998)	total: 1m 2s	remaining: 7m 37s
70000:	learn: 0.4205642	test: 5.2208431	best: 5.2206429 (69961)	total: 1m 12s	remaining: 7m 27s
80000:	learn: 0.3479977	test: 5.2059625	best: 5.2057987 (79856)	total: 1m 23s	remaining: 7m 17s
90000:	learn: 0.2903505	test: 5.1967975	best: 5.1967262 (89841)	total: 1m 33s	remaining: 7m 5s
100000:	learn: 0.2446218	test: 5.1890495	best: 5.1890276 (99871)	total: 1m 43s	remaining: 6m 54s
Stopped by overfitting detector  (1000 iterations wai



10000:	learn: 2.5353707	test: 6.6804265	best: 6.6803440 (9999)	total: 10s	remaining: 8m 12s
20000:	learn: 1.4693797	test: 6.1524765	best: 6.1524765 (20000)	total: 20.1s	remaining: 8m 1s
30000:	learn: 1.0089480	test: 5.9639789	best: 5.9636613 (29965)	total: 30.7s	remaining: 8m
40000:	learn: 0.7452626	test: 5.8794339	best: 5.8794339 (40000)	total: 40.9s	remaining: 7m 49s
50000:	learn: 0.5747216	test: 5.8270755	best: 5.8270687 (49999)	total: 50.8s	remaining: 7m 37s
60000:	learn: 0.4563293	test: 5.7949422	best: 5.7949273 (59996)	total: 1m 1s	remaining: 7m 28s
70000:	learn: 0.3690264	test: 5.7775288	best: 5.7775284 (69999)	total: 1m 11s	remaining: 7m 20s
80000:	learn: 0.3029964	test: 5.7649518	best: 5.7649134 (79983)	total: 1m 22s	remaining: 7m 10s
90000:	learn: 0.2515978	test: 5.7569456	best: 5.7569438 (89998)	total: 1m 32s	remaining: 7m
100000:	learn: 0.2107703	test: 5.7512570	best: 5.7512392 (99964)	total: 1m 42s	remaining: 6m 49s
110000:	learn: 0.1776952	test: 5.7473936	best: 5.7473824 



10000:	learn: 2.6434041	test: 6.5356621	best: 6.5354243 (9996)	total: 10.2s	remaining: 8m 18s
20000:	learn: 1.5294404	test: 5.9725871	best: 5.9725871 (20000)	total: 20.4s	remaining: 8m 9s
30000:	learn: 1.0541063	test: 5.8115562	best: 5.8114696 (29999)	total: 30.8s	remaining: 8m 2s
40000:	learn: 0.7940645	test: 5.7445260	best: 5.7443340 (39968)	total: 40.6s	remaining: 7m 46s
50000:	learn: 0.6243734	test: 5.7088396	best: 5.7088101 (49907)	total: 50.5s	remaining: 7m 34s
60000:	learn: 0.5045552	test: 5.6864819	best: 5.6863247 (59966)	total: 1m	remaining: 7m 21s
70000:	learn: 0.4152344	test: 5.6709982	best: 5.6708785 (69835)	total: 1m 10s	remaining: 7m 13s
80000:	learn: 0.3466206	test: 5.6583518	best: 5.6583435 (79991)	total: 1m 21s	remaining: 7m 8s
90000:	learn: 0.2910936	test: 5.6496238	best: 5.6495604 (89962)	total: 1m 31s	remaining: 6m 56s
100000:	learn: 0.2466563	test: 5.6422929	best: 5.6422866 (99999)	total: 1m 41s	remaining: 6m 45s
110000:	learn: 0.2103727	test: 5.6371476	best: 5.637

In [142]:
from sklearn.metrics import mean_squared_error

In [143]:
# Average the per-fold prediction columns row-wise and score the ensemble.
# NOTE(review): every fold model predicted on all of input_df, including the
# rows it was trained on, so this MSE is not an out-of-fold estimate.
pred_cols = [col for col in pred_input_df.columns if 'pred_' in col]
fold_predictions = pred_input_df[pred_cols]
pred_input_df['preds'] = fold_predictions.apply(lambda row: row.mean(), axis=1)

mean_squared_error(pred_input_df['preds'], pred_input_df['predicted_weight_g'])

0.39927131629106166

In [144]:
# Final test prediction = row-wise mean of the per-fold prediction columns.
# The fold columns are discovered on pred_test_df itself (the frame that is
# averaged) rather than on pred_input_df, so the two frames cannot drift apart
# and cause a KeyError or silently average the wrong set of columns.
pred_cols = [col for col in pred_test_df.columns if 'pred_' in col]
pred_test_df['predicted_weight_g'] = pred_test_df[pred_cols].mean(axis=1)

In [145]:
# Write one submission CSV per test case, ordered by case and day.
sub = pred_test_df.sort_values(by=['case_num', 'DAT'])

for case_num in sub['case_num'].unique():
    # Keep only the two submission columns for this case; .copy() gives an
    # independent frame so the DAT overwrite below never touches `sub`.
    s = sub.loc[sub['case_num'] == case_num, ['DAT', 'predicted_weight_g']].copy()
    # Renumber DAT as 1..28; the literal 28 assumes every case has exactly 28 rows.
    s['DAT'] = list(range(1, 29))
    s.to_csv(f'./out/kf_cat_{save_mark}_fn/TEST_{case_num}.csv', index=False)

In [146]:
import zipfile

# Pack the per-case CSVs into the submission archive.
#
# The archive is opened in a `with` block and every file is added by explicit
# path + arcname instead of chdir-ing into the output folder. A failed write
# (e.g. a missing CSV) therefore can neither leak the zip handle nor leave the
# notebook's working directory stuck inside the output folder.
os.chdir('/Users/khj/MyPython/Dacon/6_상추생육환경생성')
out_dir = f"./out/kf_cat_{save_mark}_fn"
with zipfile.ZipFile(f"./out/kf_cat_{save_mark}.zip", 'w') as submission:
    for path in all_test_label_list:
        # Only the file name of each label path is used; the CSV with the same
        # name is read from out_dir and stored at the archive root.
        path = path.split('/')[-1]
        submission.write(os.path.join(out_dir, path), arcname=path)

<br>

<br></br>

# Model Definition

<br>

## Utils

In [None]:
class TimeDistributed(nn.Module):
    """Apply a wrapped module to every timestep of a sequence tensor.

    Inputs with at most two dimensions are passed to the wrapped module
    unchanged. Higher-rank inputs are flattened to ``(-1, features)``, run
    through the module once, and reshaped back to three dimensions.

    Args:
        module: The module applied to the flattened input.
        batch_first: Selects how the flat output is folded back; see
            ``forward``.
    """

    def __init__(self, module, batch_first=False):
        super().__init__()
        self.module = module
        self.batch_first = batch_first

    def forward(self, x):
        # Nothing to distribute over: 1-D / 2-D input goes straight through.
        if x.dim() <= 2:
            return self.module(x)

        # Merge every leading axis into one so the module sees a 2-D batch
        # of shape (samples * timesteps, input_size).
        flat_in = x.contiguous().view(-1, x.size(-1))
        flat_out = self.module(flat_in)
        out_features = flat_out.size(-1)

        if self.batch_first:
            # Fold back using the first input axis: (samples, timesteps, output_size).
            return flat_out.contiguous().view(x.size(0), -1, out_features)

        # Fold back using the second input axis: (timesteps, samples, output_size).
        return flat_out.view(-1, x.size(1), out_features)

<br>

## LSTM

In [None]:
# https://www.kaggle.com/code/junkoda/pytorch-lstm-with-tensorflow-like-initialization
class LSTM_Model(nn.Module):
    """Four chained LSTM blocks followed by a linear head on the last timestep.

    Each block is an ``nn.LSTM`` (``batch_first=True``) followed by the shared
    activation. The head maps the final timestep's features to one value.
    Weights are re-initialised Keras-style, following
    https://www.kaggle.com/code/junkoda/pytorch-lstm-with-tensorflow-like-initialization

    Args:
        input_size: Number of features per timestep fed to the first LSTM.
    """

    def __init__(self, input_size):
        hidden = [40] * 4         # hidden units per LSTM block
        # NOTE(review): PyTorch documents LSTM dropout as acting between stacked
        # layers, so with num_layers == 1 these values likely have no effect - confirm.
        dropout = [0.5] * 4
        num_layers = [1] * 4
        bidirectional = False
        # A bidirectional LSTM emits forward and backward features side by side.
        offset = 2 if bidirectional else 1

        super().__init__()

        # Block k consumes the output width of block k-1; the first block
        # consumes the raw features. Registered as lstm1 .. lstm4, in order.
        in_sizes = [input_size] + [offset * width for width in hidden[:-1]]
        for idx, in_features in enumerate(in_sizes):
            block = nn.LSTM(
                input_size=in_features,
                hidden_size=hidden[idx],
                dropout=dropout[idx],
                num_layers=num_layers[idx],
                batch_first=True,
                bidirectional=bidirectional,
            )
            setattr(self, f'lstm{idx + 1}', block)

        # Candidate activations; only `self.activation` is used in forward().
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        self.selu = nn.SELU()
        self.gelu = nn.GELU()
        self.elu = nn.ELU()

        self.activation = self.leakyrelu

        self.fc = TimeDistributed(nn.Linear(offset * hidden[3], 1))
        self._reinitialize()

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization
        """
        for name, param in self.named_parameters():
            values = param.data
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(values)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(values)
                elif 'bias_ih' in name:
                    values.zero_()
                    # Set forget-gate bias to 1 (the second quarter of the bias vector).
                    size = param.size(0)
                    values[(size // 4):(size // 2)].fill_(1)
                elif 'bias_hh' in name:
                    values.zero_()
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(values)
                elif 'bias' in name:
                    values.zero_()

    def forward(self, x):
        # Run the four LSTM blocks in sequence, each followed by the activation.
        for lstm in (self.lstm1, self.lstm2, self.lstm3, self.lstm4):
            x, _ = lstm(x)
            x = self.activation(x)
        # Fully connected head on the last timestep only.
        return self.fc(x[:, -1, :])

<br>

## SCINet

In [None]:
import math
import torch.nn.functional as F
from torch.autograd import Variable
from torch import nn
import torch
import argparse
import numpy as np

class Splitting(nn.Module):
    """Split a 3-D tensor along dim 1 into its even- and odd-indexed steps."""

    def __init__(self):
        super().__init__()

    def even(self, x):
        # Steps 0, 2, 4, ... along dim 1.
        return x[:, 0::2, :]

    def odd(self, x):
        # Steps 1, 3, 5, ... along dim 1.
        return x[:, 1::2, :]

    def forward(self, x):
        '''Returns the even and odd parts, in that order.'''
        even_part = self.even(x)
        odd_part = self.odd(x)
        return (even_part, odd_part)


class Interactor(nn.Module):
    """Interaction block between the even and odd halves of a sequence.

    Four identically-shaped convolutional branches (``phi``, ``psi``, ``P``,
    ``U``) are built. With ``INN=True`` the halves first scale each other
    through ``exp(phi)`` / ``exp(psi)`` and are then updated through ``U`` and
    ``P``; with ``INN=False`` only ``P`` and ``U`` are used.

    Args:
        in_planes: Channel count of the branch inputs and outputs.
        splitting: If true, ``forward`` splits its input into even/odd steps;
            otherwise the input must already be an ``(even, odd)`` pair.
        kernel: Kernel size of the first convolution in every branch.
        dropout: Dropout probability inside every branch.
        groups: ``groups`` argument for both convolutions in every branch.
        hidden_size: Multiplier applied to ``in_planes`` for the hidden width.
        INN: Selects the interactive variant (see above).
    """

    def __init__(self, in_planes, splitting=True,
                 kernel = 5, dropout=0.5, groups = 1, hidden_size = 1, INN = True):
        super().__init__()
        self.modified = INN
        self.kernel_size = kernel
        self.dilation = 1
        self.dropout = dropout
        self.hidden_size = hidden_size
        self.groups = groups
        self.splitting = splitting
        self.split = Splitting()

        # Replication padding for the first convolution (stride is 1). The
        # extra "+ 1" per side compensates for the second convolution, whose
        # kernel size is fixed at 3 and which is not padded.
        if self.kernel_size % 2 == 0:
            pad_l = self.dilation * (self.kernel_size - 2) // 2 + 1
            pad_r = self.dilation * (self.kernel_size) // 2 + 1
        else:
            pad_l = pad_r = self.dilation * (self.kernel_size - 1) // 2 + 1

        hidden_channels = int(in_planes * self.hidden_size)

        def build_branch():
            # pad -> conv(kernel) -> LeakyReLU -> dropout -> conv(3) -> tanh
            return nn.Sequential(
                nn.ReplicationPad1d((pad_l, pad_r)),
                nn.Conv1d(in_planes, hidden_channels,
                          kernel_size=self.kernel_size, dilation=self.dilation,
                          stride=1, groups=self.groups),
                nn.LeakyReLU(negative_slope=0.01, inplace=True),
                nn.Dropout(self.dropout),
                nn.Conv1d(hidden_channels, in_planes,
                          kernel_size=3, stride=1, groups=self.groups),
                nn.Tanh(),
            )

        # Build order (P, U, phi, psi) and registration order (phi, psi, P, U)
        # are both kept fixed so seeded weight initialisation and the
        # state_dict layout stay reproducible.
        branch_P = build_branch()
        branch_U = build_branch()
        branch_phi = build_branch()
        branch_psi = build_branch()

        self.phi = branch_phi
        self.psi = branch_psi
        self.P = branch_P
        self.U = branch_U

    def forward(self, x):
        x_even, x_odd = self.split(x) if self.splitting else x

        # Swap the last two axes so Conv1d sees channels on dim 1.
        x_even = x_even.permute(0, 2, 1)
        x_odd = x_odd.permute(0, 2, 1)

        if not self.modified:
            d = x_odd - self.P(x_even)
            c = x_even + self.U(d)
            return (c, d)

        # Interactive variant: mutual exponential scaling, then additive updates.
        d = x_odd.mul(torch.exp(self.phi(x_even)))
        c = x_even.mul(torch.exp(self.psi(x_odd)))

        x_even_update = c + self.U(d)
        x_odd_update = d - self.P(c)

        return (x_even_update, x_odd_update)


class InteractorLevel(nn.Module):
    """Thin wrapper that owns one ``Interactor`` with splitting enabled."""

    def __init__(self, in_planes, kernel, dropout, groups , hidden_size, INN):
        super().__init__()
        self.level = Interactor(
            in_planes=in_planes,
            splitting=True,
            kernel=kernel,
            dropout=dropout,
            groups=groups,
            hidden_size=hidden_size,
            INN=INN,
        )

    def forward(self, x):
        # Returns the (even, odd) pair produced by the wrapped Interactor.
        even_part, odd_part = self.level(x)
        return (even_part, odd_part)

class LevelSCINet(nn.Module):
    """One SCINet level: run an ``InteractorLevel`` and swap the last two axes back."""

    def __init__(self,in_planes, kernel_size, dropout, groups, hidden_size, INN):
        super().__init__()
        self.interact = InteractorLevel(
            in_planes=in_planes,
            kernel=kernel_size,
            dropout=dropout,
            groups=groups,
            hidden_size=hidden_size,
            INN=INN,
        )

    def forward(self, x):
        even_part, odd_part = self.interact(x)
        # Undo the channel-first layout: even -> (B, T, D), odd -> (B, T, D).
        even_out = even_part.permute(0, 2, 1)
        odd_out = odd_part.permute(0, 2, 1)
        return even_out, odd_out

class SCINet_Tree(nn.Module):
    """Recursive binary tree of ``LevelSCINet`` blocks.

    Every node splits its input into even/odd halves via its working block.
    Nodes with ``current_level != 0`` pass each half to a child tree. The two
    halves are then re-interleaved along the time axis.
    """

    def __init__(self, in_planes, current_level, kernel_size, dropout, groups, hidden_size, INN):
        super().__init__()
        self.current_level = current_level

        self.workingblock = LevelSCINet(
            in_planes=in_planes,
            kernel_size=kernel_size,
            dropout=dropout,
            groups=groups,
            hidden_size=hidden_size,
            INN=INN,
        )

        if current_level != 0:
            # Children are built odd-first, then even.
            child_args = (in_planes, current_level - 1, kernel_size, dropout, groups, hidden_size, INN)
            self.SCINet_Tree_odd = SCINet_Tree(*child_args)
            self.SCINet_Tree_even = SCINet_Tree(*child_args)

    def zip_up_the_pants(self, even, odd):
        """Interleave ``even`` and ``odd`` along dim 1 (even step first).

        If ``even`` is longer than ``odd``, only its final step is appended
        after the interleaved part.
        """
        even = even.permute(1, 0, 2)
        odd = odd.permute(1, 0, 2)  # L, B, D
        even_len, odd_len = even.shape[0], odd.shape[0]
        paired = min((odd_len, even_len))

        pieces = [
            half[step].unsqueeze(0)
            for step in range(paired)
            for half in (even, odd)
        ]
        if odd_len < even_len:
            pieces.append(even[-1].unsqueeze(0))
        return torch.cat(pieces, 0).permute(1, 0, 2)  # B, L, D

    def forward(self, x):
        x_even_update, x_odd_update = self.workingblock(x)
        # Leaf level: just stitch the two halves back together.
        if self.current_level == 0:
            return self.zip_up_the_pants(x_even_update, x_odd_update)

        # Inner level: recurse into the even child, then the odd child, and
        # re-interleave their outputs.
        even_branch = self.SCINet_Tree_even(x_even_update)
        odd_branch = self.SCINet_Tree_odd(x_odd_update)
        return self.zip_up_the_pants(even_branch, odd_branch)

class EncoderTree(nn.Module):
    """Encoder consisting of a single ``SCINet_Tree`` with ``num_levels`` levels."""

    def __init__(self, in_planes,  num_levels, kernel_size, dropout, groups, hidden_size, INN):
        super().__init__()
        self.levels = num_levels
        # Tree levels are counted down to 0, so the root starts at num_levels - 1.
        root_level = num_levels - 1
        self.SCINet_Tree = SCINet_Tree(
            in_planes=in_planes,
            current_level=root_level,
            kernel_size=kernel_size,
            dropout=dropout,
            groups=groups,
            hidden_size=hidden_size,
            INN=INN,
        )

    def forward(self, x):
        return self.SCINet_Tree(x)

class SCINet(nn.Module):
    """SCINet forecaster: one or two stacked ``EncoderTree`` blocks plus projections.

    ``forward`` applies the first encoder with a residual connection and
    projects the time axis from ``input_len`` to ``output_len``. With
    ``num_stacks == 2`` the projection is concatenated with the (optionally
    truncated) input and passed through a second encoder and projection.

    Args:
        output_len: Number of timesteps produced.
        input_len: Number of input timesteps; must be divisible by
            ``2 ** num_levels`` (asserted in ``forward``).
        input_dim: Number of features per timestep.
        hid_size: Hidden-width multiplier forwarded to the encoder.
        num_stacks: 1 or 2; only two stacks at most are implemented.
        num_levels: Depth of each encoder tree.
        num_decoder_layer: 1 uses a single 1x1 ``Conv1d`` projection; larger
            values add ``num_decoder_layer - 1`` banks of six overlapping
            segment-wise linear layers before a ``Linear`` projection.
        concat_len: If non-zero, only the last ``concat_len`` input steps are
            concatenated in front of the first stack's output.
        groups: ``groups`` for the encoder convolutions.
        kernel: Kernel size for the encoder convolutions.
        dropout: Dropout probability for the encoder.
        single_step_output_One: If truthy, the second stack projects to a
            single timestep instead of ``output_len``.
        input_len_seg: Accepted for interface compatibility; not used here.
        positionalE: Add a sinusoidal positional encoding to the input.
        modified: Forwarded to the encoder as ``INN``.
        RIN: Normalise the input per series and undo it on the output.

    Returns (from ``forward``):
        ``x`` for one stack; ``(x, MidOutPut)`` for two stacks, where
        ``MidOutPut`` is the first stack's output.
    """

    def __init__(self, output_len, input_len, input_dim = 9, hid_size = 1, num_stacks = 1,
                num_levels = 3, num_decoder_layer = 1, concat_len = 0, groups = 1, kernel = 5, dropout = 0.5,
                 single_step_output_One = 0, input_len_seg = 0, positionalE = False, modified = True, RIN=False):
        super(SCINet, self).__init__()

        self.input_dim = input_dim
        self.input_len = input_len
        self.output_len = output_len
        self.hidden_size = hid_size
        self.num_levels = num_levels
        self.groups = groups
        self.modified = modified
        self.kernel_size = kernel
        self.dropout = dropout
        self.single_step_output_One = single_step_output_One
        self.concat_len = concat_len
        self.pe = positionalE
        self.RIN=RIN
        self.num_decoder_layer = num_decoder_layer

        self.blocks1 = EncoderTree(
            in_planes=self.input_dim,
            num_levels = self.num_levels,
            kernel_size = self.kernel_size,
            dropout = self.dropout,
            groups = self.groups,
            hidden_size = self.hidden_size,
            INN =  modified)

        if num_stacks == 2: # we only implement two stacks at most.
            self.blocks2 = EncoderTree(
                in_planes=self.input_dim,
                num_levels = self.num_levels,
                kernel_size = self.kernel_size,
                dropout = self.dropout,
                groups = self.groups,
                hidden_size = self.hidden_size,
                INN =  modified)

        self.stacks = num_stacks

        # Custom initialisation for 2-D conv / batch-norm / linear layers that
        # exist at this point (layers created below keep their defaults).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

        # Default decoder: a 1x1 convolution that treats timesteps as channels.
        self.projection1 = nn.Conv1d(self.input_len, self.output_len, kernel_size=1, stride=1, bias=False)
        self.div_projection = nn.ModuleList()
        self.overlap_len = self.input_len//4
        self.div_len = self.input_len//6

        if self.num_decoder_layer > 1:
            self.projection1 = nn.Linear(self.input_len, self.output_len)
            for layer_idx in range(self.num_decoder_layer-1):
                div_projection = nn.ModuleList()
                for i in range(6):
                    # Segment i reads overlap_len steps (clipped at the end of
                    # the sequence) and writes div_len steps.
                    lens = min(i*self.div_len+self.overlap_len,self.input_len) - i*self.div_len
                    div_projection.append(nn.Linear(lens, self.div_len))
                self.div_projection.append(div_projection)

        if self.stacks == 2:
            # Second-stack projection: input is [history, first-stack output];
            # output is one step or the full horizon.
            history_len = self.concat_len if self.concat_len else self.input_len
            target_len = 1 if self.single_step_output_One else self.output_len
            self.projection2 = nn.Conv1d(history_len + self.output_len, target_len,
                                        kernel_size = 1, bias = False)

        # For positional encoding (width rounded up to an even number)
        self.pe_hidden_size = input_dim
        if self.pe_hidden_size % 2 == 1:
            self.pe_hidden_size += 1

        num_timescales = self.pe_hidden_size // 2
        max_timescale = 10000.0
        min_timescale = 1.0

        log_timescale_increment = (
                math.log(float(max_timescale) / float(min_timescale)) /
                max(num_timescales - 1, 1))
        inv_timescales = min_timescale * torch.exp(
            torch.arange(num_timescales, dtype=torch.float32) *
            -log_timescale_increment)
        self.register_buffer('inv_timescales', inv_timescales)

        ### RIN Parameters ###
        if self.RIN:
            self.affine_weight = nn.Parameter(torch.ones(1, 1, input_dim))
            self.affine_bias = nn.Parameter(torch.zeros(1, 1, input_dim))

    def get_position_encoding(self, x):
        """Return a sinusoidal encoding of shape ``(1, x.size(1), pe_hidden_size)``."""
        max_length = x.size()[1]
        position = torch.arange(max_length, dtype=torch.float32, device=x.device)
        scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0)  # [T, C/2]
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)  # [T, C]
        signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2))
        signal = signal.view(1, max_length, self.pe_hidden_size)

        return signal

    def _reverse_rin(self, t, means, stdev):
        """Undo the RIN affine transform and normalisation on ``t``."""
        t = t - self.affine_bias
        t = t / (self.affine_weight + 1e-10)
        t = t * stdev
        t = t + means
        return t

    def forward(self, x):
        assert self.input_len % (np.power(2, self.num_levels)) == 0 # evenly divided the input length into two parts. (e.g., 32 -> 16 -> 8 -> 4 for 3 levels)
        if self.pe:
            pe = self.get_position_encoding(x)
            # Out-of-place add so the caller's tensor is not modified.
            if pe.shape[2] > x.shape[2]:
                # Encoding width was rounded up to even; drop the extra channel.
                x = x + pe[:, :, :-1]
            else:
                x = x + pe

        ### activated when RIN flag is set ###
        if self.RIN:
            print('/// RIN ACTIVATED ///\r',end='')
            means = x.mean(1, keepdim=True).detach()
            #mean
            x = x - means
            #var
            stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x /= stdev
            # affine
            x = x * self.affine_weight + self.affine_bias

        # the first stack
        res1 = x
        x = self.blocks1(x)
        x += res1
        if self.num_decoder_layer == 1:
            x = self.projection1(x)
        else:
            x = x.permute(0,2,1)
            for div_projection in self.div_projection:
                # Allocate on the input's own device (previously hard-coded to
                # CUDA, which failed on CPU/MPS runs).
                output = torch.zeros(x.shape, dtype=x.dtype, device=x.device)
                for i, div_layer in enumerate(div_projection):
                    start = i * self.div_len
                    stop = min(start + self.overlap_len, self.input_len)
                    output[:, :, start:start + self.div_len] = div_layer(x[:, :, start:stop])
                x = output
            x = self.projection1(x)
            x = x.permute(0,2,1)

        if self.stacks == 1:
            ### reverse RIN ###
            if self.RIN:
                x = self._reverse_rin(x, means, stdev)

            return x

        elif self.stacks == 2:
            MidOutPut = x
            if self.concat_len:
                x = torch.cat((res1[:, -self.concat_len:,:], x), dim=1)
            else:
                x = torch.cat((res1, x), dim=1)

            # the second stack
            res2 = x
            x = self.blocks2(x)
            x += res2
            x = self.projection2(x)

            ### Reverse RIN ###
            if self.RIN:
                MidOutPut = self._reverse_rin(MidOutPut, means, stdev)
                x = self._reverse_rin(x, means, stdev)

            return x, MidOutPut

def get_variable(x):
    """Wrap ``x`` in ``Variable`` and move it to the GPU when CUDA is available."""
    wrapped = Variable(x)
    if torch.cuda.is_available():
        return wrapped.cuda()
    return wrapped

In [None]:
import math
import torch.nn.functional as F
from torch.autograd import Variable
from torch import nn
import torch
import argparse
import numpy as np

class moving_avg(nn.Module):
    """
    Moving-average smoother over dim 1 of a 3-D input.

    Both ends are padded by repeating the first / last step
    ``(kernel_size - 1) // 2`` times before average pooling.
    """
    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # Number of replicated steps added on each side.
        pad = (self.kernel_size - 1) // 2
        head = x[:, :1, :].repeat(1, pad, 1)
        tail = x[:, -1:, :].repeat(1, pad, 1)
        padded = torch.cat([head, x, tail], dim=1)
        # AvgPool1d pools over the last axis, so swap dims 1 and 2 around the call.
        pooled = self.avg(padded.permute(0, 2, 1))
        return pooled.permute(0, 2, 1)


class series_decomp(nn.Module):
    """
    Split a series into (residual, moving-average trend).
    """
    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        # The trend is the moving average; the residual is whatever is left.
        trend = self.moving_avg(x)
        return x - trend, trend

class SCINet_decompose(nn.Module):
    """SCINet on the residual of a series plus a linear branch on its trend.

    ``forward`` splits the input with ``series_decomp(25)`` into a residual and
    a moving-average trend. The residual goes through ``EncoderTree`` block(s)
    and 1x1 ``Conv1d`` projections over dim 1; the trend goes through two
    ``nn.Linear`` layers applied along the time axis. The two results are summed.

    Args:
        output_len: number of steps produced along dim 1.
        input_len: number of input steps along dim 1; ``forward`` asserts it is
            divisible by ``2 ** num_levels``.
        input_dim: size of the last input axis (channels).
        hid_size, num_levels, groups, kernel, dropout, modified: forwarded to
            ``EncoderTree`` as ``hidden_size``, ``num_levels``, ``groups``,
            ``kernel_size``, ``dropout`` and ``INN``.
        num_stacks: 1 or 2. With 2, a second ``EncoderTree`` refines the
            concatenation of (part of) the input and the first-stack output.
        concat_len: with two stacks, how many trailing input steps are
            concatenated; a falsy value concatenates the whole input.
        single_step_output_One: if truthy, the second-stack projection emits a
            single step instead of ``output_len`` steps.
        input_len_seg: accepted but not used by this class.
        positionalE: add a sinusoidal positional encoding to the residual.
        RIN: normalise residual and trend per sample over dim 1 (with learnable
            affine parameters) and undo that on the outputs.

    Returns (from ``forward``):
        ``num_stacks == 1``: a single tensor ``x + trend``.
        ``num_stacks == 2``: ``(x + trend, MidOutPut)`` where ``MidOutPut`` is
        the first-stack output. Any other value falls through and returns None.
    """
    def __init__(self, output_len, input_len, input_dim = 9, hid_size = 1, num_stacks = 1,
                num_levels = 3, concat_len = 0, groups = 1, kernel = 5, dropout = 0.5,
                 single_step_output_One = 0, input_len_seg = 0, positionalE = False, modified = True, RIN=False):
        super(SCINet_decompose, self).__init__()

        self.input_dim = input_dim
        self.input_len = input_len
        self.output_len = output_len
        self.hidden_size = hid_size
        self.num_levels = num_levels
        self.groups = groups
        self.modified = modified
        self.kernel_size = kernel
        self.dropout = dropout
        self.single_step_output_One = single_step_output_One
        self.concat_len = concat_len
        self.pe = positionalE
        self.RIN=RIN

        # NOTE: modules are created in the same order as before so that a fixed
        # random seed keeps producing the same initial weights.
        self.decomp = series_decomp(25)
        self.trend = nn.Linear(input_len,input_len)
        self.trend_dec = nn.Linear(input_len,output_len)
        self.blocks1 = EncoderTree(
            in_planes=self.input_dim,
            num_levels = self.num_levels,
            kernel_size = self.kernel_size,
            dropout = self.dropout,
            groups = self.groups,
            hidden_size = self.hidden_size,
            INN =  modified)

        if num_stacks == 2: # we only implement two stacks at most.
            self.blocks2 = EncoderTree(
                in_planes=self.input_dim,
                num_levels = self.num_levels,
                kernel_size = self.kernel_size,
                dropout = self.dropout,
                groups = self.groups,
                hidden_size = self.hidden_size,
                INN =  modified)

        self.stacks = num_stacks

        # Custom init for the modules created so far (the projections below are
        # created afterwards and therefore keep PyTorch's default init).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

        # 1x1 convolutions that mix along dim 1 (time steps act as channels).
        self.projection1 = nn.Conv1d(self.input_len, self.output_len, kernel_size=1, stride=1, bias=False)
        if self.stacks == 2:
            # Second stack sees the concatenated (input part + first forecast).
            stack2_in = (self.concat_len or self.input_len) + self.output_len
            stack2_out = 1 if self.single_step_output_One else self.output_len
            self.projection2 = nn.Conv1d(stack2_in, stack2_out, kernel_size = 1, bias = False)

        # For positional encoding: use an even width so sin/cos halves match.
        self.pe_hidden_size = input_dim
        if self.pe_hidden_size % 2 == 1:
            self.pe_hidden_size += 1

        num_timescales = self.pe_hidden_size // 2
        max_timescale = 10000.0
        min_timescale = 1.0

        log_timescale_increment = (
                math.log(float(max_timescale) / float(min_timescale)) /
                max(num_timescales - 1, 1))
        inv_timescales = min_timescale * torch.exp(
            torch.arange(num_timescales, dtype=torch.float32) *
            -log_timescale_increment)
        self.register_buffer('inv_timescales', inv_timescales)

        ### RIN Parameters ###
        if self.RIN:
            # One affine pair for the residual, one for the trend.
            self.affine_weight = nn.Parameter(torch.ones(1, 1, input_dim))
            self.affine_bias = nn.Parameter(torch.zeros(1, 1, input_dim))
            self.affine_weight2 = nn.Parameter(torch.ones(1, 1, input_dim))
            self.affine_bias2 = nn.Parameter(torch.zeros(1, 1, input_dim))

    @staticmethod
    def _rin_normalize(x, weight, bias):
        """Normalise ``x`` over dim 1, apply the affine map; return (x, means, stdev)."""
        means = x.mean(1, keepdim=True).detach()
        x = x - means
        stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x = x / stdev
        x = x * weight + bias
        return x, means, stdev

    @staticmethod
    def _rin_denormalize(x, weight, bias, means, stdev):
        """Invert ``_rin_normalize`` (the 1e-10 guards against a zero weight)."""
        x = x - bias
        x = x / (weight + 1e-10)
        x = x * stdev
        x = x + means
        return x

    def get_position_encoding(self, x):
        """Return a (1, x.size(1), pe_hidden_size) sinusoidal encoding on x's device."""
        max_length = x.size()[1]
        position = torch.arange(max_length, dtype=torch.float32, device=x.device)
        scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0)
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)  #[T, C]
        signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2))
        signal = signal.view(1, max_length, self.pe_hidden_size)

        return signal

    def forward(self, x):
        # The input length must halve evenly at every level
        # (e.g., 32 -> 16 -> 8 -> 4 for 3 levels).
        assert self.input_len % (2 ** self.num_levels) == 0
        x, trend = self.decomp(x)

        if self.RIN:
            x, means, stdev = self._rin_normalize(x, self.affine_weight, self.affine_bias)
            trend, means2, stdev2 = self._rin_normalize(trend, self.affine_weight2, self.affine_bias2)

        if self.pe:
            pe = self.get_position_encoding(x)
            if pe.shape[2] > x.shape[2]:
                # The encoding width was rounded up to even; drop the extra column.
                x = x + pe[:, :, :-1]
            else:
                x = x + pe

        # the first stack
        res1 = x
        x = self.blocks1(x)
        x = self.projection1(x)

        # Trend branch: the linear layers act on the time axis, hence the permutes.
        trend = trend.permute(0,2,1)
        trend = self.trend(trend)
        trend = self.trend_dec(trend).permute(0,2,1)

        if self.stacks == 1:
            ### reverse RIN ###
            if self.RIN:
                x = self._rin_denormalize(x, self.affine_weight, self.affine_bias, means, stdev)
                trend = self._rin_denormalize(trend, self.affine_weight2, self.affine_bias2, means2, stdev2)

            return x + trend

        elif self.stacks == 2:
            MidOutPut = x
            if self.concat_len:
                x = torch.cat((res1[:, -self.concat_len:,:], x), dim=1)
            else:
                x = torch.cat((res1, x), dim=1)

            # the second stack
            x = self.blocks2(x)
            x = self.projection2(x)

            ### Reverse RIN ###
            if self.RIN:
                MidOutPut = self._rin_denormalize(MidOutPut, self.affine_weight, self.affine_bias, means, stdev)
                x = self._rin_denormalize(x, self.affine_weight, self.affine_bias, means, stdev)
                trend = self._rin_denormalize(trend, self.affine_weight2, self.affine_bias2, means2, stdev2)

            return x + trend, MidOutPut


def get_variable(x):
    """Return ``Variable(x)``, placed on the GPU if ``torch.cuda.is_available()``."""
    var = Variable(x)
    if not torch.cuda.is_available():
        return var
    return var.cuda()

In [None]:
class SCINet_Model(nn.Module):
    """Wrapper that runs ``SCINet_decompose`` and feeds its last step to ``NLinear``.

    Both a plain ``SCINet`` and a ``SCINet_decompose`` are built with the same
    fixed settings (window 1, horizon 1, one level, one stack, RIN enabled);
    only ``scinet_decompose`` is used by ``forward``. The ``fc_1``/``fc_2``/``fc``
    heads, ``bn`` and the activations are kept as attributes although the
    current ``forward`` does not call them.

    Args:
        input_size: number of input features (passed as ``input_dim``).
    """
    def __init__(self,input_size):
        # A single call is enough; nn.Module.__init__ was previously run twice.
        super().__init__()

        # Settings shared by the two SCINet variants below.
        window_size = 1 # in (fixed)
        horizon = 1      # out
        hidden_size = 1
        groups = 1
        kernel = 1
        dropout = 0.5
        single_step_output_One = False
        num_levels = 1
        positionalEcoding = True
        num_stacks = 1
        self.scinet = SCINet(
            output_len = horizon, input_len = window_size, input_dim = input_size, hid_size = hidden_size, 
            num_stacks = num_stacks, num_levels = num_levels, concat_len = 0, groups = groups, kernel = kernel, 
            dropout = dropout, single_step_output_One = single_step_output_One, positionalE =  positionalEcoding, 
            modified = True, RIN = True,
        )
        # NOTE(review): SCINet_decompose.forward asserts
        # input_len % 2**num_levels == 0, which does not hold for
        # window_size=1, num_levels=1 — confirm these settings.
        self.scinet_decompose = SCINet_decompose(
            output_len = horizon, input_len = window_size, input_dim = input_size, hid_size = hidden_size, 
            num_stacks = num_stacks, num_levels = num_levels, concat_len = 0, groups = groups, kernel = kernel, 
            dropout = dropout, single_step_output_One = single_step_output_One, positionalE =  positionalEcoding, 
            modified = True, RIN = True,
        )

        self.dropout = nn.Dropout(p=dropout)
        self.bn = nn.BatchNorm1d(24)
        self.relu = nn.ReLU()
        self.selu = nn.SELU()
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.01, inplace=True)

        # Alternative heads, each wrapped in TimeDistributed.
        self.fc_1 = nn.Linear(input_size, 16)
        self.fc_1 = TimeDistributed(self.fc_1)
        self.fc_2 = nn.Linear(16, 1)
        self.fc_2 = TimeDistributed(self.fc_2)
        self.fc   = nn.Linear(input_size,1)
        self.fc   = TimeDistributed(self.fc)

        self.nlinear = NLinear(input_size,1)
        self._reinitialize()

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization
        """
        for name, p in self.named_parameters():
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                    # Set forget-gate bias to 1
                    n = p.size(0)
                    p.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'bias' in name:
                    p.data.fill_(0)

    def forward(self, x):
        """Run the decomposed SCINet and project its last step with ``nlinear``."""
        x = self.scinet_decompose(x)
        # NOTE(review): x[:,-1,:] is 2-D, while NLinear.forward in this file
        # indexes three dims (x[:,-1:,:]) — confirm the intended head/shape.
        x = self.nlinear(x[:,-1,:])
        return x

<br>

## NLinear

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class NLinear(nn.Module):
    """
    Normalization-Linear: subtract the last step, apply one linear map along
    dim 1, then add the last step back.

    Args:
        seq_len: input length (in_features of the linear layer).
        pred_len: output length (out_features of the linear layer).
        td: wrap the linear layer in ``TimeDistributed`` when truthy.
    """
    def __init__(self, seq_len, pred_len, td=True):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len

        layer = nn.Linear(self.seq_len, self.pred_len)
        self.Linear = TimeDistributed(layer) if td else layer

        # Uniform 1/seq_len start; `_reinitialize` below overwrites every
        # parameter whose name contains 'weight' with Xavier-uniform values.
        # NOTE(review): with td=True this attaches `weight` to the wrapper, not
        # to the wrapped nn.Linear — confirm that is intended.
        uniform = torch.full((self.pred_len, self.seq_len), 1 / self.seq_len)
        self.Linear.weight = nn.Parameter(uniform)
        self._reinitialize()

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization: Xavier-uniform weights, zero biases.
        """
        for name, param in self.named_parameters():
            if 'Linear' not in name:
                continue
            if 'weight' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        anchor = x[:, -1:, :].detach()
        centered = x - anchor
        # The linear layer acts on the last axis, so move time there and back.
        projected = self.Linear(centered.permute(0, 2, 1)).permute(0, 2, 1)
        return projected + anchor  # [Batch, Output length, Channel]

In [None]:
class NLinear_Model(nn.Module):
    """Stack of five ``NLinear`` layers with dropout and LeakyReLU in between.

    Layer widths are ``seq_len -> 40 -> 40 -> 40 -> 40 -> pred_len``. One
    ``nn.Dropout(0.5)`` instance and one in-place ``nn.LeakyReLU`` instance are
    shared by all hidden layers. ``forward`` returns the last index along dim 1
    of the stack's output.

    Args:
        seq_len: input length given to the first ``NLinear``.
        pred_len: output length of the last ``NLinear``.
        input_size: in_features of the ``fc`` head (built but not used by
            ``forward``).
    """
    def __init__(self, seq_len, pred_len, input_size):
        # A single call is enough; nn.Module.__init__ was previously run twice.
        super().__init__()

        nodes = [40]*4
        dropout = 0.5

        self.relu = nn.ReLU()
        self.selu = nn.SELU()
        self.gelu = nn.GELU()
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.01, inplace=True)
        self.dropout = nn.Dropout(dropout)

        self.activation = self.leakyrelu

        self.model = nn.Sequential(
            NLinear(seq_len ,nodes[0]), self.dropout, self.activation,
            NLinear(nodes[0],nodes[1]), self.dropout, self.activation,
            NLinear(nodes[1],nodes[2]), self.dropout, self.activation,
            NLinear(nodes[2],nodes[3]), self.dropout, self.activation,
            NLinear(nodes[3],pred_len),
        )

        self.fc   = nn.Linear(input_size,1)
        self.fc   = TimeDistributed(self.fc)
        # `_reinitialize` is intentionally not called here; each NLinear
        # already initialises itself in its own constructor.

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization
        """
        for name, p in self.named_parameters():
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                    # Set forget-gate bias to 1
                    n = p.size(0)
                    p.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'bias' in name:
                    p.data.fill_(0)

    def forward(self,x):
        """Apply the NLinear stack and keep the last index along dim 1."""
        x = self.model(x)
        x = x[:,-1,:]
        return x

<br>

## Train / Validation Definitions

In [None]:
from lib.EarlyStopping import EarlyStopping

# Function used to map values back to the original scale.
# Presumably the target was log-transformed upstream — TODO confirm.
inverse_transform_function = np.exp

In [None]:
def rmse_loss_fn(output, target):
    """Root-mean-square error between ``output`` and ``target`` (scalar tensor)."""
    squared_error = (output - target).pow(2)
    return squared_error.mean().sqrt()

In [None]:
import time

def train(
    model, optimizer, train_loader, valid_loader, scheduler, device, 
    early_stopping, epochs, metric_period=1, best_model_only=True, verbose=True,
):
    """Train ``model`` on per-batch RMSE and return it with its best weights loaded.

    Args:
        model: network to train; moved to ``device``.
        optimizer: optimizer already bound to ``model``'s parameters.
        train_loader: iterable of ``(X, Y)`` training batches.
        valid_loader: iterable of ``(X, Y)`` validation batches.
        scheduler: optional scheduler, stepped with the validation loss once per
            epoch (``None`` disables it).
        device: device the model and batches are moved to.
        early_stopping: if truthy, stop when the ``EarlyStopping`` helper
            (configured from ``CFG['ES_PATIENCE']`` / ``CFG['ES_VERBOSE']``)
            signals ``early_stop``.
        epochs: maximum number of epochs.
        metric_period: print a progress line every this many epochs.
        best_model_only: if truthy, track the lowest validation loss, save those
            weights to ``./model/best_model.pt`` and return the model with them.
        verbose: print progress lines when truthy.

    Returns:
        ``model`` with the best-validation weights loaded, or ``None`` when
        ``best_model_only`` is falsy or no epoch ever improved the loss.
    """
    import copy  # local import: only needed to snapshot the best weights

    # Both the early-stopping checkpoint and the best-model file live here.
    os.makedirs('./model', exist_ok=True)
    es = EarlyStopping(patience = CFG['ES_PATIENCE'], verbose = CFG['ES_VERBOSE'], path='./model/checkpoint.pt')

    model.to(device)
    criterion = nn.MSELoss().to(device)

    best_loss = float('inf')
    # Snapshot of the best weights. Keeping a reference to `model` itself would
    # silently follow later (worse) epochs, so the state dict is copied instead.
    best_state = None
    start_time = time.time()
    epoch_s = time.time()
    for epoch in range(1, epochs+1):

        model.train()
        train_loss = []
        for X, Y in train_loader:

            X = X.float().to(device)
            Y = Y.float().to(device)

            optimizer.zero_grad()
            output = model(X).float()

            # The criterion is MSE; the square root makes it a per-batch RMSE.
            loss = torch.sqrt(criterion(output, Y))

            loss.backward() # Getting gradients
            optimizer.step() # Updating parameters

            train_loss.append(loss.item())

        valid_loss = validation(model, valid_loader, criterion, device)

        epoch_e = time.time()

        if scheduler is not None:
            scheduler.step(valid_loss)

        if verbose and epoch % metric_period == 0:
            # `epoch_s` is only reset when a line is printed, so `elapsed`
            # spans `metric_period` epochs; scale it back to one epoch before
            # extrapolating the remaining time.
            elapsed = epoch_e - epoch_s
            remaining = elapsed / metric_period * (epochs - epoch)
            epoch_str = str(epoch).zfill(len(str(epochs)))
            progress = '[{}/{}] tr_loss : {:.5f}, val_loss : {:.5f}, elapsed : {:.2f}s, total : {:.2f}s, remaining : {:.2f}s'\
                .format(
                    epoch_str,
                    epochs,np.mean(train_loss),
                    valid_loss,
                    elapsed,
                    epoch_e-start_time,
                    remaining,
                )
            epoch_s = time.time()

            # '*' marks an epoch that improves on the best validation loss.
            mark = '*' if best_loss > valid_loss else ' '
            print(mark+progress)

        if best_model_only and best_loss > valid_loss:
            best_loss = valid_loss
            best_state = copy.deepcopy(model.state_dict())

            path = f'./model/best_model.pt'
            torch.save(best_state, path)

        # Check for early stopping (tracks overfitting on the validation loss).
        if early_stopping:
            es(valid_loss, model)

            if es.early_stop:
                break

    if best_state is None:
        return None

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_state)
    return model

In [None]:
def validation(model, valid_loader, criterion, device):
    """Return the mean over batches of ``sqrt(criterion(model(X), Y))``.

    Puts ``model`` in eval mode and runs without gradient tracking. Each batch
    contributes one value, regardless of its size.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for features, target in valid_loader:
            features = features.float().to(device)
            target = target.float().to(device)

            prediction = model(features).float()

            # criterion is expected to be an MSE, so the root gives an RMSE.
            batch_rmse = torch.sqrt(criterion(prediction, target))
            batch_losses.append(batch_rmse.item())

    return np.mean(batch_losses)

<br></br>

# 모델링

In [None]:
class CustomDataset(Dataset):
    """Dataset of fixed-length row windows cut from feature and label frames.

    Both frames are sorted by ``['case_num', 'DAT']`` and cut into consecutive
    chunks of ``seq_length`` rows; the number of chunks comes from the label
    frame. Features are every input column except ``case_num``; the label is
    the ``predicted_weight_g`` column.

    Indexing returns ``(features, label)`` when ``infer_mode`` is False and
    only ``features`` otherwise.
    """
    def __init__(self,input,label,infer_mode,seq_length):
        self.infer_mode = infer_mode

        sort_keys = ['case_num','DAT']
        features = input.sort_values(sort_keys).drop('case_num',axis=1)
        targets = label.sort_values(sort_keys)['predicted_weight_g']

        self.input_list = []
        self.label_list = []
        n_chunks = int(label.shape[0]/seq_length)
        for chunk in range(n_chunks):
            rows = slice(chunk*seq_length, (chunk+1)*seq_length)
            self.input_list.append(torch.Tensor(features.iloc[rows].values))
            self.label_list.append(torch.Tensor(targets.iloc[rows].values))

    def __getitem__(self, index):
        data, label = self.input_list[index], self.label_list[index]
        if self.infer_mode == False:
            return data, label
        return data

    def __len__(self):
        return len(self.input_list)

In [None]:
# Read the two CSVs and discard their `predicted_weight_g` column.
input_df = pd.read_csv('./out/kf_cat_1/pred_input_df_10.csv')
input_df = input_df.drop('predicted_weight_g', axis=1)
test_input_df = pd.read_csv('./out/kf_cat_1/pred_test_df_10.csv')
test_input_df = test_input_df.drop('predicted_weight_g', axis=1)

# Turn case numbers into strings, prefixing values below 10 with a '0'.
input_df['case_num'] = [
    '0' + str(n) if n < 10 else str(n) for n in input_df['case_num']
]
test_input_df['case_num'] = [
    '0' + str(n) if n < 10 else str(n) for n in test_input_df['case_num']
]

In [None]:
# Loader settings shared by every DataLoader in this notebook section.
seq_length, batch_size, num_workers = 1, 16, 0  # batch size mirrors CFG['BATCH_SIZE']

# Full labelled set, yielding (features, label) pairs in a fixed order.
input_dataset = CustomDataset(
    input=input_df, label=label_df, infer_mode=False, seq_length=seq_length,
)
input_loader = DataLoader(
    input_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
)

# Test set in inference mode, yielding features only.
test_dataset = CustomDataset(
    input=test_input_df, label=test_label_df, infer_mode=True, seq_length=seq_length,
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
)

In [None]:
# Frames that collect one prediction column per fold: the feature columns with
# the label column appended positionally (the label index is reset first).
# NOTE(review): CustomDataset sorts rows by ['case_num','DAT']; predictions
# written into these frames line up only if input_df / test_input_df are
# already in that order — confirm.
pred_input_df = pd.concat(
    [input_df, label_df['predicted_weight_g'].reset_index(drop=True)],
    axis=1,
)
pred_test_df = pd.concat(
    [test_input_df, test_label_df['predicted_weight_g'].reset_index(drop=True)],
    axis=1,
)

In [None]:
import datetime

# Tag that distinguishes this run's output folders and files.
save_mark = '2'

# Per-fold prediction CSVs go to the first folder, submission files to the second.
paths = [f'./out/kf_lstm_{save_mark}',f'./out/kf_lstm_{save_mark}_fn']
for path in paths:
    # makedirs also creates a missing ./out parent and is a no-op when the
    # folder already exists (os.mkdir would raise if ./out were absent).
    os.makedirs(path, exist_ok=True)

In [None]:
# Runtime noted by the author: about 1 hour (CPU).
#
# K-fold training over case numbers: for every fold a fresh model is trained on
# the training cases, then used to predict the full labelled set and the test
# set; each fold's predictions are stored in a `pred_{fold}` column and the
# frames are written to ./out/kf_lstm_{save_mark}/.

from sklearn.model_selection import KFold

n_splits = 10

# Folds are split on unique case numbers, so all rows of a case stay together.
case_num = input_df.case_num.unique()
kf = KFold(n_splits=n_splits,shuffle=True,random_state=42)

kf_iter = 0
for tr_idx,va_idx in tqdm(kf.split(case_num),total=n_splits):
    kf_iter+=1
    print(f'-'*100)
    print(f'({kf_iter}/{n_splits})')
    print(f'-'*100)
    
    #------------------------------------------------------------------------------------
    # (1) train validation split
    #------------------------------------------------------------------------------------
    tr_case_num = case_num[tr_idx]
    va_case_num = case_num[va_idx]
    
    X_train = input_df[input_df.case_num.isin(tr_case_num)]
    X_valid = input_df[input_df.case_num.isin(va_case_num)]

    y_train = label_df[label_df.case_num.isin(tr_case_num)]
    y_valid = label_df[label_df.case_num.isin(va_case_num)]
    # print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)

    #------------------------------------------------------------------------------------
    # (2) custom dataset
    #------------------------------------------------------------------------------------
    train_dataset = CustomDataset(input=X_train, label=y_train, infer_mode=False, seq_length=seq_length)
    train_loader  = DataLoader(train_dataset, batch_size = batch_size, shuffle=False, num_workers=num_workers) # CFG['BATCH_SIZE']

    valid_dataset = CustomDataset(input=X_valid, label=y_valid, infer_mode=False, seq_length=seq_length)
    valid_loader  = DataLoader(valid_dataset, batch_size = batch_size, shuffle=False, num_workers=num_workers) # CFG['BATCH_SIZE']
    
    # [(x.size(),y.size()) for x,y in iter(train_loader)]
    # [y for x,y in iter(train_loader)]
    # sum([y.size(0) for x,y in iter(train_loader)])

    # len([x for x,y in iter(train_loader)])

    # [(x[0].size(),x[1].size()) for x in train_loader]
    
    #------------------------------------------------------------------------------------
    # (3) modeling
    #------------------------------------------------------------------------------------
    # Re-seed per fold so every fold's model starts from the same RNG state.
    seed_everything(CFG['SEED'])

    # Feature dimension = last axis of the first batch's X. The comprehension
    # walks the whole loader just to read that one shape.
    input_size = [np.array(x[0]).shape for x in train_loader][0][2]
    model = LSTM_Model(input_size=input_size)
    # model = NLinear_Model(seq_len=1,pred_len=1,input_size=input_size)
    # model = Model(input_size=input_size)
    # model = SCINet_Model(input_size=input_size)
    # model = BaseModel(
    #     input_size = input_size,
    #     hidden_sizes=[400,300],
    #     dropout_rates=[0.2,0.2],
    #     num_classes=seq_length,
    #     num_layers=2,
    #     bidirectional=True,
    # )

    # NOTE(review): train() calls model.train() at the start of every epoch, so
    # this eval() has no lasting effect.
    model.eval()
    optimizer = torch.optim.Adam(params = model.parameters(), lr = 1e-4, weight_decay=1e-5)
    # optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-4, momentum=0.9)
    # Halve the learning rate after 100 epochs without validation improvement.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=100, threshold_mode='abs',min_lr=1e-7, verbose=False)

    # train() reads these two keys from the global CFG for its EarlyStopping helper.
    CFG['ES_PATIENCE'] = 2000
    CFG['ES_VERBOSE']  = 0
    best_model = train(
        model,
        optimizer=optimizer,
        train_loader=train_loader,
        valid_loader=valid_loader,
        scheduler=scheduler,
        device=device,
        early_stopping=True,
        metric_period=100,
        epochs=16000,
        best_model_only=True,
        verbose=1,
    )
    
    #------------------------------------------------------------------------------------
    # (4-1) predict : input dataset
    #------------------------------------------------------------------------------------
    best_model.to(device)
    best_model.eval()
    pred_list = []
    #true_list = []
    with torch.no_grad():
        for X,y in iter(input_loader): # train_loader, valid_loader
            X = X.float().to(device)

            model_pred = best_model(X)
            # model_pred = torch.exp(model_pred)

            # Flatten the batch output and append it to the running list.
            pred_list += model_pred.cpu().numpy().reshape(-1).tolist()
            #true_list += y         .cpu().numpy().reshape(-1).tolist()
            
    # NOTE(review): input_loader yields rows in CustomDataset's sorted
    # (['case_num','DAT']) order; this assignment is positional, so it assumes
    # pred_input_df is in the same order — confirm.
    pred_input_df[f'pred_{kf_iter}'] = pred_list
    pred_input_df.to_csv(f'./out/kf_lstm_{save_mark}/pred_input_df_{kf_iter}.csv',index=False)
            
    #------------------------------------------------------------------------------------
    # (4-2) predict : test dataset
    #------------------------------------------------------------------------------------
    best_model.to(device)
    best_model.eval()
    pred_list = []
    #true_list = []
    with torch.no_grad():
        # test_dataset is in inference mode, so each batch is X only.
        for X in iter(test_loader): # train_loader, valid_loader
            X = X.float().to(device)

            model_pred = best_model(X)
            # model_pred = torch.exp(model_pred)

            pred_list += model_pred.cpu().numpy().reshape(-1).tolist()
            #true_list += y         .cpu().numpy().reshape(-1).tolist()
            
    pred_test_df[f'pred_{kf_iter}'] = pred_list
    pred_test_df.to_csv(f'./out/kf_lstm_{save_mark}/pred_test_df_{kf_iter}.csv',index=False)

<br></br>

# Inference

## mean

In [None]:
from sklearn.metrics import mean_squared_error

# Every column whose name contains 'pred_'.
pred_cols = [name for name in pred_input_df.columns if 'pred_' in name]

# MSE between the `pred_2` column and the label, one [case_num, mse] pair per
# case, in order of first appearance of each case.
mse_list = []
for case_num, d in pred_input_df.groupby('case_num', sort=False):
    _mse = mean_squared_error(d['pred_2'], d['predicted_weight_g'])
    mse_list.append([case_num, _mse])

    # plt.figure(figsize=(6,4))
    # sns.scatterplot(
    #     x=d['pred_2'],
    #     #x=d[pred_cols].apply(lambda x: x.mean(), axis=1),
    #     y=d['predicted_weight_g'],
    # )
    # abline(slope=1,intercept=0,color='red')
    # plt.title(f'CASE={case_num} : MSE = {_mse:.5f}')
    # plt.show()

In [None]:
# MSE of each fold's prediction column against the label. Iterating over
# `pred_cols` instead of a hard-coded 1..10 keeps this working when the number
# of folds changes.
target = pred_input_df['predicted_weight_g']
for col in pred_cols:
    _mse = mean_squared_error(target, pred_input_df[col])
    print(f'{col} :',_mse)

# Simple-average ensembles; DataFrame.mean(axis=1) is the vectorised form of
# applying `x.mean()` to every row.
print('all :',mean_squared_error(target, pred_input_df[pred_cols].mean(axis=1)))
print('2&4 :',mean_squared_error(target, pred_input_df[['pred_2','pred_4']].mean(axis=1)))

In [None]:
# Scatter of the label (x) against the mean of the pred_2 / pred_4 columns (y),
# with a y = x reference line. The row mean is computed with the vectorised
# DataFrame.mean instead of a per-row apply.
plt.figure(figsize=(6,4))
sns.scatterplot(
    x=pred_input_df['predicted_weight_g'],
    #y=pred_input_df['pred_2'],
    y=pred_input_df[['pred_2','pred_4']].mean(axis=1),
)
abline(slope=1,intercept=0,color='red')
plt.show()

In [None]:
# # mse<10 : the maximum y is about 80 or lower
# # mse>10 : includes cases whose maximum y is about 80, but many have higher values
# d = pred_input_df[pred_input_df.case_num.isin([case_num for case_num, mse in mse_list if mse>10])]
# for case_num in d.case_num.unique():
#     dd = d[d.case_num==case_num]
#     print(case_num)
#     sns.lineplot(x=dd.DAT,y=(dd.pred_2+dd.pred_4)/2)
#     sns.lineplot(x=dd.DAT,y=dd.predicted_weight_g)
#     plt.show()

In [None]:
# Simple-average ensemble over every column whose name contains 'pred_'.
pred_cols = [col for col in pred_input_df.columns if 'pred_' in col]
# pred_cols = ['pred_2','pred_4']
# Vectorised row mean (replaces a per-row Python-level apply).
pred_input_df['preds'] = pred_input_df[pred_cols].mean(axis=1)

mean_squared_error(pred_input_df['preds'],pred_input_df['predicted_weight_g'])

In [None]:
# Average the fold predictions of the test frame into its label column.
# The column list is taken from pred_test_df itself, the frame being indexed.
pred_cols = [col for col in pred_test_df.columns if 'pred_' in col]
# pred_cols = ['pred_2','pred_4']
pred_test_df['predicted_weight_g'] = pred_test_df[pred_cols].mean(axis=1)

In [None]:
# Display the current `save_mark` (used in the ./out/kf_lstm_{save_mark} paths).
save_mark

In [None]:
# Write one submission CSV per test case with columns DAT / predicted_weight_g.
sub = pred_test_df.sort_values(['case_num','DAT'])

for case_num in sub.case_num.unique():
    # .copy() so the DAT rewrite below never targets a view of `sub`.
    s = sub.loc[sub.case_num==case_num, ['DAT','predicted_weight_g']].copy()
    # Renumber the rows 1..n; n is taken from the data instead of assuming
    # exactly 28 rows per case.
    s['DAT'] = np.arange(1, len(s) + 1)
    s.to_csv(f'./out/kf_lstm_{save_mark}_fn/TEST_{case_num}.csv',index=False)

In [None]:
import zipfile

# Work from the project root; all paths below are relative to it.
os.chdir('/Users/khj/MyPython/Dacon/6_상추생육환경생성')

# Bundle the per-case CSVs into ./out/kf_lstm_{save_mark}.zip. Files are added
# under their bare names. The `with` block closes the archive even if a write
# fails, and the working directory never leaves the project root.
out_dir = f"./out/kf_lstm_{save_mark}_fn/"
with zipfile.ZipFile(f"./out/kf_lstm_{save_mark}.zip", 'w') as submission:
    for path in all_test_label_list:
        path = path.split('/')[-1]
        submission.write(os.path.join(out_dir, path), arcname=path)

In [None]:
# 9.8433

<br>

## weighted

In [None]:
from sklearn.metrics import mean_squared_error

# Every column whose name contains 'pred_'.
pred_cols = [col for col in pred_input_df.columns if 'pred_' in col]

# MSE of each prediction column against the label.
mse_list = [
    mean_squared_error(pred_input_df[col],pred_input_df['predicted_weight_g'])
    for col in pred_cols
]

# Inverse-MSE weights, normalised to sum to 1. The list is converted to an
# array first: `list / scalar` only works when the scalar is a NumPy type and
# raises TypeError for plain Python floats.
mse = np.asarray(mse_list, dtype=float)
weights = 1/(mse/mse.sum())
weights = weights / weights.sum()

# Weighted sum across the prediction columns (one weight per column).
final_pred = pred_input_df[pred_cols].mul(weights, axis='columns').sum(axis=1)

mean_squared_error(final_pred,pred_input_df['predicted_weight_g'])

In [None]:
# Apply the same per-column weights to the test predictions and store the
# weighted sum as the test label (vectorised row sum instead of a per-row apply).
test_pred = pred_test_df[pred_cols].mul(weights, axis='columns').sum(axis=1)
pred_test_df['predicted_weight_g'] = test_pred

In [None]:
# Write one submission CSV per test case with columns DAT / predicted_weight_g.
sub = pred_test_df.sort_values(['case_num','DAT'])

for case_num in sub.case_num.unique():
    # .copy() so the DAT rewrite below never targets a view of `sub`.
    s = sub.loc[sub.case_num==case_num, ['DAT','predicted_weight_g']].copy()
    # Renumber the rows 1..n; n is taken from the data instead of assuming
    # exactly 28 rows per case.
    s['DAT'] = np.arange(1, len(s) + 1)
    s.to_csv(f'./out/kf_lstm_{save_mark}_fn/TEST_{case_num}.csv',index=False)

In [None]:
import zipfile

# Work from the project root; all paths below are relative to it.
os.chdir('/Users/khj/MyPython/Dacon/6_상추생육환경생성')

# Bundle the per-case CSVs into ./out/kf_lstm_{save_mark}.zip. Files are added
# under their bare names. The `with` block closes the archive even if a write
# fails, and the working directory never leaves the project root.
out_dir = f"./out/kf_lstm_{save_mark}_fn/"
with zipfile.ZipFile(f"./out/kf_lstm_{save_mark}.zip", 'w') as submission:
    for path in all_test_label_list:
        path = path.split('/')[-1]
        submission.write(os.path.join(out_dir, path), arcname=path)

In [None]:
# 10.39