## Import

In [1]:
import os
import random
import optuna
import pandas as pd
import numpy as np

from collections import Counter
from prophet import Prophet
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from datetime import datetime

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


## Fixed Random-Seed

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

- TG: 감귤
- BC: 브로콜리
- RD: 무
- CR: 당근
- CB: 양배추
- corporation : 법인 A부터 F 존재
- location : 지역코드
- J : 제주도 제주시
- S : 제주도 서귀포시
- supply(kg) : 유통된 물량, kg 단위
- price(원/kg) : 유통된 품목들의 kg 마다의 가격, 원 단위

## Load Data

In [3]:
train_df = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\제주 특산물\train.csv')
test_df = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\제주 특산물\test.csv')

international = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\제주 특산물\international_trade.csv')

## Data Pre-Processing

In [4]:
train_df

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg)
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0
...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0


In [5]:
international.head()

Unnamed: 0,기간,품목명,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
0,2019-01,토마토(신선한 것이나 냉장한 것으로 한정한다),356571,990,0,0,990
1,2019-01,양파,821330,222,4003206,1118,-896
2,2019-01,쪽파,60,1,93405,128,-127
3,2019-01,꽃양배추와 브로콜리(broccoli),160,1,638913,563,-562
4,2019-01,방울다다기 양배추,0,0,7580,38,-38


In [6]:
international = international.rename(columns={
    '기간': 'Period',
    '품목명': 'item',
    '수출 중량': 'Export Weight',
    '수출 금액': 'Export Value',
    '수입 중량': 'Import Weight',
    '수입 금액': 'Import Value',
    '무역수지': 'Trade Balance'
})

international

Unnamed: 0,Period,item,Export Weight,Export Value,Import Weight,Import Value,Trade Balance
0,2019-01,토마토(신선한 것이나 냉장한 것으로 한정한다),356571,990,0,0,990
1,2019-01,양파,821330,222,4003206,1118,-896
2,2019-01,쪽파,60,1,93405,128,-127
3,2019-01,꽃양배추와 브로콜리(broccoli),160,1,638913,563,-562
4,2019-01,방울다다기 양배추,0,0,7580,38,-38
...,...,...,...,...,...,...,...
1269,2023-02,포포(papaw)[파파야(papaya)],0,0,23830,71,-71
1270,2023-02,사과,135165,351,0,0,351
1271,2023-02,배,2206012,5411,1,0,5411
1272,2023-02,신 체리[프루너스 체라서스(Prunus cerasus)],5,0,0,0,0


In [7]:
selected_items = international[international['item'].str.contains('감귤|브로콜리|무|당근|양배추')].copy()

selected_items['item'] = selected_items['item'].replace({
    '감귤': 'TG',
    '브로콜리': 'BC',
    '무': 'RD',
    '당근': 'CR',
    '양배추': 'CB'
}, regex = True)

selected_items['item'] = selected_items['item'].str.replace('.*감귤.*', 'TG')
selected_items['item'] = selected_items['item'].str.replace('.*브로콜리.*', 'BC')
selected_items['item'] = selected_items['item'].str.replace('.*무.*', 'RD')
selected_items['item'] = selected_items['item'].str.replace('.*당근.*', 'CR')
selected_items['item'] = selected_items['item'].str.replace('.*양배추.*', 'CB')

selected_items = selected_items[selected_items['item'] != 'RD화과']
selected_items.loc[selected_items['item']=='꽃CB와 BC(broccoli)','item'] = 'BC'
selected_items.loc[selected_items['item']=='방울다다기 CB','item'] = 'CB'
selected_items.loc[selected_items['item']=='순RD','item'] = 'RD'
selected_items

Unnamed: 0,Period,item,Export Weight,Export Value,Import Weight,Import Value,Trade Balance
3,2019-01,BC,160,1,638913,563,-562
4,2019-01,CB,0,0,7580,38,-38
5,2019-01,CB,184650,94,395802,90,4
8,2019-01,CR,23150,22,7466150,2955,-2934
17,2019-01,TG,58368,172,0,0,172
...,...,...,...,...,...,...,...
1249,2023-02,CB,71,1,10362,55,-55
1250,2023-02,CB,13188,13,377456,104,-91
1253,2023-02,CR,22510,20,9260020,3758,-3737
1254,2023-02,RD,4000,4,2,0,4


In [8]:
selected_items.item.value_counts()

item
CB    100
BC     50
CR     50
TG     50
RD      6
Name: count, dtype: int64

In [9]:
selected_items.head(30)

Unnamed: 0,Period,item,Export Weight,Export Value,Import Weight,Import Value,Trade Balance
3,2019-01,BC,160,1,638913,563,-562
4,2019-01,CB,0,0,7580,38,-38
5,2019-01,CB,184650,94,395802,90,4
8,2019-01,CR,23150,22,7466150,2955,-2934
17,2019-01,TG,58368,172,0,0,172
28,2019-02,BC,780,1,396870,399,-398
29,2019-02,CB,0,0,4140,21,-21
30,2019-02,CB,182636,69,336142,77,-8
33,2019-02,CR,16250,16,6524716,2600,-2585
41,2019-02,TG,8474,33,0,0,33


In [10]:
train_df.head()

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg)
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0


In [11]:
train_df.describe()

Unnamed: 0,supply(kg),price(원/kg)
count,59397.0,59397.0
mean,11894.53,1131.680674
std,52264.0,2029.941445
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,3800.0,1519.0
max,1222800.0,20909.0


In [12]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59397 entries, 0 to 59396
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ID           59397 non-null  object 
 1   timestamp    59397 non-null  object 
 2   item         59397 non-null  object 
 3   corporation  59397 non-null  object 
 4   location     59397 non-null  object 
 5   supply(kg)   59397 non-null  float64
 6   price(원/kg)  59397 non-null  float64
dtypes: float64(2), object(5)
memory usage: 3.2+ MB


In [13]:
train_df.isna().sum()

ID             0
timestamp      0
item           0
corporation    0
location       0
supply(kg)     0
price(원/kg)    0
dtype: int64

In [14]:
train_df.item.value_counts()

item
TG    15230
BC    13707
RD    12184
CR    10661
CB     7615
Name: count, dtype: int64

In [15]:
test_df.head()

Unnamed: 0,ID,timestamp,item,corporation,location
0,TG_A_J_20230304,2023-03-04,TG,A,J
1,TG_A_J_20230305,2023-03-05,TG,A,J
2,TG_A_J_20230306,2023-03-06,TG,A,J
3,TG_A_J_20230307,2023-03-07,TG,A,J
4,TG_A_J_20230308,2023-03-08,TG,A,J


In [16]:
def outlier(df, n, cols):
    outs = []
    for col in cols :
        Q1 = np.percentile(df[col], 25)
        Q3 = np.percentile(df[col], 75)
        IQR = Q3 - Q1
        
        step = 1.5 * IQR
        indexes = df[(df[col] < Q1 - step) | (df[col] > Q3 + step)].index
        outs.extend(indexes)
    outs = Counter(outs)
    res = [k for k, v in outs.items() if v > n]
    return res

outlier_col = ['supply(kg)', 'price(원/kg)']
outlier(train_df, 2, outlier_col)

[]

In [17]:
# train_df = pd.merge(train_df, selected_items, on = 'item', how = 'left')
# test_df = pd.merge(test_df, selected_items, on = 'item', how = 'left')

# train_df = train_df.reset_index(drop=True)
# test_df = test_df.reset_index(drop=True)


In [18]:
# 공휴일
holi_weekday = ['2019-01-01', '2019-02-04', '2019-02-05', '2019-02-06', '2019-03-01', '2019-05-05', '2019-05-12', '2019-06-06', '2019-08-15', '2019-09-12', '2019-09-13', '2019-09-14', '2019-10-03', '2019-10-09', '2019-12-25',
                '2020-01-01' ,'2020-01-24' ,'2020-01-25', '2020-01-26', '2020-03-01', '2020-04-30', '2020-05-05', '2020-06-06', '2020-08-15', '2020-08-17', '2020-09-30', '2020-10-01', '2020-10-02', '2020-10-03', '2020-10-09', '2020-12-25',
                '2021-01-01' ,'2021-02-11' ,'2021-02-12', '2021-02-13', '2021-03-01', '2021-05-05', '2021-05-19', '2021-06-06', '2021-08-15', '2021-09-20', '2021-09-21', '2021-09-22', '2021-10-03', '2021-10-09', '2021-12-25',
                '2022-01-01' ,'2022-01-31' ,'2022-02-01', '2022-02-02', '2022-03-01', '2022-05-05', '2022-05-08', '2022-06-06', '2022-08-15', '2022-09-09', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-03', '2022-10-09', '2020-10-10', '2022-12-25',
                '2023-01-01' ,'2023-01-21' ,'2023-01-22', '2023-01-23', '2023-01-24', '2023-03-01']

In [25]:
# train_df['timestamp'] = pd.to_datetime(train_df['timestamp'])
# train_df['year'] = train_df['timestamp'].dt.year
# train_df['month'] = train_df['timestamp'].dt.month
# train_df['week'] = train_df['timestamp'].dt.isocalendar().week
# train_df['day'] = train_df['timestamp'].dt.day
# train_df['day_of_week'] = train_df['timestamp'].dt.dayofweek
# train_df['sin_month'] = np.sin(2 * np.pi * train_df['month'] / 12)
# train_df['cos_month'] = np.cos(2 * np.pi * train_df['month'] / 12)
# train_df['sin_date'] = np.sin(2 * np.pi * (train_df['month'] + train_df['day'] / 31) / 12)
# train_df['cos_date'] = np.cos(2 * np.pi * (train_df['month'] + train_df['day'] / 31) / 12)
# train_df['sin_week'] = np.sin(2 * np.pi * train_df['week'] / 4)
# train_df['cos_week'] = np.cos(2 * np.pi * train_df['week'] / 4)
# train_df['sin_dayofweek'] = np.sin(2 * np.pi * (train_df['day_of_week'] + 1) / 7)
# train_df['cos_dayofweek'] = np.cos(2 * np.pi * (train_df['day_of_week'] + 1) / 7)
# train_df['holiday'] = np.where((train_df.day_of_week >= 5) | (train_df.timestamp.dt.strftime('%Y-%m-%d').isin(holi_weekday)), 1, 0)

# test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
# test_df['year'] = test_df['timestamp'].dt.year
# test_df['month'] = test_df['timestamp'].dt.month
# test_df['week'] = test_df['timestamp'].dt.isocalendar().week
# test_df['day'] = test_df['timestamp'].dt.day
# test_df['day_of_week'] = test_df['timestamp'].dt.dayofweek
# test_df['sin_month'] = np.sin(2 * np.pi * test_df['month'] / 12)
# test_df['cos_month'] = np.cos(2 * np.pi * test_df['month'] / 12)
# test_df['sin_date'] = np.sin(2 * np.pi * (test_df['month'] + test_df['day'] / 31) / 12)
# test_df['cos_date'] = np.cos(2 * np.pi * (test_df['month'] + test_df['day'] / 31) / 12)
# test_df['sin_week'] = np.sin(2 * np.pi * test_df['week'] / 4)
# test_df['cos_week'] = np.cos(2 * np.pi * test_df['week'] / 4)
# test_df['sin_dayofweek'] = np.sin(2 * np.pi * (test_df['day_of_week'] + 1) / 7)
# test_df['cos_dayofweek'] = np.cos(2 * np.pi * (test_df['day_of_week'] + 1) / 7)
# test_df['holiday'] = np.where((test_df.day_of_week >= 5) | (test_df.timestamp.dt.strftime('%Y-%m-%d').isin(holi_weekday)), 1, 0)

def preprocess_time_series(df):
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['year'] = df['timestamp'].dt.year
    df['month'] = df['timestamp'].dt.month
    df['week'] = df['timestamp'].dt.isocalendar().week
    df['day'] = df['timestamp'].dt.day
    df['day_of_week'] = df['timestamp'].dt.dayofweek
    df['sin_month'] = np.sin(2 * np.pi * df['month'] / 12)
    df['cos_month'] = np.cos(2 * np.pi * df['month'] / 12)
    df['season'] = df['month'].apply(lambda x: 'spring' if x in [3, 4, 5] else ('summer' if x in [6, 7, 8] else ('fall' if x in [9, 10, 11] else 'winter')))
    df['spring_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['spring_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['summer_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['summer_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['fall_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['fall_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['winter_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['winter_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    # df['sin_date'] = np.sin(2 * np.pi * (df['month'] + df['day'] / 31) / 12)
    # df['cos_date'] = np.cos(2 * np.pi * (df['month'] + df['day'] / 31) / 12)
    # df['sin_week'] = np.sin(2 * np.pi * df['week'] / 4)
    # df['cos_week'] = np.cos(2 * np.pi * df['week'] / 4)
    # df['sin_dayofweek'] = np.sin(2 * np.pi * (df['day_of_week'] + 1) / 7)
    # df['cos_dayofweek'] = np.cos(2 * np.pi * (df['day_of_week'] + 1) / 7)
    df['holiday'] = np.where((df.day_of_week >= 5) | (df.timestamp.dt.strftime('%Y-%m-%d').isin(holi_weekday)), 1, 0)

preprocess_time_series(train_df)
preprocess_time_series(test_df)

In [26]:
train_df

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg),year,month,week,...,season,holiday,spring_sin,spring_cos,summer_sin,summer_cos,fall_sin,fall_cos,winter_sin,winter_cos
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,...,winter,1,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,1,...,winter,0,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,1,...,winter,0,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,1,...,winter,0,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,1,...,winter,1,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01,0.500000,8.660254e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,2023,2,9,...,winter,0,0.866025,5.000000e-01,0.866025,5.000000e-01,0.866025,5.000000e-01,0.866025,5.000000e-01
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,2023,2,9,...,winter,0,0.866025,5.000000e-01,0.866025,5.000000e-01,0.866025,5.000000e-01,0.866025,5.000000e-01
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,2023,3,9,...,spring,1,1.000000,6.123234e-17,1.000000,6.123234e-17,1.000000,6.123234e-17,1.000000,6.123234e-17
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,2023,3,9,...,spring,0,1.000000,6.123234e-17,1.000000,6.123234e-17,1.000000,6.123234e-17,1.000000,6.123234e-17


In [28]:
# # month_mean 계산
# price_month_mean = pd.pivot_table(train_df, values = 'price(원/kg)', index = ['corporation', 'month'], aggfunc = np.mean).reset_index()
# price_month_mean.columns = ['corporation', 'month', 'month_mean']

# # month_std 계산
# price_month_std = pd.pivot_table(train_df, values = 'price(원/kg)', index = ['corporation', 'month'], aggfunc = np.std).reset_index()
# price_month_std.columns = ['corporation', 'month', 'month_std']

# train_df = train_df.merge(price_month_mean, on = ['corporation', 'month'], how = 'left')
# test_df = test_df.merge(price_month_mean, on = ['corporation', 'month'], how = 'left')

# train_df = train_df.merge(price_month_std, on = ['corporation', 'month'], how = 'left')
# test_df = test_df.merge(price_month_std, on = ['corporation', 'month'], how = 'left')

# train_df = train_df.reset_index(drop = True)

# # 품목별 평균 가격 계산
# item_mean_price = train_df.groupby('item')['price(원/kg)'].mean().reset_index()
# item_mean_price.columns = ['item', 'item_mean_price']

# train_df = train_df.merge(item_mean_price, on='item', how='left')
# test_df = test_df.merge(item_mean_price, on='item', how='left')

# # 법인별 평균 가격 계산
# corp_mean_price = train_df.groupby('corporation')['price(원/kg)'].mean().reset_index()
# corp_mean_price.columns = ['corporation', 'corp_mean_price']

# train_df = train_df.merge(corp_mean_price, on='corporation', how='left')
# test_df = test_df.merge(corp_mean_price, on='corporation', how='left')


# # 지역별 평균 가격 계산
# location_mean_price = train_df.groupby('location')['price(원/kg)'].mean().reset_index()
# location_mean_price.columns = ['location', 'location_mean_price']

# train_df = train_df.merge(location_mean_price, on='location', how='left')
# test_df = test_df.merge(location_mean_price, on='location', how='left')

# window_size = 7 

# train_df['rolling_item_mean'] = train_df['item_mean_price'].rolling(window=window_size).mean()
# test_df['rolling_item_mean'] = test_df['item_mean_price'].rolling(window=window_size).mean()

# train_df['rolling_corp_mean'] = train_df['corp_mean_price'].rolling(window=window_size).mean()
# test_df['rolling_corp_mean'] = test_df['corp_mean_price'].rolling(window=window_size).mean()

# train_df['rolling_loca_mean'] = train_df['location_mean_price'].rolling(window=window_size).mean()
# test_df['rolling_loca_mean'] = test_df['location_mean_price'].rolling(window=window_size).mean()

# lag_period = 7

# train_df['lag_item_price'] = train_df['item_mean_price'].shift(lag_period)
# test_df['lag_item_price'] = test_df['item_mean_price'].shift(lag_period)

# train_df['lag_corp_price'] = train_df['corp_mean_price'].shift(lag_period)
# test_df['lag_corp_price'] = test_df['corp_mean_price'].shift(lag_period)

# train_df['lag_loca_price'] = train_df['location_mean_price'].shift(lag_period)
# test_df['lag_loca_price'] = test_df['location_mean_price'].shift(lag_period)


In [29]:
#학습에 사용하지 않을 변수들을 제거합니다
train_x = train_df.drop(columns=['ID', 'timestamp', 'price(원/kg)', 'supply(kg)'])
train_y = train_df['price(원/kg)']

test_x = test_df.drop(columns=['ID', 'timestamp'])

In [33]:
#질적 변수들을 수치화합니다
qual_col = ['item', 'corporation', 'location', 'season']

for i in qual_col:
    le = LabelEncoder()
    train_x[i]=le.fit_transform(train_x[i])
    test_x[i]=le.transform(test_x[i]) #test 데이터에 대해서 fit하는 것은 data leakage에 해당합니다

print('Done.')

Done.


## Regression Model Fit

In [34]:
# def objective(trial):
#     # 튜닝할 하이퍼파라미터와 탐색 공간을 정의합니다
#     params = {
#         'n_estimators': trial.suggest_int('n_estimators', 100, 3000),
#         'max_depth': trial.suggest_int('max_depth', 3, 50),
#         'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
#         'gamma': trial.suggest_float('gamma', 0, 1),
#         'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
#         'subsample': trial.suggest_float('subsample', 0.1, 1),
#         'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1),
#         'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
#         'reg_lambda': trial.suggest_float('reg_lambda', 0, 10)
#     }
    
#     # 추천된 하이퍼파라미터로 XGBoost 모델을 초기화합니다
#     model = XGBRegressor(**params)
    
#     # 모델을 학습합니다
#     model.fit(train_x, train_y)
    
#     # 검증 세트에서 타깃 변수를 예측합니다
#     preds = model.predict(val_x)
    
#     # 평균 제곱 오차를 계산합니다
#     mse = mean_squared_error(val_y, preds)
    
#     return mse

# train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.2, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=100)

# # 최적의 하이퍼파라미터를 얻습니다
# best_params = study.best_params

# best_model = XGBRegressor(**best_params)
# best_model.fit(train_x, train_y)

In [35]:
model = XGBRegressor()
model.fit(train_x, train_y)

In [36]:
train_pred = model.predict(train_x)

train_rmse = np.sqrt(mean_squared_error(train_y, train_pred))

print(f"Train RMSE: {train_rmse}")

Train RMSE: 788.0184299935128


## Inference

In [37]:
preds = model.predict(test_x)

## Submission

In [38]:
submission = pd.read_csv(r'C:\Users\dlwks\OneDrive\바탕 화면\VSCode\제주 특산물\sample_submission.csv')
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,0
1,TG_A_J_20230305,0
2,TG_A_J_20230306,0
3,TG_A_J_20230307,0
4,TG_A_J_20230308,0
...,...,...
1087,RD_F_J_20230327,0
1088,RD_F_J_20230328,0
1089,RD_F_J_20230329,0
1090,RD_F_J_20230330,0


In [39]:
submission['answer'] = preds
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3606.111816
1,TG_A_J_20230305,-36.288422
2,TG_A_J_20230306,3630.772217
3,TG_A_J_20230307,3704.773926
4,TG_A_J_20230308,3511.339844
...,...,...
1087,RD_F_J_20230327,425.060120
1088,RD_F_J_20230328,382.782959
1089,RD_F_J_20230329,318.050293
1090,RD_F_J_20230330,440.111572


In [40]:
submission.to_csv('C:\\Users\\dlwks\\OneDrive\\바탕 화면\\VSCode\\제주 특산물\\1107-1.csv', index=False)